使用Tabplot可视化大型数据集
2019-03-01 本文已影响4人
热衷组培的二货潜
我是被这张图吸引才 mark(收藏)的,先记录下来。(原文配图:image.png)
# Attach the packages used by this script. library() raises an error right
# away when a package is missing, whereas require() only returns FALSE and
# lets the script fail later with a less helpful message.
library(ggplot2)
# devtools::install_github("mtennekes/tabplot")
library(tabplot)

# Load the diamonds example data set.
data(diamonds)

## add some NA's
# Set the price to NA for every row whose cut is "Ideal".
is.na(diamonds$price) <- diamonds$cut == "Ideal"
# Set the cut to NA wherever a uniform random draw exceeds 0.8.
is.na(diamonds$cut) <- runif(nrow(diamonds)) > 0.8

# Tableplot of the whole data set with default settings.
tableplot(diamonds)
# The tableplot below consists of five columns, with the data sorted by
# price. The missing values that were added are placed at the bottom and
# (by default) shown in bright red.
tableplot(
  diamonds,
  select = c(carat, price, cut, color, clarity),
  sortCol = price
)

# Same columns and sort column, limited to the range from = 0, to = 5
# (NOTE(review): presumably a percentage range of the sorted rows - confirm
# against the tabplot documentation).
tableplot(
  diamonds,
  select = c(carat, price, cut, color, clarity),
  sortCol = price,
  from = 0,
  to = 5
)

# Plot only the rows that satisfy the subset expression.
tableplot(
  diamonds,
  subset = price < 5000 & cut == "Premium"
)

# Display the palettes offered by tabplot.
tablePalettes()

# Choose a palette per categorical column: two by name, one as an explicit
# colour vector.
tableplot(
  diamonds,
  pals = list(
    cut = "Set1(6)",
    color = "Set5",
    clarity = rainbow(8)
  )
)
# Derive factor versions of two numeric columns with num2fac() (n = 20 for
# carat, n = 100 for price) and plot them next to the original columns.
diamonds[["carat_class"]] <- num2fac(diamonds[["carat"]], n = 20)
diamonds[["price_class"]] <- num2fac(diamonds[["price"]], n = 100)
tableplot(
  diamonds,
  select = c(carat, price, carat_class, price_class)
)
# Create a large data set by repeating every row index of diamonds 10 times.
large_diamonds <- diamonds[rep(seq.int(nrow(diamonds)), times = 10), ]

# Time the preparation step; the prepared object is kept in `p`.
system.time(p <- tablePrepare(large_diamonds))
#   user  system elapsed
#   0.78    0.27    1.06

# Time building a tableplot from the prepared object without drawing it.
system.time(tableplot(p, plot = FALSE))
#   user  system elapsed
#   0.14    0.09    0.23

# Same, but sorted by price and with 200 bins.
system.time(tableplot(p, sortCol = price, nBins = 200, plot = FALSE))
#   user  system elapsed
#   0.11    0.12    0.24

# Although the first step takes a few seconds on a moderate desktop
# computer, the processing time needed to create a tableplot from the
# intermediate result (object p) is very short compared with the direct
# approach.

# Time a tableplot created with sample = TRUE.
system.time(tableplot(p, sample = TRUE))
#   user  system elapsed
#   0.31    0.25    0.56
# Scale carat by its maximum so it can be used as a sampling weight.
carat.norm <- diamonds$carat / max(diamonds$carat)

# draw samples
# sample.int(n, ...) replaces sample(1:n, ...): it avoids the 1:n
# anti-pattern and sample()'s special handling of a length-one first
# argument, while drawing the same indices for the same RNG state.
# Rows with a larger carat are more likely to be drawn here ...
exp.diamonds <- diamonds[sample.int(
  nrow(diamonds),
  size = 10000,
  replace = TRUE,
  prob = carat.norm
), ]
# ... and rows with a smaller carat are more likely to be drawn here.
chp.diamonds <- diamonds[sample.int(
  nrow(diamonds),
  size = 10000,
  replace = TRUE,
  prob = 1 - carat.norm
), ]

# Build a tableplot object for each sample without drawing it, then plot
# the difference between the two objects.
tp1 <- tableplot(exp.diamonds, plot = FALSE)
tp2 <- tableplot(chp.diamonds, plot = FALSE)
plot(tp2 - tp1)
# Create the tableplot object without drawing it and print its summary.
tab <- tableplot(diamonds, plot = FALSE)
summary(tab)

# Draw the first seven columns with custom font sizes, legend length and
# title.
tableplot(
  diamonds,
  select = 1:7,
  fontsize = 14,
  legend.lines = 8,
  title = "Shine on you crazy Diamond",
  fontsize.title = 18
)

# Derive a modified copy of the existing object: a different column
# selection and a different palette for cut; then draw it.
tab2 <- tableChange(
  tab,
  select_string = c("carat", "price", "cut", "color", "clarity"),
  pals = list(cut = "Set1(2)")
)
plot(tab2)

# Write the original tableplot object to an image file.
tableSave(
  tab,
  filename = "diamonds.png",
  width = 5,
  height = 3,
  fontsize = 6,
  legend.lines = 6
)