作图

第四章 散点图

2021-03-14  本文已影响0人  芋圆学徒

第一节 绘制基本散点图

library(gcookbook)
View(heightweight)
heightweight[,c("ageYear","heightIn")]
library(ggplot2)
ggplot(heightweight,aes(x = ageYear,y = heightIn))+
  geom_point()

ggplot(heightweight,aes(x = ageYear,y = heightIn))+
  geom_point(shape=21)
ggplot(heightweight,aes(x = ageYear,y = heightIn))+
  geom_point(size=1.5)
简单散点图

第二节 使用点的形和颜色属性,并基于某变量对数据进行分组

heightweight[,c("sex","ageYear","heightIn")]
ggplot(heightweight,aes(x = ageYear,y = heightIn,colour=sex))+
  geom_point()

ggplot(heightweight,aes(x = ageYear,y = heightIn,shape=sex))+
  geom_point()

ggplot(heightweight,aes(x = ageYear,y = heightIn,shape=sex,colour=sex))+
  geom_point()
分类变量映射点的属性

第三节 使用不同的默认设置的点形

ggplot(heightweight,aes(x = ageYear,y = heightIn))+
  geom_point(shape=3)

ggplot(heightweight,aes(x = ageYear,y = heightIn,shape=sex))+
  geom_point(size=3)+
  scale_shape_manual(values = c(1,4))  #修改电形

hw <- heightweight
hw$weightGroup <- cut(hw$weightLb,breaks = c(-Inf,100,Inf))
ggplot(hw,aes(hw$ageYear,hw$heightIn,shape=sex,fill=weightGroup))+
  geom_point(size=2.5)+
  scale_shape_manual(values = c(21,24))+
  scale_fill_manual(values = c(NA,"black"),
                    guide=guide_legend(override.aes = list(shape=21)))
scale_shape_manual

第四节 将连续变量映射到点的颜色或大小属性上

ggplot(heightweight,aes(x = ageYear,y = heightIn,colour=weightLb))+
geom_point()
ggplot(heightweight,aes(x = ageYear,y = heightIn,size=weightLb))+
geom_point()


连续变量映射到点的属性

将连续变量映射至颜色上,并 scale_fill_gradient()赋予颜色的范围

ggplot(heightweight,aes(x = ageYear,y = heightIn,fill=weightLb))+
  geom_point(shape=21,size=2.5)+
  scale_fill_gradient(low = "black",high = "white")
连续变量映射至颜色上,并 scale_fill_gradient()赋予颜色的范围

使用离散变量代替色阶

range(heightweight$weightLb)
ggplot(heightweight,aes(x = ageYear,y = heightIn,fill=weightLb))+
  geom_point(shape=21,size=2.5)+
  scale_fill_gradient(low = "black",high = "white",
                      breaks=seq(70,170,by=20),
                      guide=guide_legend())
使用离散变量代替色阶

将连续变量映射给大小,将分类变量映射给颜色

ggplot(heightweight,aes(x = ageYear,y = heightIn,size=weightLb,colour=sex))+
  geom_point(alpha=.5)+
  scale_size_area()+             #使数据点面积正比于变量值
  scale_color_brewer(palette = "Set1")
连续变量映射给大小,将分类变量映射给颜色

第五节 处理图形重叠

当绘制的点数目巨大时,我们怎么办?怎么解决重叠?

sp <- ggplot(diamonds,aes(x = carat,y = price))
sp+geom_point()
图形重叠

解决办法1,使用半透明的点

sp+geom_point(alpha=.5)
sp+geom_point(alpha=.1)
通过透明度调节alpha=.1

解决办法2,将数据分箱,并用矩形表示(适用于量化分析)

sp+stat_bin2d()
sp+stat_bin2d(bins = 50)+
  scale_fill_gradient(low = "lightblue",high = "red",limits=c(0,6000))
数据分箱

解决办法3,将数据分箱,并以六边形表示

library(hexbin)
sp+stat_bin_hex()+
  scale_fill_gradient(low = "lightblue",high = "red",limits=c(0,8000))
sp+stat_bin_hex()+
  scale_fill_gradient(low = "lightblue",high = "red",
                      breaks=c(0,250,500,1000,2000,4000,6000),
                      limits=c(0,8000))
数据分箱,并以六边形表示

解决办法4,离散性数据直接使用箱型图,或者添加随即扰动

sp1 <- ggplot(ChickWeight,aes(x = Time,y = weight))
sp1+geom_point()
离散型数据
sp1+geom_point(position = "jitter")
sp1+geom_point(position = position_jitter(width = .3,height = 0))

sp1+geom_boxplot(aes(group=Time))
扰动
箱型图

第六节 添加回归模拟线

*线性模拟线

library(gcookbook)
View(heightweight)
heightweight[,c("ageYear","heightIn")]
library(ggplot2)
sp <- ggplot(heightweight,aes(x = ageYear,y = heightIn))
sp+geom_point()+geom_smooth(method = lm)
回归模拟线
sp+geom_point()+geom_smooth(method = lm,level = 0.99)  #99%的置信区间  level
sp+geom_point()+geom_smooth(method = lm,se = F)#没有置信区间   se
sp+geom_point(colour="grey60")+geom_smooth(method = lm,se=F,colour="black")
最后一条代码
sp+geom_point()+geom_smooth()  #默认情况下是局部加权多项式,即loess曲线
默认情况下是局部加权多项式,即loess曲线

*logistic模型

library(MASS)
b <- biopsy
b$cla <- factor(ifelse(b$class=="benign",0,1),levels = c(0,1))
ggplot(b,aes(x = V1,y = cla))+
  geom_point(position = position_jitter(width = .3,height=0.06),
             alpha = 0.4,shape = 21,size = 1.5)+
  stat_smooth(method = glm,family=binomial)
分类变量使用logistic回归
sp <- ggplot(heightweight,aes(x = ageYear,y = heightIn,colour=sex))+
  geom_point(alpha=.5)+          #使数据点面积正比于变量值
  scale_color_brewer(palette = "Set1")
sp+stat_smooth(method = lm,se=F,fullrange = T)
分类变量映射至颜色后进一步添加模拟线

第七节 根据已有的模型向散点图添加拟合线

library(gcookbook)
data(heightweight)
model <- lm(heightIn ~ ageYear,+I(ageYear^2),data=heightweight)
model
xmin <- min(heightweight$ageYear)
xmax <- max(heightweight$ageYear)
predicted <- data.frame(ageYear=seq(xmin,xmax,length.out=100))
predicted$heightIn <- predict(model,predicted)
head(predicted)
sp <- ggplot(heightweight,aes(x = ageYear,y = heightIn))+
  geom_point(colour="grey40")
sp+geom_line(data=predicted,size=1)
根据模型heightIn ~ ageYear,+I(ageYear^2)添加点图,预测后添加模拟线
#定义predictvals()函数
sp <- ggplot(heightweight,aes(x = ageYear,y = heightIn))+
  geom_point(colour="grey40")
predictvals <- function(model,xvar,yvar,xrange=NULL,samples=100,...){
  
  if(is.null(xrange)){
    if(any(class(model)%in%c("lm","glm")))
      xrange <- range(model$model[[xvar]])
    else if(any(class(model)%in%"loess"))
    xrange <- range(model$x)
  }
  
  newdata <- data.frame(x=seq(xrange[1],xrange[2],length.out=samples))
  names(newdata) <- xvar
  newdata[[yvar]] <- predict(model,newdata=newdata, ...)
  newdata
}

modelinear <- lm(heightIn~ageYear,heightweight)  #使用lm和loess建模
modelloess <- loess(heightIn~ageYear,heightweight)

lm_predict <- predictvals(modelinear,"ageYear","heightIn")  #调用predictivals()函数进行预测
loess_predict <- predictvals(modelloess,"ageYear","heightIn")

sp+geom_line(data = lm_predict,colour="red",size=.8)+
  geom_line(data = loess_predict,colour="blue",size=.8)
使用lm和loess建模,调用predictivals()函数,并将结果传递给geomline

第八节 添加来自多个模型的模拟曲线

make_model <- function(data){
  lm(heightIn~ageYear,data)
}
library(gcookbook)
library(plyr)
models <- dlply(heightweight,"sex",.fun=make_model)
models
predvals <- ldply(models,.fun = predictvals,xvar="ageYear",yvar="heightIn")
predvals
ggplot(heightweight,aes(x = ageYear,y = heightIn,colour=sex))+
  geom_point()+
  geom_line(data = predvals)

#> table(predvals$sex)

#f   m 
#100 100 
#> range(predvals$ageYear[predvals$sex=="f"])
#[1] 11.58 17.50
#> range(predvals$ageYear[predvals$sex=="m"])
#[1] 11.58 17.17
根据性别添加多条模拟线
predvals <- ldply(models,.fun = predictvals,xvar="ageYear",yvar="heightIn",
                  xrang=range(heightweight$ageYear))
ggplot(heightweight,aes(x = ageYear,y = heightIn,colour=sex))+
  geom_point()+
  geom_line(data = predvals)
根据性别添加多条模拟线

第九节 向散点图添加模型系数

library(gcookbook)
model <- lm(heightIn~ageYear,data = heightweight)
summary(model)

Call:
lm(formula = heightIn ~ ageYear, data = heightweight)

Residuals:
Min 1Q Median 3Q Max
-8.3517 -1.9006 0.1378 1.9071 8.3371

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 37.4356 1.8281 20.48 <2e-16 ***
ageYear 1.7483 0.1329 13.15 <2e-16 ***

Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 ‘ ’ 1

Residual standard error: 2.989 on 234 degrees of freedom
Multiple R-squared: 0.4249, Adjusted R-squared: 0.4225
F-statistic: 172.9 on 1 and 234 DF, p-value: < 2.2e-16

pred <- predictvals(model,"ageYear","heightIn")
sp <- ggplot(heightweight,aes(x = ageYear,y = heightIn))+
  geom_point()+
  geom_line(data = pred)
sp+annotate("text",label = "r^2=0.42",x=16.5,y = 52)

sp+annotate("text",label = "r^2==0.42",x=16.5,y = 52,parse=T)
添加公式

第十节 向散点图添加边际地毯

library(ggplot2)
ggplot(faithful,aes(x = eruptions,y = waiting))+
  geom_point()+
  geom_rug()

ggplot(faithful,aes(x = eruptions,y = waiting))+
  geom_point()+
  geom_rug(position = "jitter",size = .2) #添加扰动,增加线宽
geom_rug(position = "jitter",size = .2)

第十一节 像散点图添加标签

library(gcookbook)
subset(countries,Year==2009 & healthexp>2000)
sp <- ggplot(subset(countries,Year==2009 & healthexp>2000),
             aes(x = healthexp,y = infmortality))+
  geom_point()
sp+annotate("text",x = 4350,y = 5.4,label = "Canada")+
  annotate("text",x = 7400,y = 6.8, label = "USA")
annotate
sp+geom_text(aes(label = Name),size=4)
sp+geom_text(aes(label = Name),size=4,vjust=0)   #vjust文本的上下基线调整
sp+geom_text(aes(y=infmortality+.1, label = Name),size=4,vjust=0)

sp+geom_text(aes(label = Name),size=4,hjust=0)  #hjust文本左右对齐
sp+geom_text(aes(x = healthexp+100,label = Name),size=4,hjust=0) 
hjust右对齐
adat <- subset(countries,Year==2009 & healthexp>2000)
  adat$Name1 <- adat$Name
idx <- adat$Name1%in%c("Canada","Denmark","Austria","Norway","Sweden","Portugal","Ireland")
adat$Name1[!idx] <- NA
ggplot(adat,aes(x = healthexp,y = infmortality))+ geom_point()+
  geom_text(aes(x = healthexp+100,label = Name1),size=4,hjust=0) +
  xlim(2000,10000)
为个别点添加注释

第十二节 绘制气泡图

cdat <- subset(countries,Year==2009&
                 Name%in% c("Canada","Ireland","United States"))
p <- ggplot(cdat,aes(x = healthexp,y = infmortality,size=GDP))+
  geom_point(shape=21,colour="black",fill="cornsilk")
p+scale_size_area(max_size = 15)
通过scale_size_area添加形状

总结

基本绘图的语句不多,但熟能生巧,这些图都基本版图的堆积以及变形!

上一篇下一篇

猜你喜欢

热点阅读