作图R plot

第六章 描述数据分布

2021-04-28  本文已影响0人  芋圆学徒

第六章 描述数据分布----

第一节 绘制简单直方图

我们首先绘制一个简单的直方图geom_histogram()

library(ggplot2)

# Histogram of the waiting times in `faithful`, using the default binning.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_histogram()
image.png

未包含在数据框中的数据

当数据是一个独立的向量、没有放在数据框里时,给ggplot()的data参数传入NULL,再在aes()中直接映射该向量即可;由于w就是faithful$waiting,最后做出来的图和上一张图一模一样~

# Plot a free-standing vector: pass NULL as the data and map the vector
# directly inside aes().
w <- faithful[["waiting"]]
ggplot(data = NULL, mapping = aes(x = w)) +
  geom_histogram()

调整组距,颜色及切割分组

使用参数binwidth =调整组间距

# Bins 5 units wide, drawn as white bars with a black outline.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_histogram(binwidth = 5, colour = "black", fill = "white")

# Divide the observed range of waiting times into 15 equally wide bins.
binsize <- (max(faithful$waiting) - min(faithful$waiting)) / 15
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_histogram(binwidth = binsize, colour = "black", fill = "white")
binwidth = 5
自定义边距

设置组距及组边距

组距binwidth =
组边距origin=(新版ggplot2中origin已弃用,改用boundary=,含义相同)

# Shared base plot; each call below adds a histogram layer with 8-unit bins.
h <- ggplot(faithful, aes(x = waiting))

# `boundary` fixes where a bin edge falls. It replaces the `origin` argument,
# which current ggplot2 reports as deprecated.
h + geom_histogram(
  binwidth = 8, fill = "white", colour = "black", boundary = 31
)
h + geom_histogram(
  binwidth = 8, fill = "white", colour = "black", boundary = 35
)
origin=31
origin=35

第二节 基于分组数据绘制分组直方图

library(MASS)

1 使用smoke作为分面的条件

# One birth-weight histogram per value of `smoke`, stacked in rows.
ggplot(data = birthwt, mapping = aes(x = bwt)) +
  geom_histogram(colour = "black", fill = "white") +
  facet_grid(smoke ~ .)
facet_grid(smoke~.)

2 修改分面的标签

错误展示,直接添加因子水平

level需要和内容相符,这里不顾0,1,直接使用levels = c("no smoke","smoke")

# Counter-example (intentionally wrong): `smoke` holds 0/1, so requesting
# levels that never occur in the data turns every value into NA.
birthwt1 <- birthwt
birthwt1$smoke <- factor(birthwt1$smoke, levels = c("no smoke", "smoke"))
levels(birthwt1$smoke)

ggplot(data = birthwt1, mapping = aes(x = bwt)) +
  geom_histogram(colour = "black", fill = "white") +
  facet_grid(smoke ~ .)
错误展示

正确方式

可以看到,levels(birthwt1$smoke)是0,1,因此,我们使用plyr包中的revalue函数将因子水平重新定义

# Work on a copy so the original `birthwt` data stay untouched.
birthwt1 <- birthwt
birthwt1$smoke <- factor(birthwt1$smoke)
levels(birthwt1$smoke)

library(plyr)  # provides revalue() for renaming factor levels
birthwt1$smoke <- revalue(
  birthwt1$smoke,
  replace = c("0" = "no smoke", "1" = "smoke")
)
levels(birthwt1$smoke)

# The facet strips now show the renamed levels.
ggplot(data = birthwt1, mapping = aes(x = bwt)) +
  geom_histogram(colour = "black", fill = "white") +
  facet_grid(smoke ~ .)
image.png

3 分面后y轴长度调整scales="free"

默认情况下,scales="fixed",每个分面的刻度尺一致,scales="free"将每个分面的坐标轴刻度根据自己的长度进行了调整

# Default: every facet shares the same axis scales.
ggplot(data = birthwt, mapping = aes(x = bwt)) +
  geom_histogram(colour = "black", fill = "white") +
  facet_grid(race ~ .)

# scales = "free": each facet gets axis limits fitted to its own data.
ggplot(data = birthwt, mapping = aes(x = bwt)) +
  geom_histogram(colour = "black", fill = "white") +
  facet_grid(race ~ ., scales = "free")
image.png
scales="free"

4 另一种分组方式,fill

# Group by mapping the smoking factor to fill. position = "identity" overlays
# the groups instead of stacking them; alpha keeps both visible.
birthwt1 <- birthwt
birthwt1$smoke <- factor(birthwt1$smoke)
ggplot(data = birthwt1, mapping = aes(x = bwt, fill = smoke)) +
  geom_histogram(alpha = .4, position = "identity")
image.png

第三节 绘制密度曲线

以下两句代码得到了一样的图形,这个和书中有差异,但不影响我们继续

# Density curve drawn with the dedicated geom.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_density()

# The same estimate drawn as a plain line; expand_limits() keeps y = 0 in view.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_line(stat = "density") +
  expand_limits(y = 0)
image.png

1 调节曲线光滑程度,adjust, 默认值为1

黑色为默认值,红色光滑度下降,蓝色光滑度增加

# Three density lines that differ only in `adjust`:
# the default (black), 0.25 (red) and 2 (blue).
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_line(stat = "density") +
  geom_line(stat = "density", adjust = .25, colour = "red") +
  geom_line(stat = "density", adjust = 2, colour = "blue")
adjust

2 设置x轴范围

xlim(35,105)设置x轴的取值范围,调整后图形更加美观
以下两句代码得到了一样的图形,与课本出入,我个人认为是由于包的更新所致

# Filled, semi-transparent density with the x axis limited to 35-105.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_density(alpha = .2, fill = "blue") +
  xlim(35, 105)

# Same fill without an outline (colour = NA), with the curve added back as a
# separate line layer.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_density(alpha = .2, colour = NA, fill = "blue") +
  geom_line(stat = "density") +
  xlim(35, 105)
image.png

3 将直方图和密度曲线叠加

直方图和密度曲线叠加:将y映射为密度 y = ..density..(ggplot2 3.3.0起推荐写作 y = after_stat(density))

# Overlay a density curve on a histogram. Mapping y to the computed density
# puts the bars on the same scale as the curve.
# after_stat(density) is the current spelling of the older `..density..`.
ggplot(faithful, aes(x = waiting, y = after_stat(density))) +
  # NOTE(review): on ggplot2 >= 3.4.0 the outline width is spelled
  # `linewidth`; `size` is kept here so older versions still honour it.
  geom_histogram(fill = "cornsilk", colour = "grey60", size = .2) +
  geom_density() +
  xlim(35, 105)
image.png

第四节 基于分组数据绘制分组密度曲线图

两种方法,1把分组变量赋予fill或colour;2使用facet

第一种方式,把分组变量赋予fill或colour

# Convert the grouping variable to a factor so it maps to discrete
# fill/colour values.
birthwt1 <- birthwt
birthwt1$smoke <- factor(birthwt1$smoke)

# Groups distinguished by fill colour.
ggplot(data = birthwt1, mapping = aes(x = bwt, fill = smoke)) +
  geom_density(alpha = .2)

# Groups distinguished by line colour.
ggplot(data = birthwt1, mapping = aes(x = bwt, colour = smoke)) +
  geom_density()
fill=smoke.png
colour=smoke.png

第二种方式,使用facet

library(MASS)

# Copy the data and turn `smoke` into a factor.
birthwt1 <- birthwt
birthwt1$smoke <- factor(birthwt1$smoke)
levels(birthwt1$smoke)

library(plyr)  # provides revalue() for renaming factor levels
birthwt1$smoke <- revalue(
  birthwt1$smoke,
  replace = c("0" = "no smoke", "1" = "smoke")
)
levels(birthwt1$smoke)

# One density curve per smoking group, stacked in rows.
ggplot(data = birthwt1, mapping = aes(x = bwt)) +
  geom_density() +
  facet_grid(smoke ~ .)
image.png

分面,添加直方图

y = ..density..
facet_grid

# Faceted histogram with a density curve on top; y is the computed density so
# both layers share one scale.
# after_stat(density) is the current spelling of the older `..density..`.
ggplot(birthwt1, aes(x = bwt, y = after_stat(density))) +
  # NOTE(review): on ggplot2 >= 3.4.0 the outline width is spelled
  # `linewidth`; `size` is kept here so older versions still honour it.
  geom_histogram(
    binwidth = 200, fill = "cornsilk", colour = "grey60", size = .2
  ) +
  geom_density() +
  facet_grid(smoke ~ ., scales = "free")
image.png

第五节 绘制频数多边形

geom_freqpoly()

# Frequency polygon with the default binning.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_freqpoly()

# Bins 4 units wide.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_freqpoly(binwidth = 4)

# Divide the observed range of waiting times into 15 equally wide bins.
binsize <- (max(faithful$waiting) - min(faithful$waiting)) / 15
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_freqpoly(binwidth = binsize)
image.png
image.png
image.png

第六节 绘制基本箱型图

library(ggplot2)
library(MASS)

# Box plot of birth weight per race; race is wrapped in factor() so it is
# treated as a discrete x variable.
ggplot(data = birthwt, mapping = aes(x = factor(race), y = bwt)) +
  geom_boxplot()
image.png
调整箱子的宽度width = .5
# Narrower boxes.
ggplot(data = birthwt, mapping = aes(x = factor(race), y = bwt)) +
  geom_boxplot(width = .5)
image.png
修改异常值的大小和形状,默认值分别为2和16
outlier.size =outlier.shape =
# Draw outliers with point shape 21 at size 1.5.
ggplot(data = birthwt, mapping = aes(x = factor(race), y = bwt)) +
  geom_boxplot(outlier.shape = 21, outlier.size = 1.5)
image.png

第七节 为箱型图添加槽口

# Notched box plot. TRUE is spelled out because `T` is an ordinary variable
# that can be reassigned.
ggplot(birthwt, aes(x = factor(race), y = bwt)) +
  geom_boxplot(notch = TRUE)
image.png

第八节 向箱型图添加均值

箱线图中的横线是中位数,我们添加的是均值,所以可能并不重合

# Box plot with the group mean drawn as a white-filled diamond (shape 23).
# `fun` is the current name of the stat_summary() argument formerly called
# `fun.y`, which is deprecated.
ggplot(birthwt, aes(x = factor(race), y = bwt)) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "point", shape = 23, size = 3, fill = "white"
  )
image.png

第九节 绘制小提琴图

library(gcookbook)

# Base plot reused by the violin examples: height (inches) by sex.
p <- ggplot(data = heightweight, mapping = aes(x = sex, y = heightIn))
p + geom_violin()
image.png

传统小提琴图,添加箱型图和中位数点

# Violin with a narrow black box plot inside and the median as a white point.
# `fun` is the current name of the stat_summary() argument formerly called
# `fun.y`, which is deprecated.
p + geom_violin() +
  geom_boxplot(width = .1, fill = "black", outlier.colour = "white") +
  stat_summary(
    fun = median, geom = "point", shape = 21, size = 3, fill = "white"
  )
image.png

默认小提琴尾部截断,保留可用trim=F

# Default: the violin tails are trimmed.
p + geom_violin()
# Keep the tails. FALSE is spelled out because `F` is an ordinary variable
# that can be reassigned.
p + geom_violin(trim = FALSE)
image.png
image.png

矫正,使面积和数量成正比

# Scale each violin by the "count" method instead of the default.
p + geom_violin(scale = "count")
image.png

调节小提琴图的光滑程度

# Smoothing adjusted upwards (2) ...
p + geom_violin(adjust = 2)
# ... and downwards (0.5).
p + geom_violin(adjust = .5)
image.png
image.png

第十节 绘制Wilkinson点图

library(ggplot2)
library(gcookbook)

# Keep the 2009 rows whose health expenditure is above 2000.
countries2009 <- subset(countries, Year == 2009 & healthexp > 2000)

# Base plot reused by the dot plot examples.
p <- ggplot(data = countries2009, mapping = aes(x = infmortality))
p + geom_dotplot()
image.png

移除纵坐标,最大组距为0.25,添加边际地毯以示坐标位置

# Dot plot with binwidth 0.25 plus a rug; the y-axis breaks and axis line are
# removed.
p +
  geom_dotplot(binwidth = .25) +
  geom_rug() +
  scale_y_continuous(breaks = NULL) +
  theme(axis.line.y = element_blank())
image.png

histodot固定分组的点图

# Same plot using the "histodot" binning method.
p +
  geom_dotplot(method = "histodot", binwidth = .25) +
  geom_rug() +
  scale_y_continuous(breaks = NULL) +
  theme(axis.line.y = element_blank())
image.png

中心堆叠stackdir = "center"

# Same plot with the dots stacked around the centre line.
p +
  geom_dotplot(stackdir = "center", binwidth = .25) +
  geom_rug() +
  scale_y_continuous(breaks = NULL) +
  theme(axis.line.y = element_blank())
image.png

第十一节 基于分组数据绘制分组点图

# One centred dot stack per sex, binned along the y axis.
ggplot(data = heightweight, mapping = aes(x = sex, y = heightIn)) +
  geom_dotplot(
    binaxis = "y",
    binwidth = .5,
    stackdir = "center"
  )
image.png

添加箱线图,隐去箱线图上的异常点

# Dot plot drawn over a box plot.
ggplot(data = heightweight, mapping = aes(x = sex, y = heightIn)) +
  # Box plot layer with its outlier points hidden.
  geom_boxplot(outlier.color = NA, width = .4) +
  # fill = NA makes the dots hollow.
  geom_dotplot(
    binaxis = "y",
    binwidth = .5,
    stackdir = "center",
    fill = NA
  )
image.png
# With a numeric x, `group` has to be given explicitly. The x axis is then
# continuous, so its breaks and labels are set through scale_x_continuous().
ggplot(data = heightweight, mapping = aes(x = sex, y = heightIn)) +
  geom_boxplot(
    mapping = aes(x = as.numeric(sex) + .2, group = sex),
    width = .25
  ) +
  # fill = NA makes the dots hollow.
  geom_dotplot(
    mapping = aes(x = as.numeric(sex) - .2, group = sex),
    binaxis = "y",
    binwidth = .5,
    stackdir = "center",
    fill = NA
  ) +
  scale_x_continuous(
    breaks = 1:nlevels(heightweight$sex),
    labels = levels(heightweight$sex)
  )
image.png

第十二节 绘制二维数据的密度图

# Base plot reused by the 2D density examples.
p <- ggplot(data = faithful, mapping = aes(x = eruptions, y = waiting))

# Scatter plot with 2D density contours on top.
p +
  geom_point() +
  stat_density2d()
image.png

将height映射到颜色的等高线..level..

# Colour each contour line by its computed level.
# after_stat(level) is the current spelling of the older `..level..`.
p + stat_density2d(aes(colour = after_stat(level)))
image.png

将密度估计映射给填充色..density..

# Map the density estimate to fill and draw it as a raster, not as contours.
# after_stat(density) replaces the older `..density..`; FALSE is spelled out
# because `F` is an ordinary variable that can be reassigned.
p + stat_density2d(
  aes(fill = after_stat(density)),
  geom = "raster",
  contour = FALSE
)
image.png

带数据点,并将密度估计映射给alpha的瓦片图

# Data points with the density estimate mapped to the alpha of a tile layer.
# after_stat(density) replaces the older `..density..`; FALSE is spelled out
# because `F` is an ordinary variable that can be reassigned.
p + geom_point() +
  stat_density2d(
    aes(alpha = after_stat(density)),
    geom = "tile",
    contour = FALSE
  )

# Raster version with the density estimate mapped to fill.
p + stat_density2d(
  aes(fill = after_stat(density)),
  geom = "raster",
  contour = FALSE
)
image.png
image.png
上一篇 下一篇

猜你喜欢

热点阅读