第六章 描述数据分布
2021-04-28 本文已影响0人
芋圆学徒
第六章 描述数据分布----
第一节 绘制简单直方图
我们首先绘制一个简单的直方图geom_histogram()
library(ggplot2)
# Histogram of the waiting column of faithful, using the default binning.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_histogram()
image.png
未包含在数据框中的数据
当数据不在数据框中、而是一个独立的向量时,可以把数据框参数设为NULL,再把该向量直接映射给x;最后做出来的图和上一张图一模一样~
# The values are held in a free-standing vector instead of a data frame
# column, so NULL is passed as the data argument and the vector is mapped
# to x directly.
w <- faithful$waiting
ggplot(data = NULL, mapping = aes(x = w)) +
  geom_histogram()
调整组距,颜色及切割分组
使用参数binwidth =
调整组间距
# Fixed bin width of 5; bars are white with a black outline.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_histogram(binwidth = 5, colour = "black", fill = "white")

# Bin width chosen so that the observed range of waiting is cut into 15 bins.
binsize <- diff(range(faithful$waiting)) / 15
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_histogram(binwidth = binsize, colour = "black", fill = "white")
binwidth = 5
自定义边距
设置组距及组边距
组距binwidth =
组边距origin=(该参数在新版ggplot2中已弃用,建议改用boundary=)
# Base plot shared by the two histograms below; only the bin edges differ.
h <- ggplot(faithful, aes(x = waiting))

# `boundary` replaces the deprecated `origin` argument of the binning stat:
# the given value is forced to coincide with a bin edge.
h + geom_histogram(
  binwidth = 8, fill = "white", colour = "black", boundary = 31
)
h + geom_histogram(
  binwidth = 8, fill = "white", colour = "black", boundary = 35
)
origin=31
origin=35
第二节 基于分组数据绘制分组直方图
library(MASS)
1 使用smoke作为分面的条件
# Histogram of bwt, split into one facet row per value of smoke.
ggplot(data = birthwt, mapping = aes(x = bwt)) +
  geom_histogram(colour = "black", fill = "white") +
  facet_grid(smoke ~ .)
facet_grid(smoke~.)
2 修改分面的标签
错误展示,直接添加因子水平
level需要和内容相符,这里不顾0,1,直接使用levels = c("no smoke","smoke")
# Deliberately incorrect demonstration: the requested levels do not match the
# values stored in smoke, so factor() cannot map any entry onto them.
birthwt1 <- birthwt
birthwt1$smoke <- factor(birthwt1$smoke, levels = c("no smoke", "smoke"))
levels(birthwt1$smoke)

ggplot(data = birthwt1, mapping = aes(x = bwt)) +
  geom_histogram(colour = "black", fill = "white") +
  facet_grid(smoke ~ .)
错误展示
正确方式
可以看到,levels(birthwt1$smoke)是0,1,因此,我们使用plyr包中的revalue函数将因子水平重新定义
# Work on a copy so the original birthwt data stays untouched.
birthwt1 <- birthwt
birthwt1$smoke <- factor(birthwt1$smoke)
levels(birthwt1$smoke)

library(plyr) # provides revalue() for renaming factor levels
# Rename the existing levels "0" and "1" to readable facet labels.
birthwt1$smoke <- revalue(
  birthwt1$smoke,
  replace = c("0" = "no smoke", "1" = "smoke")
)
levels(birthwt1$smoke)

ggplot(data = birthwt1, mapping = aes(x = bwt)) +
  geom_histogram(colour = "black", fill = "white") +
  facet_grid(smoke ~ .)
image.png
3 分面后y轴长度调整scales="free"
默认情况下,scales="fixed",每个分面的刻度尺一致;
设置scales="free"后,
每个分面的坐标轴刻度会根据各自的数据范围单独调整
# Facet by race with the default (shared) axis scales.
ggplot(data = birthwt, mapping = aes(x = bwt)) +
  geom_histogram(colour = "black", fill = "white") +
  facet_grid(race ~ .)

# Same plot, but each facet is allowed its own axis range.
ggplot(data = birthwt, mapping = aes(x = bwt)) +
  geom_histogram(colour = "black", fill = "white") +
  facet_grid(race ~ ., scales = "free")
image.png
scales="free"
4 另一种分组方式,fill
# smoke is converted to a factor so it can be mapped to a discrete fill.
birthwt1 <- birthwt
birthwt1$smoke <- factor(birthwt1$smoke)

# position = "identity" overlays the groups instead of stacking them; the
# bars are semi-transparent so both groups stay visible.
ggplot(data = birthwt1, mapping = aes(x = bwt, fill = smoke)) +
  geom_histogram(alpha = 0.4, position = "identity")
image.png
第三节 绘制密度曲线
以下两句代码得到了一样的图形,这个和书中有差异,但不影响我们继续
# Density curve drawn with the dedicated density geom.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_density()

# Density curve drawn as a plain line; the y axis is extended to include 0.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_line(stat = "density") +
  expand_limits(y = 0)
image.png
1 调节曲线光滑程度,adjust, 默认值为1
黑色为默认值,红色光滑度下降,蓝色光滑度增加
# Three density curves with different smoothing: default adjust (uncoloured
# line), adjust = 0.25 (red) and adjust = 2 (blue).
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_line(stat = "density") +
  geom_line(stat = "density", colour = "red", adjust = 0.25) +
  geom_line(stat = "density", colour = "blue", adjust = 2)
adjust
2 设置x轴范围
xlim(35,105)
设置x轴的取值范围,调整后图形更加美观
以下两句代码得到了一样的图形,与课本出入,我个人认为是由于包的更新所致
# Filled, semi-transparent density with the x axis limited to 35-105.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_density(alpha = 0.2, fill = "blue") +
  xlim(35, 105)

# Same picture built from two layers: a filled area without an outline
# (colour = NA) and a separate density line on top.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_density(alpha = 0.2, fill = "blue", colour = NA) +
  geom_line(stat = "density") +
  xlim(35, 105)
image.png
3 将直方图和密度曲线叠加
直方图和密度曲线叠加 y = ..density..
# Histogram and density curve on the same axes. y is mapped to the computed
# density so that the bars and the curve share one scale.
# after_stat(density) replaces the deprecated ..density.. notation
# (requires ggplot2 >= 3.3.0).
ggplot(faithful, aes(x = waiting, y = after_stat(density))) +
  geom_histogram(fill = "cornsilk", colour = "grey60", size = .2) +
  geom_density() +
  xlim(35, 105)
image.png
第四节 基于分组数据绘制分组密度曲线图
两种方法,1把分组变量赋予fill或colour;2使用facet
第一种方式,把分组变量赋予fill或colour
# smoke must be a factor to act as a discrete grouping variable.
birthwt1 <- birthwt
birthwt1$smoke <- factor(birthwt1$smoke)

# Grouping through the fill colour (semi-transparent areas).
ggplot(data = birthwt1, mapping = aes(x = bwt, fill = smoke)) +
  geom_density(alpha = 0.2)

# Grouping through the line colour.
ggplot(data = birthwt1, mapping = aes(x = bwt, colour = smoke)) +
  geom_density()
fill=smoke.png
colour=smoke.png
第二种方式,使用facet
library(MASS)
# Fresh copy of birthwt with smoke stored as a factor.
birthwt1 <- birthwt
birthwt1$smoke <- factor(birthwt1$smoke)
levels(birthwt1$smoke)

library(plyr) # provides revalue() for renaming factor levels
# Rename the existing levels "0" and "1" to readable facet labels.
birthwt1$smoke <- revalue(
  birthwt1$smoke,
  replace = c("0" = "no smoke", "1" = "smoke")
)
levels(birthwt1$smoke)

# One density curve per smoking status, arranged as facet rows.
ggplot(data = birthwt1, mapping = aes(x = bwt)) +
  geom_density() +
  facet_grid(smoke ~ .)
image.png
分面,添加直方图
y = ..density..
facet_grid
# Histogram plus density curve in each smoke facet. y is mapped to the
# computed density so that the bars and the curve share one scale.
# after_stat(density) replaces the deprecated ..density.. notation
# (requires ggplot2 >= 3.3.0).
ggplot(birthwt1, aes(x = bwt, y = after_stat(density))) +
  geom_histogram(
    binwidth = 200, fill = "cornsilk", colour = "grey60", size = .2
  ) +
  geom_density() +
  facet_grid(smoke ~ ., scales = "free")
image.png
第五节 绘制频数多边形
geom_freqpoly()
# Frequency polygon with the default binning.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_freqpoly()

# Frequency polygon with a bin width of 4.
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_freqpoly(binwidth = 4)

# Bin width chosen so that the observed range of waiting is cut into 15 bins.
binsize <- diff(range(faithful$waiting)) / 15
ggplot(data = faithful, mapping = aes(x = waiting)) +
  geom_freqpoly(binwidth = binsize)
image.png
image.png
image.png
第六节 绘制基本箱型图
library(ggplot2)
library(MASS)
# Box plot of bwt per race; race is converted to a factor inside aes() so it
# is treated as a discrete x variable.
ggplot(data = birthwt, mapping = aes(x = factor(race), y = bwt)) +
  geom_boxplot()
image.png
调整箱子的宽度
width = .5
# Same box plot with narrower boxes.
ggplot(data = birthwt, mapping = aes(x = factor(race), y = bwt)) +
  geom_boxplot(width = 0.5)
image.png
修改异常值的大小和形状,默认值分别为2和16
outlier.size =
和outlier.shape =
# Outlier points drawn with point shape 21 at size 1.5.
ggplot(data = birthwt, mapping = aes(x = factor(race), y = bwt)) +
  geom_boxplot(outlier.shape = 21, outlier.size = 1.5)
image.png
第七节 为箱型图添加槽口
# Notched box plot. TRUE is spelled out because T is an ordinary variable
# that can be reassigned, whereas TRUE is a reserved word.
ggplot(birthwt, aes(x = factor(race), y = bwt)) +
  geom_boxplot(notch = TRUE)
image.png
第八节 向箱型图添加均值
箱线图中的横线是中位数,我们添加的是均值,所以可能并不重合
# Box plot with the group mean added as a white-filled point (shape 23).
# `fun` replaces the deprecated `fun.y` argument of stat_summary()
# (requires ggplot2 >= 3.3.0).
ggplot(birthwt, aes(x = factor(race), y = bwt)) +
  geom_boxplot() +
  stat_summary(
    fun = "mean", geom = "point", shape = 23, size = 3, fill = "white"
  )
image.png
第九节 绘制小提琴图
library(gcookbook)
# Base plot reused by the violin examples: heightIn by sex.
p <- ggplot(data = heightweight, mapping = aes(x = sex, y = heightIn))

# Plain violin plot.
p + geom_violin()
image.png
传统小提琴图,添加箱型图和中位数点
# Violin plot with a narrow black box plot inside (white outliers) and the
# median marked as a white-filled point.
# `fun` replaces the deprecated `fun.y` argument of stat_summary()
# (requires ggplot2 >= 3.3.0).
p + geom_violin() +
  geom_boxplot(width = .1, fill = "black", outlier.colour = "white") +
  stat_summary(
    fun = median, geom = "point", shape = 21, size = 3, fill = "white"
  )
image.png
默认小提琴尾部截断,保留可用trim=F
# Default violin plot, followed by one drawn with trimming switched off.
# FALSE is spelled out because F is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
p + geom_violin()
p + geom_violin(trim = FALSE)
image.png
image.png
矫正,使面积和数量成正比
# Violin plot drawn with scale = "count".
p +
  geom_violin(scale = "count")
image.png
调节小提琴图的光滑程度
# Violin plots with two different settings of the adjust parameter.
p +
  geom_violin(adjust = 2)
p +
  geom_violin(adjust = 0.5)
image.png
image.png
第十节 绘制Wilkinson点图
library(ggplot2)
library(gcookbook)
# Keep only the 2009 rows whose healthexp is above 2000.
countries2009 <- subset(
  countries,
  subset = Year == 2009 & healthexp > 2000
)

# Base plot reused by the dot plot examples.
p <- ggplot(data = countries2009, mapping = aes(x = infmortality))

# Dot plot with default settings.
p + geom_dotplot()
image.png
移除纵坐标,最大组距为0.25,添加边际地毯以示坐标位置
# Dot plot with bin width 0.25 and a rug marking each observation on x.
# The y axis carries no useful information here, so its breaks and axis
# line are removed.
p +
  geom_dotplot(binwidth = 0.25) +
  geom_rug() +
  scale_y_continuous(breaks = NULL) +
  theme(axis.line.y = element_blank())
image.png
histodot固定分组的点图
# Same dot plot using method = "histodot"; y breaks and the y axis line are
# removed.
p +
  geom_dotplot(method = "histodot", binwidth = 0.25) +
  geom_rug() +
  scale_y_continuous(breaks = NULL) +
  theme(axis.line.y = element_blank())
image.png
中心堆叠stackdir = "center"
# Same dot plot with the stacks centred (stackdir = "center"); y breaks and
# the y axis line are removed.
p +
  geom_dotplot(stackdir = "center", binwidth = 0.25) +
  geom_rug() +
  scale_y_continuous(breaks = NULL) +
  theme(axis.line.y = element_blank())
image.png
第十一节 基于分组数据绘制分组点图
# Dot plot of heightIn per sex: binning runs along the y axis and the stacks
# are centred on each group.
ggplot(data = heightweight, mapping = aes(x = sex, y = heightIn)) +
  geom_dotplot(binaxis = "y", stackdir = "center", binwidth = 0.5)
image.png
添加箱线图,隐去箱线图上的异常点
# Box plot underneath a dot plot of the same data.
ggplot(data = heightweight, mapping = aes(x = sex, y = heightIn)) +
  # Outlier colour is NA so the box plot's own outlier points are not shown.
  geom_boxplot(width = 0.4, outlier.color = NA) +
  # fill = NA draws the dots hollow.
  geom_dotplot(
    binaxis = "y", stackdir = "center", binwidth = 0.5,
    fill = NA
  )
image.png
# When x is numeric, group must be specified explicitly. A numeric x also
# means the x axis positions are set automatically, so the tick positions and
# labels are supplied by hand through scale_x_continuous().
ggplot(data = heightweight, mapping = aes(x = sex, y = heightIn)) +
  geom_boxplot(
    mapping = aes(x = as.numeric(sex) + 0.2, group = sex),
    width = 0.25
  ) +
  geom_dotplot(
    mapping = aes(x = as.numeric(sex) - 0.2, group = sex),
    binaxis = "y", stackdir = "center", binwidth = 0.5,
    fill = NA # hollow dots
  ) +
  scale_x_continuous(
    breaks = seq_len(nlevels(heightweight$sex)),
    labels = levels(heightweight$sex)
  )
image.png
第十二节 绘制二维数据的密度图
# Base plot reused by the 2D density examples: waiting against eruptions.
p <- ggplot(data = faithful, mapping = aes(x = eruptions, y = waiting))

# Scatter plot with 2D density contours drawn on top.
p +
  geom_point() +
  stat_density2d()
image.png
将密度曲面的高度(..level..)映射给等高线的颜色
# Contour lines coloured by the computed contour level.
# after_stat(level) replaces the deprecated ..level.. notation
# (requires ggplot2 >= 3.3.0).
p + stat_density2d(aes(colour = after_stat(level)))
image.png
将密度估计映射给填充色..density..
# Density estimate drawn as a raster, with fill mapped to the computed
# density and contouring switched off.
# after_stat(density) replaces the deprecated ..density.. notation (requires
# ggplot2 >= 3.3.0); FALSE replaces F, which is a reassignable variable.
p + stat_density2d(
  aes(fill = after_stat(density)),
  geom = "raster", contour = FALSE
)
image.png
带数据点,并将密度估计映射给alpha的瓦片图
# after_stat(density) replaces the deprecated ..density.. notation (requires
# ggplot2 >= 3.3.0); FALSE replaces F, which is a reassignable variable.

# Data points with the density estimate drawn as tiles whose transparency is
# mapped to the computed density.
p + geom_point() +
  stat_density2d(
    aes(alpha = after_stat(density)),
    geom = "tile", contour = FALSE
  )

# Density estimate drawn as a raster with fill mapped to the computed density.
p + stat_density2d(
  aes(fill = after_stat(density)),
  geom = "raster", contour = FALSE
)
image.png
image.png