DatistEQ之ggplot2单变量绘图
2021-03-10 本文已影响0人
了无_数据科学
本文主要讲述使用ggplot2组件,进行单变量的绘图。
构造一个随机数据集。
# Build the sample data set: two columns of randomly generated values.
set.seed(1234)  # fixed seed so the random draws are reproducible
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, mean = 55), rnorm(200, mean = 58))
)
# wdata is equivalent to inputtable, the data frame pushed in by the previous node.
前10行数据
先绘制一个图层a,后面逐步添加图层
# Attach ggplot2 before its first use; ggplot() and aes() come from it.
library(ggplot2)
# Base layer reused by the single-variable plots: weight on the x axis.
a <- ggplot(wdata, aes(x = weight))
可能添加的图层有:
对于一个连续变量:
- 面积图:geom_area()
- 密度图:geom_density()
- 点图:geom_dotplot()
- 频率多边图:geom_freqpoly()
- 直方图:geom_histogram()
- 经验累积分布图(ECDF):stat_ecdf()
- QQ图:stat_qq()
对于一个离散变量:
- 条形图:geom_bar()
一、单变量:连续型
1. 面积图
# Area plot of weight using the "bin" stat.
a + geom_area(stat = "bin")
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
面积图
改变颜色
# Fill the areas by sex, semi-transparent, on the classic theme.
a +
  geom_area(aes(fill = sex), stat = "bin", alpha = 0.6) +
  theme_classic()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
带颜色的面积图
注意:y轴默认显示变量weight落入每个分箱的观测数量(count);如果希望y轴显示密度,可用以下代码:
# Map y to the binned density instead of the default count.
# NOTE(review): `..density..` is the legacy dot-dot notation; newer ggplot2
# releases prefer after_stat(density) - confirm the installed version first.
a + geom_area(aes(y = ..density..), stat = "bin")
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
密度面积图
此外,还可以通过修改透明度、填充颜色、大小、线型等属性来自定义图形。
2. 密度图
使用以下函数:
- geom_density():绘制密度图
- geom_vline():添加竖直线
- scale_color_manual():手动修改颜色
# Density curve of weight. The plot is stored in `a`; ggsave() is called
# without `plot`, so it writes the most recently created plot.
a <- ggplot(data = wdata, mapping = aes(x = weight)) +
  geom_density()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
密度图
根据sex修改颜色,将sex映射给line颜色
# Reset `a` to the bare base layer, then colour the density lines by sex.
a <- ggplot(data = wdata, mapping = aes(x = weight))
a + geom_density(aes(color = sex))
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
带颜色的密度图
修改填充颜色以及透明度
# Fill the density curves by sex with 40% opacity.
a + geom_density(aes(fill = sex), alpha = 0.4)
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
带颜色填充的密度图
添加均值线以及手动修改颜色
# requireNamespace() only checks that plyr is installed (without attaching
# it), so the install step runs only when the package is really missing.
# library() then attaches it and stops with an error if it is unavailable.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)
# One row per sex, with the group mean of weight in column grp.mean.
mu <- ddply(wdata, "sex", summarise, grp.mean = mean(weight))
# Density lines coloured by sex, a dashed vertical line at each group mean,
# and a manual red/blue palette.
a +
  geom_density(aes(color = sex)) +
  geom_vline(
    data = mu,
    aes(xintercept = grp.mean, color = sex),
    linetype = "dashed"
  ) +
  scale_color_manual(values = c("red", "blue"))
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
带均值线的密度图
3. 点图
# Dot plot of weight with geom_dotplot()'s default settings.
a + geom_dotplot()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
点图
将sex映射给颜色
# Map sex to the fill colour of the dots.
a + geom_dotplot(aes(fill = sex))
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
带颜色的点图
手动修改颜色
# Same dot plot, with the two fill colours chosen by hand.
a +
  geom_dotplot(aes(fill = sex)) +
  scale_fill_manual(values = c("#999999", "#E69F00"))
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
修改点图的颜色
4. 频率多边图
# Frequency polygon of weight with the default settings.
a + geom_freqpoly()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
频率多边图
y轴显示为密度
# Frequency polygon with y mapped to density, on the minimal theme.
# NOTE(review): `..density..` is the legacy dot-dot notation; newer ggplot2
# releases prefer after_stat(density) - confirm the installed version first.
a +
  geom_freqpoly(aes(y = ..density..)) +
  theme_minimal()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
y轴为密度的频率多边图
修改颜色以及线型
# Map sex to both line colour and line type, on the minimal theme.
a +
  geom_freqpoly(aes(color = sex, linetype = sex)) +
  theme_minimal()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
按sex区分颜色和线型的频率多边图
5. 直方图
# Emit the section header, then draw a histogram with the default settings.
header1("直方图")
a + geom_histogram()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
直方图
将sex映射给线颜色
# Outline colour mapped to sex, white fill, bars dodged side by side,
# drawn on the classic theme.
a +
  geom_histogram(aes(color = sex), fill = "white", position = "dodge") +
  theme_classic()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
按sex区分边框颜色的直方图
6. 经验累积分布图
# Add the stat_ecdf() layer to the base plot of weight.
a + stat_ecdf()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
经验累积分布图
7. QQ图
# QQ plot of mpg from the mtcars data set; stat_qq() reads the `sample`
# aesthetic.
ggplot(data = mtcars, mapping = aes(sample = mpg)) +
  stat_qq()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
QQ图
二、单变量:离散型
# Load the mpg data set and draw a bar chart of the discrete variable fl.
data(mpg)
b <- ggplot(data = mpg, mapping = aes(x = fl))
b + geom_bar()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
条形图
修改填充颜色
# Constant steel-blue fill with black outlines, on the classic theme.
b +
  geom_bar(fill = "steelblue", color = "black") +
  theme_classic()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
带颜色的条形图
本文中涉及的所有代码(末尾另附两个连续变量的散点图与平滑线示例):
# Build the sample data set ----
set.seed(1234)  # fixed seed so the random draws are reproducible
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, mean = 55), rnorm(200, mean = 58))
)
# Hand the data frame to the platform's output() helper.
output(wdata)
# One continuous variable ----
library(ggplot2)
# Keep `a` as the bare base layer so that each later `a + geom_*()` call
# adds exactly one geom. If geom_density() were stored inside `a`, the area
# plots that follow would be drawn with a density curve on top of them.
a <- ggplot(wdata, aes(x = weight))
# Save the density plot explicitly instead of relying on the last plot.
ggsave(
  gettempfile(),
  plot = a + geom_density(),
  width = 15, height = 10, units = "cm", dpi = 300
)
# Area plots ----
# NOTE(review): `a` is whatever was assigned earlier in the script; any layer
# it already carries is drawn in these plots as well.
header1("面积图")
# Area plot using the "bin" stat.
a + geom_area(stat = "bin")
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Areas filled by sex, semi-transparent, on the classic theme.
a +
  geom_area(aes(fill = sex), stat = "bin", alpha = 0.6) +
  theme_classic()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# y mapped to the binned density instead of the default count.
# NOTE(review): `..density..` is the legacy dot-dot notation; newer ggplot2
# releases prefer after_stat(density) - confirm the installed version first.
a + geom_area(aes(y = ..density..), stat = "bin")
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Density plots ----
header1("密度图")
# Plain density curve; ggsave() is called without `plot`, so it writes the
# most recently created plot.
a <- ggplot(data = wdata, mapping = aes(x = weight)) +
  geom_density()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Colour by sex: reset `a` to the bare base layer, then map sex to the line
# colour.
a <- ggplot(data = wdata, mapping = aes(x = weight))
a + geom_density(aes(color = sex))
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Fill by sex with 40% opacity.
a + geom_density(aes(fill = sex), alpha = 0.4)
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Add per-group mean lines and set the line colours by hand.
# requireNamespace() only checks that plyr is installed (without attaching
# it), so the install step runs only when the package is really missing.
# library() then attaches it and stops with an error if it is unavailable.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)
# One row per sex, with the group mean of weight in column grp.mean.
mu <- ddply(wdata, "sex", summarise, grp.mean = mean(weight))
# Density lines coloured by sex, a dashed vertical line at each group mean,
# and a manual red/blue palette.
a +
  geom_density(aes(color = sex)) +
  geom_vline(
    data = mu,
    aes(xintercept = grp.mean, color = sex),
    linetype = "dashed"
  ) +
  scale_color_manual(values = c("red", "blue"))
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Dot plots ----
header1("点图")
# Default dot plot of weight.
a + geom_dotplot()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Fill colour mapped to sex.
a + geom_dotplot(aes(fill = sex))
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Fill colours chosen by hand.
a +
  geom_dotplot(aes(fill = sex)) +
  scale_fill_manual(values = c("#999999", "#E69F00"))
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Frequency polygons ----
header1("频率多边图")
# Default frequency polygon of weight.
a + geom_freqpoly()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# y axis mapped to density.
# NOTE(review): `..density..` is the legacy dot-dot notation; newer ggplot2
# releases prefer after_stat(density) - confirm the installed version first.
a +
  geom_freqpoly(aes(y = ..density..)) +
  theme_minimal()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Line colour and line type both mapped to sex.
a +
  geom_freqpoly(aes(color = sex, linetype = sex)) +
  theme_minimal()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Histograms ----
header1("直方图")
# Default histogram of weight.
a + geom_histogram()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Outline colour mapped to sex, white fill, bars dodged side by side.
a +
  geom_histogram(aes(color = sex), fill = "white", position = "dodge") +
  theme_classic()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Empirical cumulative distribution plot ----
header1("经验累积密度图")
a + stat_ecdf()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# QQ plot ----
header1("QQ图")
# Uses mpg from the mtcars data set; stat_qq() reads the `sample` aesthetic.
ggplot(data = mtcars, mapping = aes(sample = mpg)) +
  stat_qq()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# One discrete variable ----
header1("一个离散变量")
# Load the mpg data set and put fl on the x axis.
data(mpg)
b <- ggplot(data = mpg, mapping = aes(x = fl))
b + geom_bar()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Constant steel-blue fill with black outlines, on the classic theme.
b +
  geom_bar(fill = "steelblue", color = "black") +
  theme_classic()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Two continuous variables ----
header1("两个变量:x,y皆连续")
header2("散点图")
# `b` is reassigned here: wt on x and mpg on y, from mtcars.
b <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg))
b + geom_point()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Map cyl (as a factor) to both point colour and point shape.
b + geom_point(aes(color = factor(cyl), shape = factor(cyl)))
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Same mapping with a hand-picked three-colour palette and the classic theme.
b +
  geom_point(aes(color = factor(cyl), shape = factor(cyl))) +
  scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9")) +
  theme_classic()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Smoothed lines ----
header2("平滑线")
# Smoother only, with geom_smooth()'s default method.
b + geom_smooth()
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Scatter plot plus a linear regression line; se = FALSE drops the
# confidence band.
b +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# Scatter plot with a loess smoother.
b +
  geom_point() +
  geom_smooth(method = "loess")
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
# One regression line per cyl group. Only colour is mapped on the smoother:
# geom_smooth() draws lines and has no shape aesthetic, so mapping shape
# there only produces an "Ignoring unknown aesthetics" warning. Shape stays
# mapped on the points.
b +
  geom_point(aes(color = factor(cyl), shape = factor(cyl))) +
  geom_smooth(
    aes(color = factor(cyl)),
    method = "lm",
    se = FALSE,
    fullrange = TRUE
  )
ggsave(gettempfile(), width = 15, height = 10, units = "cm", dpi = 300)
参考文献
1、https://blog.csdn.net/woodcorpse/article/details/106552735