DatistEQ之ggplot2单变量绘图
2021-03-10 本文已影响0人
了无_数据科学
本文主要讲述使用ggplot2组件,进行单变量的绘图。
构造一个随机数据集。
# Build a reproducible demo data set with two columns:
# 200 rows labelled "F" followed by 200 rows labelled "M".
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200), levels = c("F", "M")),
  weight = c(rnorm(200, mean = 55), rnorm(200, mean = 58))
)
# wdata is equivalent to `inputtable`, the data frame pushed in by the
# upstream node.

先绘制一个图层a,后面逐步添加图层
# ggplot2 must be attached before ggplot()/aes() can be called.
library(ggplot2)
# Base plot object: only the data and the x mapping, no geom layer yet.
# Later snippets add one layer at a time on top of `a`.
a <- ggplot(wdata, aes(x = weight))
可能添加的图层有:
对于一个连续变量:
- 面积图:geom_area()
- 密度图:geom_density()
- 点图:geom_dotplot()
- 频率多边图:geom_freqpoly()
- 直方图:geom_histogram()
- 经验累积分布图(ECDF,下文称"经验累积密度图"):stat_ecdf()
- QQ图:stat_qq()
对于一个离散变量:
- 条形图:geom_bar()

一、单变量:连续型
1. 面积图
# Area chart: stat = "bin" bins weight and fills the area under the counts.
a + geom_area(stat = "bin")
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

改变颜色
# One semi-transparent area per sex, drawn with the classic theme.
a +
  geom_area(mapping = aes(fill = sex), stat = "bin", alpha = 0.6) +
  theme_classic()
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

注意:y轴默认显示每个分箱(bin)内weight的观测数,即count;如果y轴要显示密度,可用以下代码:
# Put the binned density instead of the count on the y axis.
# NOTE: ggplot2 >= 3.4.0 deprecates the ..density.. notation in favour of
# after_stat(density); it is kept here so older ggplot2 versions still work.
a + geom_area(mapping = aes(y = ..density..), stat = "bin")
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

可以通过修改透明度(alpha)、填充颜色(fill)、大小(size)、线型(linetype)等属性来自定义图形。
2. 密度图
使用以下函数:
- geom_density():绘制密度图
- geom_vline():添加竖直线
- scale_color_manual():手动修改颜色
# Density curve of weight; `a` now holds the base plot plus a density layer.
a <- ggplot(data = wdata, mapping = aes(x = weight)) +
  geom_density()
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

根据sex区分颜色:将sex映射给线条颜色(color)
# Reset `a` to the bare base plot, then draw one density curve per sex,
# distinguished by line colour.
a <- ggplot(data = wdata, mapping = aes(x = weight))
a + geom_density(mapping = aes(color = sex))
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

修改填充颜色以及透明度
# Fill each density by sex; alpha = 0.4 keeps overlapping areas visible.
a + geom_density(mapping = aes(fill = sex), alpha = 0.4)
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

添加均值线以及手动修改颜色
# requireNamespace() only checks whether plyr is installed; unlike require()
# it does not attach the package or warn when it is missing, so it is the
# right guard in front of install.packages().
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)
# One row per sex, with that group's mean weight in column grp.mean.
mu <- ddply(wdata, "sex", summarise, grp.mean = mean(weight))
# Density curve per sex, a dashed vertical line at each group mean, and
# manually chosen colours for the two groups.
a + geom_density(aes(color = sex)) +
  geom_vline(
    data = mu,
    mapping = aes(xintercept = grp.mean, color = sex),
    linetype = "dashed"
  ) +
  scale_color_manual(values = c("red", "blue"))
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(gettempfile(), width = 15, height = 10, dpi = 300, units = "cm")

3. 点图
# Dot plot of weight with the default binning.
a + geom_dotplot()
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

将sex映射给填充颜色(fill)
# Dot plot with the dots filled according to sex.
a + geom_dotplot(mapping = aes(fill = sex))
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

手动修改颜色
# Same dot plot, with the two fill colours chosen by hand.
a +
  geom_dotplot(mapping = aes(fill = sex)) +
  scale_fill_manual(values = c("#999999", "#E69F00"))
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

4. 频率多边图
# Frequency polygon of weight with the default binning.
a + geom_freqpoly()
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

y轴显示为密度
# Frequency polygon with density on the y axis, minimal theme.
# NOTE: ggplot2 >= 3.4.0 deprecates the ..density.. notation in favour of
# after_stat(density); it is kept here so older ggplot2 versions still work.
a +
  geom_freqpoly(mapping = aes(y = ..density..)) +
  theme_minimal()
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

修改颜色以及线型
# One polygon per sex, told apart by both line colour and line type.
a +
  geom_freqpoly(mapping = aes(color = sex, linetype = sex)) +
  theme_minimal()
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

5. 直方图
# Emit the section heading through header1(), then draw a histogram of
# weight with the default binning.
header1("直方图")
a + geom_histogram()
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

将sex映射给线颜色
# Histogram per sex: outline colour follows sex, bars are filled white and
# placed side by side (position = "dodge").
a +
  geom_histogram(
    mapping = aes(color = sex),
    fill = "white",
    position = "dodge"
  ) +
  theme_classic()
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

6. 经验累积密度图
# Empirical cumulative distribution function of weight.
a + stat_ecdf()
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

7. QQ图
# QQ plot of mtcars$mpg; stat_qq() reads its input from the `sample` aesthetic.
ggplot(mtcars, mapping = aes(sample = mpg)) +
  stat_qq()
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

二、单变量:离散型
# Load the mpg data set and draw one bar per value of fl, with the bar
# height giving the number of rows.
data(mpg)
b <- ggplot(data = mpg, mapping = aes(x = fl))
b + geom_bar()
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

修改填充颜色
# Same bar chart with a fixed steel-blue fill, black outlines and the
# classic theme.
b +
  geom_bar(fill = "steelblue", color = "black") +
  theme_classic()
# Save the latest plot (15 x 10 cm, 300 dpi) to the path from gettempfile().
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)

本文中涉及的所有代码:
# Build a reproducible demo data set: 200 "F" rows followed by 200 "M" rows.
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200), levels = c("F", "M")),
  weight = c(rnorm(200, mean = 55), rnorm(200, mean = 58))
)
# Pass the data frame to output(), which is not defined in this script
# (presumably the host hook that publishes a table -- confirm).
output(wdata)
# One variable: continuous
library(ggplot2)
# Base plot object: data and the x mapping only. It must not carry a geom of
# its own, otherwise every chart built from `a` below would also draw a
# density curve on top of the intended layer. Nothing is saved here because
# the bare base object has no layer to draw.
a <- ggplot(wdata, aes(x = weight))
# Area chart
header1("面积图")
# Binned counts of weight drawn as a filled area.
a + geom_area(stat = "bin")
ggsave(gettempfile(), width = 15, height = 10, dpi = 300, units = "cm")
# One semi-transparent area per sex.
a + geom_area(aes(fill = sex), stat = "bin", alpha = 0.6) + theme_classic()
ggsave(gettempfile(), width = 15, height = 10, dpi = 300, units = "cm")
# Density instead of count on the y axis.
# NOTE: ggplot2 >= 3.4.0 deprecates the ..density.. notation in favour of
# after_stat(density); it is kept here so older ggplot2 versions still work.
a + geom_area(aes(y = ..density..), stat = "bin")
ggsave(gettempfile(), width = 15, height = 10, dpi = 300, units = "cm")
# Density plot
header1("密度图")
# Plain density curve of weight.
a <- ggplot(data = wdata, mapping = aes(x = weight)) +
  geom_density()
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
# Colour by sex: reset `a` to the bare base plot, then map sex to the line
# colour.
a <- ggplot(data = wdata, mapping = aes(x = weight))
a + geom_density(mapping = aes(color = sex))
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
# Change the fill colour and the transparency.
a + geom_density(mapping = aes(fill = sex), alpha = 0.4)
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
# Add mean lines and set the colours manually.
# requireNamespace() only checks whether plyr is installed; unlike require()
# it does not attach the package or warn when it is missing, so it is the
# right guard in front of install.packages().
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr")
}
library(plyr)
# One row per sex, with that group's mean weight in column grp.mean.
mu <- ddply(wdata, "sex", summarise, grp.mean = mean(weight))
# Density curve per sex plus a dashed vertical line at each group mean.
a + geom_density(aes(color = sex)) +
  geom_vline(
    data = mu,
    mapping = aes(xintercept = grp.mean, color = sex),
    linetype = "dashed"
  ) +
  scale_color_manual(values = c("red", "blue"))
ggsave(gettempfile(), width = 15, height = 10, dpi = 300, units = "cm")
# Dot plot
header1("点图")
# Default dot plot of weight.
a + geom_dotplot()
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
# Dots filled according to sex.
a + geom_dotplot(mapping = aes(fill = sex))
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
# Set the fill colours manually.
a +
  geom_dotplot(mapping = aes(fill = sex)) +
  scale_fill_manual(values = c("#999999", "#E69F00"))
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
# Frequency polygon
header1("频率多边图")
# Default frequency polygon of weight.
a + geom_freqpoly()
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
# Show density on the y axis.
# NOTE: ggplot2 >= 3.4.0 deprecates the ..density.. notation in favour of
# after_stat(density); it is kept here so older ggplot2 versions still work.
a +
  geom_freqpoly(mapping = aes(y = ..density..)) +
  theme_minimal()
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
# Change the colour and the line type by sex.
a +
  geom_freqpoly(mapping = aes(color = sex, linetype = sex)) +
  theme_minimal()
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
# Histogram
header1("直方图")
# Default histogram of weight.
a + geom_histogram()
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
# Map sex to the outline colour; white fill, bars placed side by side.
a +
  geom_histogram(
    mapping = aes(color = sex),
    fill = "white",
    position = "dodge"
  ) +
  theme_classic()
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
header1("经验累积密度图")
# Empirical cumulative distribution function of weight.
a + stat_ecdf()
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
header1("QQ图")
# QQ plot of mtcars$mpg; stat_qq() reads its input from the `sample` aesthetic.
ggplot(mtcars, mapping = aes(sample = mpg)) +
  stat_qq()
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
header1("一个离散变量")
# Load the data set.
data(mpg)
# One bar per value of fl; bar height is the number of rows.
b <- ggplot(data = mpg, mapping = aes(x = fl))
b + geom_bar()
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
# Change the fill colour.
b +
  geom_bar(fill = "steelblue", color = "black") +
  theme_classic()
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
header1("两个变量:x,y皆连续")
header2("散点图")
# `b` is re-used here as the base plot for mtcars: wt on x, mpg on y.
b <- ggplot(data = mtcars, mapping = aes(x = wt, y = mpg))
b + geom_point()
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
# Map cyl (as a factor) to both the colour and the shape of the points.
b + geom_point(mapping = aes(color = factor(cyl), shape = factor(cyl)))
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
# Custom colours.
b +
  geom_point(mapping = aes(color = factor(cyl), shape = factor(cyl))) +
  scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9")) +
  theme_classic()
ggsave(
  filename = gettempfile(),
  width = 15, height = 10, units = "cm", dpi = 300
)
header2("平滑线")
# Smoother alone, using geom_smooth()'s default method.
b + geom_smooth()
ggsave(gettempfile(), width = 15, height = 10, dpi = 300, units = "cm")
# Scatter plot plus a linear regression line; se = FALSE removes the
# confidence band.
b + geom_point() +
  geom_smooth(method = "lm", se = FALSE)
ggsave(gettempfile(), width = 15, height = 10, dpi = 300, units = "cm")
# Scatter plot plus a loess smoother.
b + geom_point() +
  geom_smooth(method = "loess")
ggsave(gettempfile(), width = 15, height = 10, dpi = 300, units = "cm")
# Map cyl to colour and shape. shape is mapped on the points only:
# geom_smooth() has no shape aesthetic, so mapping it there just produces an
# "Ignoring unknown aesthetics" warning. The per-cyl grouping of the fitted
# lines still comes from the colour mapping.
b + geom_point(aes(color = factor(cyl), shape = factor(cyl))) +
  geom_smooth(
    aes(color = factor(cyl)),
    method = "lm", se = FALSE, fullrange = TRUE
  )
ggsave(gettempfile(), width = 15, height = 10, dpi = 300, units = "cm")
参考文献
1、https://blog.csdn.net/woodcorpse/article/details/106552735