数据挖掘20210111学习笔记
2021-02-22 本文已影响0人
爱吃甜品的鱼
R语言作图


低级绘图函数建立在高级绘图函数基础上,不能单独使用

ggplot2语法
1.入门级绘图模板
2.映射-颜色、大小、透明度、形状
3.分面
4.几何对象
5.统计变换
6.位置调整
7.坐标系
# Three ways to draw a plot in R
# 1. Base graphics
# High-level call: scatter plot of sepal length against petal length,
# with one colour per species. Columns are referenced by name so the
# call is self-explanatory.
plot(iris$Sepal.Length, iris$Petal.Length, col = iris$Species)
# Low-level call: draws on the plot that is already open.
text(6.5, 4, labels = "hello")
# Box plot of sepal length for each species.
boxplot(Sepal.Length ~ Species, data = iris)
dev.off() # close the current graphics device, which clears the canvas
# 2. ggplot2, the workhorse
test <- iris
library(ggplot2)
ggplot(data = test) +
  geom_point(mapping = aes(x = Sepal.Length,
                           y = Petal.Length,
                           color = Species))
# 3. ggpubr, a quick shortcut
library(ggpubr)
ggscatter(iris,
          x = "Sepal.Length",
          y = "Petal.Length",
          color = "Species")
1.入门级模板
ggplot(data = <DATA>)+
<GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))
> ggplot(data = iris)+
+ geom_point(mapping = aes(x = Sepal.Length,
+ y = Petal.Length))
2.映射:按照数据框的某一列来定义图的某个属性(写在aes()内);手动设置:直接把某个属性设为固定值(写在aes()外)


例1
> ggplot(data = test)+
+ geom_point(mapping = aes(x = Sepal.Length,
+ y = Petal.Length,
+ color = Species), # arguments inside aes() are column names: these are mappings
+ shape =8) # shape is an argument of geom_point(), not of aes(); arguments passed directly to geom_point() are fixed values (a specific colour, shape, ...)
例2
> ggplot(data = test)+
+ geom_point(mapping = aes(x = Sepal.Length,
+ y = Petal.Length),
+ color="blue")
3.分面
> ggplot(data = test) +
+ geom_point(mapping = aes(x = Sepal.Length, y = Petal.Length)) +
+ facet_wrap(~ Species) #按照Species这一列的取值分为子图,有几个取值就分为几个子图
双分面
> # Add a random group label (a-e) for every row so that there is a
> # second variable to facet on. The labels differ on every run.
> test$Group <- sample(letters[1:5], nrow(test), replace = TRUE)
> # Rows of the panel grid are Group values, columns are Species values.
> ggplot(data = test) +
+   geom_point(mapping = aes(x = Sepal.Length, y = Petal.Length)) +
+   facet_grid(Group ~ Species)
4.几何对象
分组
> ggplot(data = test) +
+ geom_smooth(mapping = aes(x = Sepal.Length,
+ y = Petal.Length)) # no grouping: one smooth curve fitted to all rows
> ggplot(data = test) +
+ geom_smooth(mapping = aes(x = Sepal.Length,
+ y = Petal.Length,
+ group = Species)) # group = Species: a separate smooth curve for each species
> ggplot(data = test) +
+ geom_smooth(mapping = aes(x = Sepal.Length,
+ y = Petal.Length,
+ color = Species)) # color = Species: a separate curve for each species, each in its own colour
几何对象可以叠加:局部映射VS全局映射
#局部映射
> ggplot(data = test) +
+ geom_smooth(mapping = aes(x = Sepal.Length,y = Petal.Length))+
+ geom_point(mapping = aes(x = Sepal.Length,y = Petal.Length))
#全局映射
> ggplot(data = test,mapping = aes(x = Sepal.Length, y = Petal.Length))+
+ geom_smooth()+
+ geom_point()

#练习6-2
# 1.尝试写出下图的代码
# 数据是iris
# X轴是Species
# y轴是Sepal.Width
# 图是箱线图
> ggplot(data = iris,mapping = aes(x = Species, y = Sepal.Width))+
+ geom_boxplot()
# 2. 尝试在此图上叠加点图,
# 能发现什么问题?
> ggplot(data = iris,mapping = aes(x = Species, y = Sepal.Width))+
+ geom_boxplot()+
+ geom_point() # points with identical x and y are drawn on top of each other, so the plot does not show that each species has 50 observations
# 3.用下列代码作图,观察结果
> ggplot(test,aes(x = Sepal.Length,y = Petal.Length,color = Species)) +
+ geom_point()+
+ geom_smooth(color = "black") # when a value set in a layer conflicts with the global mapping, the layer's value wins: this layer is drawn in black
5.统计变换-条形图
> View(diamonds)
> table(diamonds$cut)
> ggplot(data = diamonds) +
+ geom_bar(mapping = aes(x = cut))
> ggplot(data = diamonds) +
+ stat_count(mapping = aes(x = cut))
统计变换使用场景
(1)使用表中数据直接作图,而不统计
> # table() has already counted the rows per cut, so the counts are
> # plotted as they are (stat = "identity") instead of being counted again.
> fre <- as.data.frame(table(diamonds$cut))
> ggplot(data = fre) +
+   geom_bar(mapping = aes(x = Var1, y = Freq), stat = "identity")
(2)不统计count,统计prop(比例),count改为prop
> # after_stat(prop) reads the proportion computed by the count stat and
> # replaces the deprecated ..prop.. notation. group = 1 puts all rows in
> # one group so the proportions are relative to the whole data set
> # rather than to each bar.
> ggplot(data = diamonds) +
+   geom_bar(mapping = aes(x = cut, y = after_stat(prop), group = 1))
6.位置关系
抖动的点图
> ggplot(data = mpg,mapping = aes(x = class,
+ y = hwy,
+ group = class)) +
+ geom_boxplot()+
+ geom_point()

> ggplot(data = mpg,mapping = aes(x = class,
+ y = hwy,
+ group = class)) +
+ geom_boxplot()+
+ geom_jitter()

堆叠条形图
> ggplot(data = diamonds) +
+ geom_bar(mapping = aes(x = cut,fill=clarity))

并列条形图
> ggplot(data = diamonds) +
+ geom_bar(mapping = aes(x = cut, fill = clarity), position = "dodge")

7.坐标系
翻转coord_flip()
> ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
+ geom_boxplot() +
+ coord_flip()

> bar <- ggplot(data = diamonds) +
+ geom_bar(
+ mapping = aes(x = cut, fill = cut),
+ show.legend = FALSE,
+ width = 1
+ ) +
+ theme(aspect.ratio = 1) +
+ labs(x = NULL, y = NULL)
> bar + coord_flip()

极坐标系coord_polar()
> bar + coord_polar()



> ggplot(iris, aes(x = Species, y = Sepal.Width))+
+ geom_violin(aes(fill = Species))+
+ geom_boxplot()+
+ geom_jitter(aes(shape = Species))+
+ coord_flip()+
+ theme_classic()
ggpubr
ggscatter(iris,x="Sepal.Length",y="Petal.Length",color="Species")

> p <- ggboxplot(iris, x = "Species", y = "Sepal.Length",
+ color = "Species", shape = "Species",
+ add = "jitter")

> my_comparisons <- list( c("setosa", "versicolor"), c("setosa", "virginica"), c("versicolor", "virginica") )
> p + stat_compare_means(comparisons = my_comparisons)+ # Add pairwise comparisons p-value
+ stat_compare_means(label.y = 9)


eoffice包 导出为ppt,全部元素都是可编辑模式
library(eoffice)
topptx(p,"iris_box_ggpubr.pptx")
小洁老师的画图合集 https://www.jianshu.com/nb/35523479