数据挖掘20210111学习笔记

2021-02-22  本文已影响0人  爱吃甜品的鱼

R语言作图

图片.png
画图所需的包

低级绘图函数建立在高级绘图函数基础上,不能单独使用

图片.png

ggplot2语法

1.入门级绘图模板
2.映射-颜色、大小、透明度、形状
3.分面
4.几何对象
5.统计变换
6.位置调整
7.坐标系

# Three families of plotting tools in R
# 1. Base graphics
plot(iris[, 1], iris[, 3], col = iris[, 5])
text(x = 6.5, y = 4, labels = "hello")

boxplot(iris[, 1] ~ iris[, 5])

dev.off() # close the current graphics device, which also clears the canvas

# 2. ggplot2 -- the workhorse
library(ggplot2)
test <- iris
ggplot(test) +
  geom_point(aes(x = Sepal.Length, y = Petal.Length, color = Species))

# 3. ggpubr -- a quick fix when in a hurry
library(ggpubr)
ggscatter(data = iris, x = "Sepal.Length", y = "Petal.Length", color = "Species")

1.入门级模板

ggplot(data = <DATA>)+
<GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))
> ggplot(data = iris)+
+   geom_point(mapping = aes(x = Sepal.Length,
+                            y = Petal.Length))

2.映射:按照数据框的某一列来定义图的某个属性;手动设置

映射 手动设置

例1

> ggplot(data = test)+
+   geom_point(mapping = aes(x = Sepal.Length,
+                            y = Petal.Length,
+                            color = Species),    #aes的参数是列名,是映射
+              shape =8)  #shape是geom_point的参数,而不是aes的参数;geom_point的参数直接设置具体的颜色、形状等

例2

> ggplot(data = test)+
+   geom_point(mapping = aes(x = Sepal.Length,
+                            y = Petal.Length),
+              color="blue")

3.分面

> ggplot(data = test) + 
+   geom_point(mapping = aes(x = Sepal.Length, y = Petal.Length)) + 
+   facet_wrap(~ Species) #按照Species这一列的取值分为子图,有几个取值就分为几个子图

双分面

> test$Group = sample(letters[1:5],150,replace = T)
> ggplot(data = test) + 
+   geom_point(mapping = aes(x = Sepal.Length, y = Petal.Length)) + 
+   facet_grid(Group ~ Species) 

4.几何对象

分组

> ggplot(data = test) + 
+   geom_smooth(mapping = aes(x = Sepal.Length, 
+                             y = Petal.Length)) 


> ggplot(data = test) + 
+   geom_smooth(mapping = aes(x = Sepal.Length, 
+                             y = Petal.Length,
+                             group = Species))    #按Species分组,画出三条拟合曲线


> ggplot(data = test) + 
+   geom_smooth(mapping = aes(x = Sepal.Length, 
+                             y = Petal.Length,
+                             color = Species))    #按Species分成三条拟合曲线,每条一种颜色

几何对象可以叠加:局部映射VS全局映射

#局部映射
> ggplot(data = test) + 
+   geom_smooth(mapping = aes(x = Sepal.Length,y = Petal.Length))+
+   geom_point(mapping = aes(x = Sepal.Length,y = Petal.Length))


#全局映射
> ggplot(data = test,mapping = aes(x = Sepal.Length, y = Petal.Length))+
+   geom_smooth()+
+   geom_point()

图片.png
#练习6-2
# 1.尝试写出下图的代码
# 数据是iris
# X轴是Species
# y轴是Sepal.Width
# 图是箱线图
> ggplot(data = iris,mapping = aes(x = Species, y = Sepal.Width))+
+   geom_boxplot()

# 2. 尝试在此图上叠加点图,
# 能发现什么问题?
> ggplot(data = iris,mapping = aes(x = Species, y = Sepal.Width))+
+   geom_boxplot()+
+   geom_point()     #在图上看不出每个子集有50个元素

# 3.用下列代码作图,观察结果
> ggplot(test,aes(x = Sepal.Length,y = Petal.Length,color = Species)) +
+   geom_point()+
+   geom_smooth(color = "black")    #当局部设置(此处为手动设置的color)和全局映射冲突时,以局部为准

5.统计变换-条形图

> View(diamonds)
> table(diamonds$cut)
> ggplot(data = diamonds) + 
+   geom_bar(mapping = aes(x = cut))
> ggplot(data = diamonds) + 
+   stat_count(mapping = aes(x = cut))

统计变换使用场景
(1)使用表中数据直接作图,而不统计

> fre = as.data.frame(table(diamonds$cut))
> ggplot(data = fre) +
+   geom_bar(mapping = aes(x = Var1, y = Freq), stat = "identity")

(2)不统计count,统计prop(比例),count改为prop

> ggplot(data = diamonds) + 
+   geom_bar(mapping = aes(x = cut, y = ..prop.., group = 1))

6.位置调整

抖动的点图

> ggplot(data = mpg,mapping = aes(x = class, 
+                                 y = hwy,
+                                 group = class)) + 
+   geom_boxplot()+
+   geom_point()
图片.png
> ggplot(data = mpg,mapping = aes(x = class, 
+                                 y = hwy,
+                                 group = class)) + 
+   geom_boxplot()+
+   geom_jitter()
图片.png

堆叠条形图

> ggplot(data = diamonds) + 
+   geom_bar(mapping = aes(x = cut,fill=clarity))
图片.png

并列条形图

> ggplot(data = diamonds) + 
+   geom_bar(mapping = aes(x = cut, fill = clarity), position = "dodge")
图片.png

7.坐标系

翻转coord_flip()

> ggplot(data = mpg, mapping = aes(x = class, y = hwy)) + 
+   geom_boxplot() +
+   coord_flip()
图片.png
> bar <- ggplot(data = diamonds) + 
+   geom_bar(
+     mapping = aes(x = cut, fill = cut), 
+     show.legend = FALSE,
+     width = 1
+   ) + 
+   theme(aspect.ratio = 1) +
+   labs(x = NULL, y = NULL)
> bar + coord_flip()
图片.png

极坐标系coord_polar()

> bar + coord_polar()
图片.png
图片.png 图片.png
> ggplot(iris, aes(x = Species, y = Sepal.Width))+
+   geom_violin(aes(fill = Species))+
+   geom_boxplot()+
+   geom_jitter(aes(shape = Species))+
+   coord_flip()+
+   theme_classic()

ggpubr

# Scatter plot of petal length against sepal length, colored by species
ggscatter(
  iris,
  x = "Sepal.Length",
  y = "Petal.Length",
  color = "Species"
)
图片.png
> p <- ggboxplot(iris, x = "Species", y = "Sepal.Length",
+                color = "Species", shape = "Species",
+                add = "jitter")
图片.png
> my_comparisons <- list( c("setosa", "versicolor"), c("setosa", "virginica"), c("versicolor", "virginica") )
> p + stat_compare_means(comparisons = my_comparisons)+ # Add pairwise comparisons p-value
+   stat_compare_means(label.y = 9) 
图片.png
图片.png
eoffice包 导出为ppt,全部元素都是可编辑模式
# Load eoffice, then pass the plot object `p` and the target .pptx file name
# to topptx() to export the figure to PowerPoint.
library(eoffice)
topptx(p,"iris_box_ggpubr.pptx")

小洁老师的画图合集 https://www.jianshu.com/nb/35523479

上一篇下一篇

猜你喜欢

热点阅读