R-data-science-3
2019-01-15 本文已影响15人
医科研
统计变换
geom_bar函数可绘制基本的条形图。diamonds数据集是ggplot2内置数据集,包含约54000颗钻石的信息,每颗钻石有price、color等变量
变量 与 观测的思维
Sys.setlocale('LC_ALL','C')
library(tidyverse)
## -- Attaching packages -------------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 3.1.0 √ purrr 0.2.5
## √ tibble 1.4.2 √ dplyr 0.7.8
## √ tidyr 0.8.2 √ stringr 1.3.1
## √ readr 1.3.1 √ forcats 0.3.0
## -- Conflicts ----------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Inspect the structure of diamonds: dim() reports the number of rows and
# columns, head() prints the first few rows.
dim(diamonds)
## [1] 53940 10
head(diamonds)
## # A tibble: 6 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.290 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
# Put cut on the x axis. The y axis shows count, which is not a column of
# diamonds but a value computed for the plot.
ggplot(diamonds) +
  geom_bar(aes(x = cut))
data:image/s3,"s3://crabby-images/21052/210528a257d9069428bab979f50dbf6579863039" alt=""
# The algorithm a plot uses to compute new values is called a stat
# (statistical transformation).
# geom_bar() transforms the data and returns count values because its
# default stat is stat_count.
?geom_bar
## starting httpd help server ...
## done
# Geoms and stats can usually be used interchangeably.
# The code below produces the same plot as the geom_bar() call above.
ggplot(diamonds)+
stat_count(mapping = aes(x=cut))
data:image/s3,"s3://crabby-images/89869/89869b01083f177b16a543c74dd4794d02c6691f" alt=""
# NOTE(review): this snippet is a verbatim repeat of the ?geom_bar /
# stat_count() example that appears just before it in this file.
# The algorithm a plot uses to compute new values is called a stat
# (statistical transformation).
# geom_bar() transforms the data and returns count values because its
# default stat is stat_count.
?geom_bar
## starting httpd help server ...
## done
# Geoms and stats can usually be used interchangeably.
# The code below produces the same plot as the geom_bar() call above.
ggplot(diamonds)+
stat_count(mapping = aes(x=cut))
data:image/s3,"s3://crabby-images/43cc4/43cc4fb0d3b6b839b02885ac92323e0be86ac3e2" alt=""
# Override the default stat: the bar heights in the next plot come straight
# from the raw values of a y variable, so the counts are supplied by hand.
demo <- tibble(
  cut = c("Fair", "Good", "Very Good", "Premium", "Ideal"),
  freq = c(1610, 4906, 12082, 13791, 21551)
)
# Print the table to check the two columns.
demo
## # A tibble: 5 x 2
## cut freq
## <chr> <dbl>
## 1 Fair 1610
## 2 Good 4906
## 3 Very Good 12082
## 4 Premium 13791
## 5 Ideal 21551
# stat = "identity" replaces geom_bar()'s default counting stat, so the bar
# height is the freq value already stored in demo.
ggplot(data = demo) +
geom_bar(mapping = aes(x = cut, y = freq), stat = "identity")
data:image/s3,"s3://crabby-images/ca618/ca6185e3e8cdab15e090b27614c7b517dae7d8fe" alt=""
# Override the default mapping from stat-computed variables to aesthetics:
# y is mapped to the computed prop, giving a bar chart of proportions
# instead of counts. group = 1 puts all rows in a single group so that prop
# is a share of the whole data set.
# NOTE(review): newer ggplot2 releases spell this after_stat(prop); the
# ..prop.. form matches the ggplot2 3.1.0 session shown at the top of this
# file - confirm before upgrading.
ggplot(data = diamonds) +
geom_bar(mapping = aes(x = cut, y = ..prop.., group = 1))
data:image/s3,"s3://crabby-images/7ef36/7ef36a73186f22506efc14f30525c1d03d273b2c" alt=""
# The "Computed variables" section of a stat's help page lists the
# variables that the stat computes.
# stat_summary() summarises the y values for each unique x; here it shows
# the min, max and median of depth for every cut.
# NOTE(review): later ggplot2 releases rename fun.ymin / fun.ymax / fun.y to
# fun.min / fun.max / fun - confirm against the installed version.
ggplot(data = diamonds) +
stat_summary(
mapping = aes(x = cut, y = depth),
fun.ymin = min,
fun.ymax = max,
fun.y = median
)
data:image/s3,"s3://crabby-images/753fe/753fe115050ac0cd8c9b81e77912722fe3dcdbb9" alt=""
# ggplot2 provides more than 20 stats; use e.g. ?stat_bin to get help on one.
# A proportion bar chart needs group = 1. This call deliberately omits it to
# show what happens otherwise (presumably prop is then computed within each
# cut level, so every bar has the same height - see ?geom_bar).
ggplot(data = diamonds) +
geom_bar(mapping = aes(x = cut, y = ..prop..))
data:image/s3,"s3://crabby-images/23ab3/23ab3a4674e70adabf9773d5a073c378dcf9eebf" alt=""
# Second counter-example: still no group = 1, now with fill mapped to color.
ggplot(data = diamonds) +
geom_bar(mapping = aes(x = cut, fill = color, y = ..prop..))
data:image/s3,"s3://crabby-images/60ba2/60ba21d9001d7e5563698634a853aacf12b32490" alt=""
# Colour the bars by mapping a variable to the colour or fill aesthetic.
# This one maps cut to colour.
ggplot(diamonds) +
  geom_bar(aes(x = cut, colour = cut))
data:image/s3,"s3://crabby-images/c54a0/c54a0a01a9e93b5ab67784fe7a0b5c0907e1ae6c" alt=""
# Mapping cut to fill is the more effective choice.
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = cut))
data:image/s3,"s3://crabby-images/1812a/1812ae1c2f8fc1494b15f37ce73ba015d1f52447" alt=""
# Map fill to a different variable (clarity): the bars are stacked
# automatically.
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = clarity))
data:image/s3,"s3://crabby-images/007cb/007cb075dcc2e79c28cb1ada42cc5ba0e4be8028" alt=""
# Stacking is carried out by the position adjustment selected with the
# position argument.
# Besides the stacking used when nothing is specified, the alternatives shown
# in these notes are "identity", "fill" and "dodge".
# With "identity" the bars overlap, so alpha makes them translucent.
ggplot(diamonds) +
  geom_bar(
    aes(x = cut, fill = clarity),
    position = "identity",
    alpha = 1/5
  )
data:image/s3,"s3://crabby-images/9347a/9347add14d696db1bc42b40a8a74d21f2efb7485" alt=""
# fill = NA makes the bars fully transparent; clarity is shown through the
# outline colour instead.
ggplot(data = diamonds, mapping = aes(x = cut, colour = clarity)) +
geom_bar(fill = NA, position = "identity")
data:image/s3,"s3://crabby-images/a1491/a14914caa20e3af1e99996cd7197aa2b1558fcb3" alt=""
# position = "fill" stacks every bar to the same height, which suits
# comparing proportions across groups.
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = clarity), position = "fill")
data:image/s3,"s3://crabby-images/26a92/26a92822c98e63aad984092b99ce967185267d6a" alt=""
# position = "dodge" places the bars of each group side by side.
ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = clarity), position = "dodge")
data:image/s3,"s3://crabby-images/ccff4/ccff4a9669904d6f046c3b586b7886767ae3ed78" alt=""
# Overplotting of overlapping points: position = "jitter" adds random noise
# to each point so they no longer line up on a grid.
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy), position = "jitter")
data:image/s3,"s3://crabby-images/cf633/cf633fa48edd65f04835d49469889fa9b61ff881" alt=""
# ggplot2 also offers geom_jitter() as a shorthand for a jittered point layer.
# Coordinate systems ----
# coord_flip() swaps the x and y axes; this is the boxplot before flipping.
ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot()
data:image/s3,"s3://crabby-images/f665e/f665ee6967ab2ee59168c92315aa54ab9a4d6873" alt=""
# Adding coord_flip() conveniently turns it into a horizontal boxplot.
ggplot(mpg, aes(x = class, y = hwy)) +
  geom_boxplot() +
  coord_flip()
data:image/s3,"s3://crabby-images/b83fd/b83fdf9c0b16fb2294c0ffd7ab79d63327976085" alt=""
# coord_quickmap() sets the aspect ratio for maps (author's note: probably
# of limited use to me, included for awareness).
# Load the "nz" map outline as a data frame for plotting.
nz <- map_data("nz")
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
# Draw the nz outline as white polygons with black borders, with no
# coordinate system specified.
ggplot(nz, aes(long, lat, group = group)) +
geom_polygon(fill = "white", colour = "black")
data:image/s3,"s3://crabby-images/f6560/f656077e8d6106fc94d33362bea5048375cf9343" alt=""
# Same polygons, now with coord_quickmap() added.
ggplot(nz, aes(long, lat, group = group)) +
geom_polygon(fill = "white", colour = "black") +
coord_quickmap()
data:image/s3,"s3://crabby-images/a87dd/a87ddd08e48783909011393477a39714806c05a6" alt=""
# coord_polar() draws a plot in polar coordinates.
# Build the bar chart of cut once (full-width bars, no legend, no axis
# titles, square panel) so it can be viewed under different coordinates.
bar <- ggplot(diamonds) +
  geom_bar(aes(x = cut, fill = cut), width = 1, show.legend = FALSE) +
  labs(x = NULL, y = NULL) +
  theme(aspect.ratio = 1)
# First view: axes flipped.
bar + coord_flip()
data:image/s3,"s3://crabby-images/c959d/c959d77a8f194c5bc1b3cd2c1ee1fd813508b24b" alt=""
# Second view of the same bar object: polar coordinates.
bar + coord_polar()
data:image/s3,"s3://crabby-images/2d827/2d827f588dcba528cb67060b80f956d139d15d95" alt=""
图形分层语法
通过以上学习,得到了ggplot2绘图的7个参数设置
包括
数据data
映射mapping
几何函数geom_
统计变换stat
位置参数position
分面facet
坐标系coord
但绘图时经常不需要所有参数,ggplot2提供很好的默认参数设置
每个人都会遇到困难,克服困难的唯一方法就是不断尝试