[R语言] TidyTuesday ggplot2可视化学习 1
2020-04-11 本文已影响0人
半为花间酒
原始数据主题:Tour de France Winners
重新绘制主题:口袋妖怪 Pokemons
跟着刘博学画图,语雀指路:
TidyTuesday 可视化学习之 ggplot2 一笔一画绘制表格
原始数据:
https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-07/tdf_winners.csv
重绘数据(口袋妖怪):
https://pan.baidu.com/s/1sMHPLKL_OsEpsrwJ6uQGPQ
提取码:oxm0
转载请注明:陈熹 chenx6542@foxmail.com (简书号:半为花间酒)
前置知识
- rle函数
计算向量中连续相同字符(广义游程)的个数
x <- c(1,1,1,2,3,3,3,1,1)
rle(x)
# Run Length Encoding
# lengths: int [1:4] 3 1 3 2
# values : num [1:4] 1 2 3 1
x <- c(1,2,3,4,4,1,2)
rle(x)
# Run Length Encoding
# lengths: int [1:6] 1 1 1 2 1 1
# values : num [1:6] 1 2 3 4 1 2
x <- c(1,1,2,3,4,4,1,3)
rle(x)
# Run Length Encoding
# lengths: int [1:6] 2 1 1 2 1 1
# values : num [1:6] 1 2 3 4 1 3
图源《R编程艺术》
- glue函数
类似python3的字符串格式化
# > python
name = 'Fred'
age = '50'
print(f'My name is {name}, my age next year is {age + 1}')
# My name is Fred, my age next year is 51.
#> R
name <- "Fred"
age <- 50
anniversary <- as.Date("1991-10-12")
library(glue)
glue('My name is {name},
my age next year is {age + 1},
my anniversary is {anniversary}')
# 会识别\n换行
# My name is Fred,
# my age next year is 51,
# my anniversary is 1991-10-12
glue('My name is {name},',
'my age next year is {age + 1},',
'my anniversary is {format(anniversary, "%A, %B %d, %Y")}.')
# 以,相连直接拼接不换行
# My name is Fred,my age next year is 51,my anniversary is 星期六, 十月 12, 1991.
- coord_cartesian函数
放大镜效果不改变图形形状
library(patchwork)
library(ggplot2)
p1 <- ggplot(mtcars, aes(disp, wt)) +
geom_point() +
geom_smooth()
p2 <- p1 + scale_x_continuous(limits = c(325, 500))
p3 <- p1 + coord_cartesian(xlim = c(325, 500))
p1 + p2 + p3
- with函数
# 使用with函数将dat添加到环境
dat <- matrix(rnorm(20),nrow = 4,ncol=5)
colnames(dat)<-paste("a" ,1:5,sep ="")
rownames(dat)<-paste("b",1:4,sep = "")
dat <- as.data.frame(dat)
dat$a1 + dat$a2
# 等价于
with(dat, a1+a2)
# 另一个例子
dat <- read.csv("femaleMiceWeights.csv")
X <- filter(dat,Diet == "chow") %>%
select(Bodyweight) %>% unlist
Y <- filter(dat,Diet == "hf") %>%
select(Bodyweight) %>% unlist
t.test(X,Y)$p.value
# 等价于
with(t.test(X,Y), p.value)
- here包
How can you avoid setwd()
at the top of every script?
Use the here()
function from the here package to build the path when you read or write a file. Create paths relative to the top-level directory.
getwd()
# 当前目录,也可以直接不指定
ggsave(here::here('.',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")),
dpi = 320, width = 11, height = 17)
# 上一级目录
ggsave(here::here('..',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")),
dpi = 320, width = 11, height = 17)
# 下级目录
ggsave(here::here('dat','test',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")),
dpi = 320, width = 11, height = 17)
原图复现
- 数据预处理部分
library(tidyverse)
library(lubridate)
library(countrycode)
# remotes::install_github("wilkelab/ggtext")
library(ggtext)
library(glue)
library(here)
#library(skimr)
library(dplyr)
tdf_winners <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-07/tdf_winners.csv')
tdf_table <- tdf_winners %>%
mutate(
wins_consecutive = with(rle(winner_name), rep(lengths, times = lengths)),
year = year(start_date), # 提取年数据
year_labels = ifelse(year %% 10 == 0, glue("**{year}**"), year),
year_group = case_when(
year < 1915 ~ 1,
year > 1915 & year < 1940 ~ 2,
TRUE ~ 3),
avg_speed = distance / time_overall,
country_code = countrycode(nationality, origin = "country.name", destination = "iso3c"),
winner_annot = ifelse(wins_consecutive > 2, glue("**{winner_name} ({country_code})**"), glue("{winner_name} ({country_code})"))
) %>%
# 分组很妙,添加行号
group_by(year_group) %>%
mutate(
n_annot = row_number(),
annot = ifelse((n_annot - 2) %% 3 == 0, TRUE, FALSE)
) %>%
ungroup() %>%
add_row(year = c(1915, 1916, 1917, 1918, 1940, 1941, 1942, 1943)) %>%
arrange(year) %>%
mutate(n = row_number())
- 画图部分
# 把字体安排上
windowsFonts(HEL=windowsFont("Helvetica CE 55 Roman"),
RMN=windowsFont("Times New Roman"),
ARL=windowsFont("Arial"),
ARLB=windowsFont("Arial Bold"),
JBM=windowsFont("JetBrains Mono"))
# step1: geom_segment() 标虚线
ggplot(tdf_table) +
# dotted gridlines ---------------------------------------------------
# 使用 geom_segment() 函数添加虚线
geom_segment(data = subset(tdf_table, !is.na(year_labels)),
aes(x = 0, xend = 24000, y = n, yend = n),
linetype = "dotted", size = 0.2) +
# step2:加上左右两侧的年份
geom_richtext(aes(x = -1000, y = n, label = year_labels),
fill = "red", label.color = NA,
label.padding = unit(0.1, "lines"),
family = "JBM", size = 2.5) +
geom_richtext(aes(x = 25000, y = n, label = year_labels),
fill = "blue", label.color = NA,
label.padding = unit(0.1, "lines"),
family = "JBM", size = 2.5) +
# step3:geom_area() 加上填充面积
geom_area(aes(x = distance * 0.625, y = n, group = year_group),
fill = "#7DDDB6", alpha = 0.6,
orientation = "y", position = "identity") +
# step4:选择性加上每一个上面对应的点
geom_point(data = subset(tdf_table, annot),
aes(x = distance * 0.625, y = n), size = 0.5) +
# step5:给 step4 中的点加上数值
geom_label(data = subset(tdf_table, annot),
aes(x = distance * 0.625 + 100, y = n, label = distance),
fill = "#F3F2EE", label.size = 0,
label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
# step6:给每一行加上注释,对应 WINNER
geom_richtext(aes(x = 5300, y = n, label = winner_annot, .na = NULL),
fill = "#F3F2EE", label.size = 0,
label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
geom_label(aes(x = 10300, y = n, label = glue("{winner_team}", .na = NULL)),
fill = "#F3F2EE", label.size = 0, label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
# step7:geom_segment 函数添加 AVERAGE SPEED 数据
geom_segment(aes(x = 16000, xend = 16000 + avg_speed * 66.67, y = n, yend = n),
size = 2, colour = "#7DDDB6", alpha = 0.6) +
# step8:选择性添加 AVERAGE SPEED 对应的数值
geom_label(data = subset(tdf_table, annot),
aes(x = 16000 + avg_speed * 66.67 + 100, y = n,
label = round(avg_speed, 1)), fill = "#F3F2EE",
label.size = 0, label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
# step9:添加 TOTAL TIME 时间填充(geom_ribbon)、点、标签
geom_ribbon(aes(xmin = 20000, xmax = 20000 + time_overall * 10, y = n, group = year_group),
fill = "#FCDF33", alpha = 0.6, orientation = "y", position = "identity") +
geom_point(data = subset(tdf_table, annot),
aes(x = 20000 + time_overall * 10, y = n), size = 0.5) +
geom_label(data = subset(tdf_table, annot),
aes(x = 20000 + time_overall * 10 + 100, y = n,
label = round(time_overall, 1)),
fill = "#F3F2EE", label.size = 0,
label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
# step10:annotate 函数添加竖直线
annotate("segment",
x = c(-2000, 0, 5000, 10000, 16000, 20000, 24000, 26000),
xend = c(-2000, 0, 5000, 10000, 16000, 20000, 24000, 26000),
y = -4, yend = 115, size = 0.3) +
# step11:annotate 函数添加三条横线
annotate("segment",
x = -2000, xend = 26000,
y = c(-4, -1, 115), yend = c(-4, -1, 115), size = 0.3) +
# step12:annotate 添加表头
annotate("text",
x = c(-1000, 2500, 7500, 13000, 18000, 22000, 25000),
y = -2.5,
label = toupper(c("year", "distance", "winner", "team", "average speed", "total time", "year")),
hjust = 0.5, family = "ARLB", size = 3.5) +
# step13:annotate 函数加上空白
annotate("rect",
xmin = -2000, ymin = c(13, 38),
xmax = 26000, ymax = c(16, 41),
fill = "#F3F2EE", colour = "black", size = 0.3) +
# step14:annotate 函数参数 richtext 添加中间小表头
annotate("richtext", x = 13000, y = c(14.5, 39.5),
label = c("**1915-1918** Tour suspended because of Word War I",
"**1940-1946** Tour suspended because of Word War II"),
label.color = NA, fill = "#F3F2EE", hjust = 0.5,
family = "ARL", size = 3.5) +
# step15:annotate 函数参数 text 给 DISTANCE 栏加上单位刻度
annotate("text", x = c(100, 4900), y = 0,
label = c("0", "8000 km"), hjust = c(0, 1),
family = "ARL", size = 3) +
# step16:annotate 函数参数 text 给其他的添加刻度尺和注释
annotate("text", x = c(16100, 19900), y = 0,
label = c("0", "60 km/h"), hjust = c(0, 1),
family = "ARL", size = 3) +
annotate("text", x = c(20100, 23900), y = 0,
label = c("0", "300 h"), hjust = c(0, 1),
family = "ARL", size = 3) +
annotate("text", x = 26000, y = -6,
label = "Source: alastairrushworth/tdf & kaggle.com/jaminliu | Graphic: Georgios Karamanis",
hjust = 1, family = "ARL", size = 3) +
# step17:coord_cartesian 函数取消画板限制范围
coord_cartesian(clip = 'off') +
# step18:scale_x_continuous 函数通过 limits 和 expand 函数控制贴 y 轴距离
scale_x_continuous(limits = c(-2300, 26300), expand = expansion(add = 1)) +
# step19:scale_y_reverse 函数翻转 y 轴左边起始顺序,上下颠倒,并通过 expand = expansion(add = 0) 控制 y 轴顶端和低端间隙为 0
scale_y_reverse(expand = expansion(add = 0)) +
# step20:labs 加标题以及 theme_void 去除主题线条背景以及坐标轴
labs(
title = "Tour de France Winners") +
theme_void(base_family = "JBM") +
# step21:设置灰色背景,画板大小,以及标题大小
theme(
plot.background = element_rect(fill = "#F3F2EE", colour = NA),
plot.margin = margin(20, 20, 20, 20),
plot.title = element_text(hjust = 0.01, size = 28,
family = "JBM", margin = margin(0, 0, -8, 0))
)
# ggsave(here::here('dat','test',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")),
# dpi = 320, width = 11, height = 17)
# # step22:用here方法保存图片
ggsave(here::here('.',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")),
dpi = 320, width = 11, height = 17)
- 成品
拿数据重新绘图
library(tidyverse)
library(lubridate)
library(countrycode)
# remotes::install_github("wilkelab/ggtext")
library(ggtext)
library(glue)
library(here)
#library(skimr)
library(dplyr)
library(RColorBrewer)
library(scales)
# 设置渐变颜色
Blues <- brewer.pal(9, "Blues")[4:7]
pal <- colorRampPalette(Blues)
new_Blues <- pal(10)
show_col(new_Blues)
# 注册字体
windowsFonts(HEL=windowsFont("Helvetica CE 55 Roman"),
RMN=windowsFont("Times New Roman"),
ARL=windowsFont("Arial"),
ARLB=windowsFont("Arial Bold"),
JBM=windowsFont("JetBrains Mono"),
MY=windowsFont("Microsoft YaHei"),
IPML=windowsFont("IBM Plex Mono Light"),
IPS=windowsFont("IBM Plex Sans"),
IPSB=windowsFont("IBM Plex Sans Bold"),
JBMB=windowsFont("JetBrains Mono Bold"))
pokemons <- readr::read_csv('C:/Users/chenx/Desktop/pokemons.csv')
pokemons_dat <- pokemons %>%
filter((100 <= id & id <= 126 )| (131 <= id & id <= 156 ) | (161 <= id & id <= 200 )) %>%
select(1:5,'book_color','HP':'total_value') %>%
mutate(
id_group = case_when(
id < 127 ~ 1,
id > 130 & id < 127 ~ 2,
TRUE ~ 3),
# 实际上不转换也行
color = case_when(
book_color == '绿色' ~ 'green',
book_color == '灰色' ~ 'grey',
book_color == '褐色' ~ 'brown',
book_color == '蓝色' ~ 'blue',
book_color == '粉红色' ~ 'pink',
book_color == '红色' ~ 'red',
book_color == '黄色' ~ 'yellow',
book_color == '紫色' ~ 'purple',
book_color == '白色' ~ 'white',
book_color == '黑色' ~ 'black'),
name_type = glue('{Chinese_name}/{Japanese_name}({poketype})')
) %>%
group_by(id_group) %>%
mutate(
n_annot = row_number(),
# 设置可显示出的数据点
annot = ifelse((n_annot - 2) %% 3 == 0, TRUE, FALSE)
) %>%
ungroup() %>%
add_row(id = c(127, 128, 129, 130, 157, 158, 159, 160)) %>%
arrange(id) %>%
mutate(n = row_number())
ggplot(pokemons_dat) +
geom_segment(data = subset(pokemons_dat, !is.na(color)),
aes(x = 0, xend = 24000, y = n, yend = n),
linetype = "dotted", size = 0.2) +
geom_richtext(aes(x = -1000, y = n, label = id),
fill = "red", label.color = NA,
label.padding = unit(0.1, "lines"),
family = "JBM", size = 2.5) +
geom_richtext(aes(x = 25000, y = n, label = id, fill = color),
label.color = NA,
label.padding = unit(0.1, "lines"),
family = "JBM", size = 2.5) +
scale_fill_manual(values =new_Blues) +
guides(fill=FALSE) +
geom_area(aes(x = total_value * 7, y = n, group = id_group),
fill = "#7DDDB6", alpha = 0.6,
orientation = "y", position = "identity") +
geom_point(data = subset(pokemons_dat, annot),
aes(x = total_value * 7, y = n), size = 0.5) +
geom_label(data = subset(pokemons_dat, annot),
aes(x = total_value * 7 + 100, y = n, label = total_value),
fill = "#F3F2EE", label.size = 0,
label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
# 很无奈,中文和日文打带边框标签都会留出右端空格,所以只能把英文名单拎出来加边框
geom_label(aes(x = 6000, y = n, label = name_type,.na = NULL),
fill = "#F3F2EE", label.size = 0,
label.padding = unit(0.1, "lines"),
hjust = 0, family = "ARL", size = 2.5) +
geom_richtext(aes(x = 13000, y = n, label = English_name, .na = NULL),
fill = "#F3F2EE", label.size = 0, label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
geom_segment(aes(x = 16000, xend = 16000 + Attack * 21, y = n, yend = n),
size = 2, colour = "#7DDDB6", alpha = 0.6) +
geom_label(data = subset(pokemons_dat, annot),
aes(x = 16000 + Attack * 21 + 100, y = n,
label = round(Attack, 1)), fill = "#F3F2EE",
label.size = 0, label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
geom_ribbon(aes(xmin = 21000, xmax = 21000 + Speed * 15, y = n, group = id_group),
fill = "#FCDF33", alpha = 0.6, orientation = "y", position = "identity") +
geom_point(data = subset(pokemons_dat, annot),
aes(x = 21000 + Speed * 15, y = n), size = 0.5) +
geom_label(data = subset(pokemons_dat, annot),
aes(x = 21000 + Speed * 15 + 100, y = n,
label = round(Speed, 1)),
fill = "#F3F2EE", label.size = 0,
label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
annotate("segment",
x = c(-2000, 0, 5700, 12700, 16000, 21000, 24000, 26000),
xend = c(-2000, 0, 5700, 12700, 16000, 21000, 24000, 26000),
y = -4, yend = 103, size = 0.3) +
annotate("segment",
x = -2000, xend = 26000,
y = c(-4, -1, 103), yend = c(-4, -1, 103), size = 0.3) +
annotate("text",
x = c(-1000, 2750, 9200, 14350, 18500, 22500, 25000),
y = -2.5,
label = toupper(c("ID", "ABILITY SCORES", "NAME/TYPE", "ENAME", "ATTACK", "SPEED", "ID")),
hjust = 0.5, family = "ARLB", size = 3.5) +
annotate("rect",
xmin = -2000, ymin = c(27.5, 57.5),
xmax = 26000, ymax = c(31.5, 61.5),
fill = "#F3F2EE", colour = "black", size = 0.3) +
annotate("richtext", x = 13000, y = c(29.5, 59.5),
label = c("**127-130** Which Have Been Deleted :) I",
"**157-160** Which Have Been Deleted :) II"),
label.color = NA, fill = "#F3F2EE", hjust = 0.5,
family = "JBM", size = 5) +
annotate("text", x = c(100, 5600), y = 0,
label = c("0", "800"), hjust = c(0, 1),
family = "JBM", size = 3) +
annotate("text", x = c(16100, 20900), y = 0,
label = c("0", "250"), hjust = c(0, 1),
family = "JBM", size = 3) +
annotate("text", x = c(21100, 23900), y = 0,
label = c("0", "200"), hjust = c(0, 1),
family = "JBM", size = 3) +
annotate("text", x = 26000, y = -6,
label = "Source: wiki.52poke.com | Graphic: Xi Chen",
hjust = 1, family = "JBM", size = 3) +
coord_cartesian(clip = 'off') +
scale_x_continuous(limits = c(-2300, 26300), expand = expansion(add = 1)) +
scale_y_reverse(expand = expansion(add = 0)) +
labs(
title = "Pokemons"
) +
theme_void(base_family = "JBM") +
theme(
plot.background = element_rect(fill = "#F3F2EE", colour = NA),
plot.margin = margin(20, 20, 20, 20),
plot.title = element_text(hjust = 0.01, size = 28,
family = "JBMB", margin = margin(0, 0, -8, 0))
)
ggsave(here::here('.',paste0("Pokemons ", format(Sys.time(), "%Y%m%d"), ".png")),
dpi = 640, width = 10, height = 14)