R 数据可视化 —— ggplot 色块图
2021-04-20 本文已影响0人
名本无名
前言
绘制色块图的函数有三个,其中 geom_rect()
和 geom_tile()
除了参数不同之外,其他都是一样的。
- geom_rect() 使用四个边界参数:xmin、xmax、ymin 和 ymax
- geom_tile() 使用中心位置和大小参数:x、y、width、height
- geom_raster() 是所有条块大小相同时 geom_tile() 的快速版本,而且当输出为 PDF 时所占空间也更小
示例
绘制简单的热图
# Basic heat map: raster cells positioned by waiting (x) and eruptions (y),
# with the fill colour mapped to density.
ggplot(
  data = faithfuld,
  mapping = aes(x = waiting, y = eruptions, fill = density)
) +
  geom_raster()
增加图像的平滑度
# Same heat map as above, but with interpolate = TRUE passed to the raster
# layer to smooth the rendered image.
ggplot(
  data = faithfuld,
  mapping = aes(x = waiting, y = eruptions, fill = density)
) +
  geom_raster(interpolate = TRUE)
如果想要绘制矩形块,可以使用 geom_tile()
# Toy data: ten tiles in two rows (y = 1 and y = 2). x holds the tile centres,
# w the tile widths, and z a five-level factor used for the fill colour.
df <- data.frame(
  x = rep(c(2, 5, 7, 9, 12), times = 2),
  y = rep(c(1, 2), each = 5),
  z = factor(rep(seq_len(5), each = 2)),
  w = rep(diff(c(0, 4, 6, 8, 10, 14)), times = 2)
)

# Tiles centred on (x, y); no explicit width is mapped.
ggplot(data = df, mapping = aes(x = x, y = y, fill = z)) +
  geom_tile(colour = "grey50")

# Same tiles, but each tile's width is taken from column w.
ggplot(data = df, mapping = aes(x = x, y = y, width = w, fill = z)) +
  geom_tile(colour = "grey50")
或 geom_rect()
# Rectangles described by their four edges: x -/+ w / 2 horizontally and
# y to y + 1 vertically, filled by z.
ggplot(
  data = df,
  mapping = aes(
    xmin = x - w / 2,
    xmax = x + w / 2,
    ymin = y,
    ymax = y + 1,
    fill = z
  )
) +
  geom_rect(colour = "grey50")
极坐标化
# Variable-width tiles in polar coordinates, with x mapped to the angle.
p1 <- ggplot(data = df, mapping = aes(x = x, y = y, width = w, fill = z)) +
  geom_tile(colour = "grey50") +
  coord_polar(theta = "x")

# Same tiles, with y mapped to the angle instead.
p2 <- ggplot(data = df, mapping = aes(x = x, y = y, width = w, fill = z)) +
  geom_tile(colour = "grey50") +
  coord_polar(theta = "y")

# Show both polar plots in one grid.
plot_grid(plotlist = list(p1, p2))
样式
1. 华夫饼图
华夫饼图包括两种类型:
- 块状华夫饼图:
用于展示每组数据占总体的比例情况,可以快速看出数据中不同类别的分布以及占比,同时也可以知道不同分组之间的分布差异
其中堆积型侧重于展示类别数值
# 10 x 10 grid of cells. Each cell gets a class label sampled from mpg$class;
# the labels are sorted so equal classes occupy consecutive cells.
df <- tibble(
  x = rep(seq_len(10), times = 10),
  y = rep(seq_len(10), each = 10),
  class = sort(sample(mpg$class, size = 100))
)

# Stacked waffle: keep 67 random cells, then renumber y within each x column
# so the remaining cells stack from 1 upwards without gaps.
df %>%
  sample_n(67) %>%
  arrange(x, y) %>%
  group_by(x) %>%
  mutate(y = row_number()) %>%
  ggplot(mapping = aes(x = x, y = y, fill = class)) +
  geom_tile(colour = "white") +
  # Alternative layer kept from the original:
  # geom_point(size = 12, shape = 21) +
  coord_fixed() +
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    panel.background = element_blank()
  )
以及侧重于展示类别占比的百分比堆积型
# Percent-stacked waffle: draw the full 10 x 10 grid, filled by class, with
# the y axis reversed so the first row of cells ends up at the top.
ggplot(df, aes(x, y, fill = class)) +
  geom_tile(colour = "white") +
  # scale_y_reverse() builds the same reversed scale as
  # scale_y_continuous(trans = "reverse") without relying on the `trans`
  # argument, which newer ggplot2 releases deprecate.
  scale_y_reverse() +
  theme(
    panel.background = element_blank(),
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )
- 点状华夫饼图:
以点图矩阵的方式展示离散数据,用颜色来区分不同组别。功能与块状华夫饼图相同
# Dot waffle: same grid as the block waffle, but each cell is drawn as a
# filled point (shape 21) instead of a tile.
ggplot(df, aes(x, y, fill = class)) +
  # Tile layer kept from the original for comparison:
  # geom_tile(colour = "white") +
  geom_point(size = 12, shape = 21) +
  coord_fixed() +
  # scale_y_reverse() builds the same reversed scale as
  # scale_y_continuous(trans = "reverse") without relying on the `trans`
  # argument, which newer ggplot2 releases deprecate.
  scale_y_reverse() +
  theme(panel.background = element_blank())
2. 马赛克图
马赛克图用于显示分类数据中一对变量之间的关系。
# Toy data for the mosaic plot: 5 genes x 4 statuses. `percent` sets each
# gene's column width and `value` is a random count per (gene, status) cell.
df <- tibble(
  gene = rep(LETTERS[1:5], 4),
  status = rep(c("alpha", "beta", "gamma", "delta"), each = 5),
  value = sample(1:100, 20),
  percent = rep(c(10, 30, 30, 20, 10), 4)
)

df %>%
  # Horizontal extent: cumulative column widths within each status.
  group_by(status) %>%
  mutate(xmax = cumsum(percent), xmin = xmax - percent) %>%
  # Vertical extent: each status's share of the gene's total value, stacked
  # cumulatively within the gene; (x, y) is the centre of each rectangle.
  group_by(gene) %>%
  mutate(
    ytmp = value * 100 / sum(value),
    ymax = cumsum(ytmp),
    ymin = ymax - ytmp,
    x = (xmin + xmax) / 2,
    y = (ymin + ymax) / 2
  ) %>%
  # Drop the grouping before plotting; it is no longer needed.
  ungroup() %>%
  ggplot() +
  geom_rect(
    aes(xmin = xmin, ymin = ymin, xmax = xmax, ymax = ymax, fill = status),
    colour = "black"
  ) +
  geom_text(aes(x, y, label = paste0(round(ytmp, 2), "%"))) +
  # Every gene appears once per status, so its column label would be drawn
  # four times at the same spot; check_overlap keeps only the first copy.
  geom_text(aes(x = x, y = 103, label = gene), check_overlap = TRUE) +
  theme(panel.background = element_blank())
马赛克图
3. 瀑布图
假设我们有如下突变数据
> data
# A tibble: 2,432 x 3
sample gene MutFunc
<chr> <chr> <chr>
1 C1803704 ATM nonsynonymous SNV
2 C1803704 BRAF nonsynonymous SNV
3 C1803704 BRCA1 synonymous SNV
4 C1803704 BRCA1 nonsynonymous SNV
5 C1803704 BRCA2 nonsynonymous SNV
6 C1803704 BRCA2 synonymous SNV
7 C1803704 BRCA2 stopgain
8 C1803704 BRD4 nonsynonymous SNV
9 C1803704 EOMES nonsynonymous SNV
10 C1803704 EPCAM nonsynonymous SNV
# … with 2,422 more rows
我们需要绘制三张图并将它们组合在一起:基因在各样本中的突变情况(色块图)、每个样本的突变基因数(条形图),以及每个基因的突变频数(条形图)。
颜色表示不同的突变类型。
首先,对数据进行处理,
# One row per distinct (sample, gene, MutFunc); records whose mutation type
# is the "." placeholder are dropped.
data <- df %>%
  select(sample, gene, MutFunc) %>%
  distinct() %>%
  filter(MutFunc != ".")

# The 20 genes with the most records, each with its percentage share of the
# records among those genes, sorted from most to least frequent.
genes <- count(data, gene) %>%
  top_n(n = 20, wt = n) %>%
  mutate(percent = round(n * 100 / sum(n), 1)) %>%
  arrange(desc(n))

# Record count per sample, restricted to the selected genes, largest first.
samples <- subset(data, gene %in% genes$gene) %>%
  count(sample) %>%
  arrange(desc(n)) %>%
  rename(num = n)

# Attach the per-gene and per-sample counts, then fix the plotting order.
# The factor conversion must come AFTER both joins: joining a factor `sample`
# against the character `samples$sample` coerces the key back to character
# and silently discards the level order. With this ordering `sample` is a
# factor (<fct>) in the result. Join keys are spelled out explicitly.
df <- data %>%
  inner_join(genes, by = "gene") %>%
  inner_join(samples, by = "sample") %>%
  mutate(
    gene = factor(gene, levels = rev(genes$gene)),
    sample = factor(sample, levels = samples$sample)
  )
转换为这个格式的数据
> df
# A tibble: 978 x 6
sample gene MutFunc n percent num
<chr> <fct> <chr> <int> <dbl> <int>
1 C1803704 ATM nonsynonymous SNV 16 1.6 16
2 C1803704 BRCA1 synonymous SNV 137 14 16
3 C1803704 BRCA1 nonsynonymous SNV 137 14 16
4 C1803704 BRCA2 nonsynonymous SNV 274 28 16
5 C1803704 BRCA2 synonymous SNV 274 28 16
6 C1803704 BRCA2 stopgain 274 28 16
7 C1803704 EOMES nonsynonymous SNV 31 3.2 16
8 C1803704 PIK3CA nonsynonymous SNV 39 4 16
9 C1803704 TP53 stopgain 55 5.6 16
10 C1803710 BRCA1 synonymous SNV 137 14 11
# … with 968 more rows
然后绘制这三张图
# Mutation heat map: tiles at (sample, gene), filled by mutation type.
p1 <- ggplot(df) +
  geom_tile(aes(x = sample, y = gene, fill = MutFunc)) +
  # geom_text(aes(x = -4, y = gene, label = percent), data = genes) +
  # Original author's note: the legend row count has to be set on the fill
  # scale; a standalone guides() call had no effect.
  scale_fill_discrete(guide = guide_legend(nrow = 3)) +
  scale_y_discrete(position = "right") +
  theme(
    axis.text.x = element_blank(),
    # The y axis is drawn on the right (see scale_y_discrete above), so the
    # label size must target the right-hand axis text; the original
    # axis.text.y.left setting had no visible effect.
    axis.text.y.right = element_text(size = 4),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    legend.position = "bottom",
    legend.background = element_rect(fill = "white", colour = "black")
  )
# Bar chart of the number of mutation records per sample, stacked by
# mutation type.
p2 <- ggplot(df) +
  geom_bar(aes(x = sample, fill = MutFunc)) +
  # Use expand to remove the gap between the data and the axis.
  scale_y_continuous(
    breaks = seq(0, 25, 5),
    limits = c(0, 25),
    expand = expansion(mult = 0, add = 0)
  ) +
  # No trailing comma after the last element: it would pass an empty
  # argument to theme(), which some ggplot2 versions reject.
  theme(
    legend.position = "none",
    panel.background = element_blank(),
    axis.title = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.length.y.left = unit(.25, "cm"),
    axis.line.y.left = element_line(colour = "black")
  )
# Horizontal bar chart of the mutation record count per gene, stacked by
# mutation type, with the count axis drawn along the top.
p3 <- ggplot(data = df) +
  geom_bar(mapping = aes(y = gene, fill = MutFunc)) +
  scale_x_continuous(
    position = "top",
    breaks = seq(0, 280, 70),
    limits = c(0, 280),
    # Zero expansion so the bars start flush against the axis.
    expand = expansion(mult = 0, add = 0)
  ) +
  theme(
    legend.position = "none",
    panel.background = element_blank(),
    axis.title = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.x.top = element_line(colour = "black"),
    axis.ticks.length.x.top = unit(.25, "cm")
  )
最后将三张图拼接起来
# Left column: per-sample bar chart stacked above the heat map, vertically
# aligned, with the heat map given five times the height.
p12 <- plot_grid(
  plotlist = list(p2, p1),
  nrow = 2,
  align = "v",
  rel_heights = c(1, 5)
)

# Right column: the per-gene bar chart with an empty cell above and below it;
# the relative heights are the ones chosen in the original layout.
pr <- plot_grid(
  plotlist = list(NULL, p3, NULL),
  nrow = 3,
  align = "v",
  rel_heights = c(0.95, 5, 1.05)
)

# Final figure: the two columns side by side, left column five times wider.
plot_grid(
  plotlist = list(p12, pr),
  ncol = 2,
  align = "h",
  rel_widths = c(5, 1)
)
瀑布图
代码:https://github.com/dxsbiocc/learn/tree/main/R/plot/color_block.R