R语言可视化2: 热图-pheatmap/ComplexHeat

2023-02-06  本文已影响0人  小程的学习笔记

1. 使用\color{green}{pheatmap}包绘制热图

1.1 基本用法

# 安装并加载所需的R包
# install.packages("pheatmap")
# library(devtools)
# install_github("raivokolde/pheatmap")
library(pheatmap)
library(RColorBrewer)

# 生成示例数据
test = matrix(rnorm(200), 20, 10)
test[1:10, seq(1, 10, 2)] = test[1:10, seq(1, 10, 2)] + 3
test[11:20, seq(2, 10, 2)] = test[11:20, seq(2, 10, 2)] + 2
test[15:20, seq(2, 10, 2)] = test[15:20, seq(2, 10, 2)] + 4
colnames(test) = paste("Test", 1:10, sep = "")
rownames(test) = paste("Gene", 1:20, sep = "")

# 简单绘图
pheatmap(test) 
pheatmap-1

1.2 其他参数设置

# 添加行和列的注释信息
annotation_col = data.frame(CellType = factor(rep(c("CT1", "CT2"), 5)), Time = 1:5)
rownames(annotation_col) = paste("Test", 1:10, sep = "")

annotation_row = data.frame(GeneClass = factor(rep(c("Path1", "Path2", "Path3"), c(10, 4, 6))))
rownames(annotation_row) = paste("Gene", 1:20, sep = "")

# 设置注释信息的颜色
ann_colors = list(
  Time = c("white", "firebrick"),
  CellType = c(CT1 = "#1B9E77", CT2 = "#D95F02"),
  GeneClass = c(Path1 = "#7570B3", Path2 = "#E7298A", Path3 = "#66A61E")
)

pheatmap(test, 
         scale = "row", # 进行均一化的方向,值为 "row", "column" 或者"none"
         # kmeans_k = 2, # 用kmean_k把列的特征聚类,2即是聚成2列
         clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean", # 表示行、列聚类使用的度量方法,默认为"euclidean",也可为  "correlation"即按照Pearson correlation方法进行聚类
         clustering_method = "complete",  # 选择聚类方法,包括:"ward", "ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid"
         cluster_rows = T, cluster_cols = T,  # 对行或列进行聚类
         cutree_rows = NA, cutree_cols = 6, # 若进行了行、列聚类,根据行、列聚类数量分隔热图行
         # gaps_row = NULL,  #仅在未进行行聚类时使用,表示在行方向上热图的隔断位置
         # gaps_col = c(1,2,3,4,5,6))  #仅在未进行列聚类时使用,表示在列方向上热图的隔断位置
         treeheight_row = 30, treeheight_col = 30,  #对行、列聚类树高度调整 
         border_color = NA, #热图每个小的单元格边框的颜色,默认为 "grey60"
         cellwidth = 30, cellheight = 10, #单个单元格的宽度/高度,默认为 “NA”
         # display_numbers = matrix(ifelse(test > 1, "***", ""), nrow = nrow(test)), #使用"display_numbers" 根据热图单元格的数值进行标记,若该单元格原始数值大于1,则为 "***",否则为 " ";
         display_numbers = T, #是否在单元格上显示原始数值或按照特殊条件进行区分标记,
         fontsize_number = 6, #热图上显示数字的字体大小
         number_format = "%.2f", #热图单元格上显示的数据格式,“%.2f” 表示两位小数,"%.1e"表示科学计数法
         number_color = "grey30", #修改热图单元格上显示的数据字体颜色
         show_rownames = F, show_colnames = T, #是否显示行名、列名
         main = "Example heatmap", #表示热图的标题名字
         color = colorRampPalette(c("#2E99B0","#FCD77F","#FF2E4C"))(50), #表示热图颜色,(50)表示50个等级
         legend = T, #表示是否显示图例,值为TRUE或FALSE
         legend_breaks = NA, #设置图例的范围legend_breaks=c(-2.5,0,2.5)表示图例断点的设置,默认为NA,
         legend_labels = NA, #表示图例断点的标签
         angle_col = "45", #表示列标签的角度
         annotation_col = annotation_col, annotation_row = annotation_row, # 添加行和列的注释信息
         annotation_colors = ann_colors # 修改注释的颜色
) 
pheatmap-2

2. 使用\color{green}{ComplexHeatmap}包绘制热图

2.1 基本用法

# 安装并加载所需的R包
# if (!requireNamespace("BiocManager", quietly=TRUE))
#   install.packages("BiocManager")
# BiocManager::install("ComplexHeatmap")
# library(devtools)
# install_github("jokergoo/ComplexHeatmap")

# 生成示例数据
nr1 = 4; nr2 = 8; nr3 = 6; nr = nr1 + nr2 + nr3
nc1 = 6; nc2 = 8; nc3 = 10; nc = nc1 + nc2 + nc3
mat = cbind(rbind(matrix(rnorm(nr1*nc1, mean = 1,   sd = 0.5), nr = nr1),
                  matrix(rnorm(nr2*nc1, mean = 0,   sd = 0.5), nr = nr2),
                  matrix(rnorm(nr3*nc1, mean = 0,   sd = 0.5), nr = nr3)),
            rbind(matrix(rnorm(nr1*nc2, mean = 0,   sd = 0.5), nr = nr1),
                  matrix(rnorm(nr2*nc2, mean = 1,   sd = 0.5), nr = nr2),
                  matrix(rnorm(nr3*nc2, mean = 0,   sd = 0.5), nr = nr3)),
            rbind(matrix(rnorm(nr1*nc3, mean = 0.5, sd = 0.5), nr = nr1),
                  matrix(rnorm(nr2*nc3, mean = 0.5, sd = 0.5), nr = nr2),
                  matrix(rnorm(nr3*nc3, mean = 1,   sd = 0.5), nr = nr3))
)
mat = mat[sample(nr, nr), sample(nc, nc)] # random shuffle rows and columns
rownames(mat) = paste0("row", seq_len(nr))
colnames(mat) = paste0("column", seq_len(nc))

# 简单绘图
Heatmap(mat)
ComplexHeatmap-1

2.2 单个热图

# 生成颜色映射
library(circlize)
col_fun = colorRamp2(c(-2, 0, 2), c("green", "white", "red"))

# 渲染树状图
library(dendextend)
row_dend = as.dendrogram(hclust(dist(mat)))
row_dend = color_branches(row_dend, k = 2) # `color_branches()` returns a dendrogram object

Heatmap(mat, name = "mat", col = col_fun, # 修改热图配色
        border_gp = gpar(col = "black", lty = 2), # 控制热图网格的边框配色及形状
        column_title = "I am a column title",  row_title = "I am a row title", # 按行或/和按列设置标题
        column_title_side = "bottom", row_title_side = "left", # 行/列标题的位置
        column_title_gp = gpar(fontsize = 20, fontface = "bold"), # 行/列标题的大小
        row_title_rot = 90, column_title_rot = 0, # 标题的旋转
        column_dend_height = unit(2, "cm"), row_dend_width = unit(2, "cm"), # 树状图的高度
        # row_dend_gp = gpar(col = "red")
        cluster_rows = row_dend, # 对树状图进行更自定义的可视化
        row_names_side = "left", column_names_side = "bottom", # 行/列名的位置
        row_dend_side = "right",column_dend_side = "top", # 树状图的位置
        row_names_gp = gpar(fontsize = 10, col = c(rep("red", 10), rep("blue", 8))), # 行/列字体的大小及配色
        column_names_rot = 45, #行/列名的旋转
        # column_km = 3, # 通过k均值进行行/列拆分
        column_split = rep(c("A", "B", "C"), c(6, 8, 10)), # 按分类变量拆分
        column_gap = unit(c(4, 2), "mm") # 切片之间的间隙
)  
ComplexHeatmap-2

2.3 添加注释

# 使用HeatmapAnnotation函数添加待注释的内容
library(circlize)
col_fun = colorRamp2(c(0, 5, 10), c("green", "white", "red"))

column_ha <- HeatmapAnnotation(foo = runif(24),
                               foo1 = anno_lines(runif(24), smooth = TRUE), # 添加线条标注 
                               bar1 = anno_barplot(runif(24), bar_width = 1, gp = gpar(fill = 1:24),  # 添加条形图注释,设置宽度,调整颜色
                                                   seq(-5, 5), baseline = 0, add_numbers = F), # 设置基线,不显示数据
                               box1 = anno_boxplot(runif(24), height = unit(1, "cm"), gp = gpar(fill = 1:24), # 添加箱线图标注
                                                   box_width = 0.9, outline = FALSE), # 调整箱线图宽度,及是否显示离群值
                               col = list(foo = col_fun), # 对于连续值,调整颜色
                               simple_anno_size = unit(1, "cm"), # 调整注释的高度
                               gp = gpar(col = "black") # 调整注释的网格边框
                               )

row_ha <- HeatmapAnnotation(foo2 = runif(18), bar2 = anno_barplot(runif(18)), which = "row") # 或使用`rowAnnotation()`

Heatmap(mat, top_annotation = column_ha,  right_annotation = row_ha,
        row_km = 3 # 用km把行分为3列
)
ComplexHeatmap-3

3. 使用\color{green}{HeatmapR}包绘制热图

3.1 基本用法

# 安装并加载所需的R包
# devtools::install_github("DillonHammill/HeatmapR")
library(HeatmapR)

# 使用heat_map_save()导出图像
heat_map_save("HeatmapR-1.png", height = 9.5, width = 5.7, res = 500)

heat_map(mtcars,
         cell_shape = "rect", # 修改单元格形状,可设置为“rect”(默认)、“circle”或“diamond”
         cell_size = TRUE, # 根据输入矩阵中的值缩放每个单元格的大小
         legend = TRUE, # 控制要显示的图例类型,包括"colour","shape"或"both"
         legend_size = 1.5, # 调整图例可用的空间
         legend_col_scale_size = 1.2, # 控制色标图例的大小
         legend_title = c("colour", "size"), # 显示的标题和文本
         title = "Legends"
)
heat_map_complete()
HeatmapR-1

3.2 其他参数设置

heat_map_save("HeatmapR-2.png", height = 9.5, width = 6.7, res = 500)

heat_map(
  mtcars,
  cell_border_line_type = 3, cell_border_line_width = 2, cell_border_line_col = "red", cell_border_line_col_alpha = 1, # 调节单元格的边框
  cell_text = TRUE, cell_text_size = 0.6, cell_text_font = 2, # 显示单元格数据,并调整字体(1 by default)及大小(1 by default)
  cell_text_col = "black", cell_text_col_alpha = 1, # 调整单元格数据的颜色("white" by default),及透明度
  title = "mtcars", # 设置标题
  scale = "column", # 缩放行或列的数据,以看出较小的差距
  scale_method = "range", # 选择缩放的方法:"range" (by default), "mean" or "zscore"
  bar_values_x = 1:ncol(mtcars), bar_size_x = 0.8, bar_fill_x = rainbow(ncol(mtcars)), bar_line_col_x = "black", # 在热图中添加条形图
  bar_values_y = 1:nrow(mtcars), bar_size_y = 0.8, bar_fill_y = rainbow(ncol(mtcars)), bar_line_col_y = "black",
  dist_method = "euclidean", clust_method = "complete",
  tree_x = TRUE, tree_size_x = 0.4, # 将树状图添加到x轴
  tree_cut_x = 4, tree_split_x = 1, # 选择性割任x轴上的树状图
  tree_y = TRUE, tree_size_y = 0.8, # 将树状图添加到y轴
  tree_cut_y = 5, tree_split_y = 1, # 选择性割任y轴上的树状图
)

heat_map_complete()
HeatmapR-2

4. 使用\color{green}{funkyheatmap}包绘制热图

4.1 基本用法

# 安装并加载所需的R包
# install.packages("funkyheatmap")
library(funkyheatmap)
library(dplyr, warn.conflicts = FALSE)
library(tibble, warn.conflicts = FALSE)

funky_heatmap(mtcars)
funkyheatmap-1

4.2 其他参数设置

data("dynbenchmark_data") # 使用dynbenchmark_data的数据
data <- dynbenchmark_data

row_info <- data$data %>% select(group, id) # 选择待展示的行(此处即为id列各种方法)

row_groups <- data$data %>% transmute(group, Group = case_when(
  group == "cycle" ~ "Cyclic methods", TRUE ~ paste0(stringr::str_to_title(group), " methods"))) %>%
  unique()

# 定义需要展示的列以及对应的一些属性信息
column_info <- tribble( # tribble_start
  ~group,                   ~id,                                            ~name,                      ~geom,        ~palette,         ~options,
  "method_characteristic",  "method_name",                                  "",                         "text",       NA,               list(hjust = 0, width = 6),
  "method_characteristic",  "method_platform",                              "Platform",                 "text",       NA,               list(width = 2),
  "method_characteristic",  "method_topology_inference",                    "Topology inference",       "text",       NA,               list(width = 2),
  "score_overall",          "summary_overall_overall",                      "Overall",                  "bar",        "overall",        list(width = 4, legend = FALSE),
  "score_overall",          "benchmark_overall_overall",                    "Accuracy",                 "bar",        "benchmark",      list(width = 4, legend = FALSE),
  "score_overall",          "qc_overall_overall",                           "Usability",                "bar",        "qc",             list(width = 4, legend = FALSE),
  "score_overall",          "control_label",                                "",                         "text",       NA,               list(overlay = TRUE),
  "benchmark_metric",       "benchmark_overall_norm_him",                   "Topology",                 "funkyrect",  "benchmark",      lst(),
  "benchmark_metric",       "benchmark_overall_norm_F1_branches",           "Branch assignment",        "funkyrect",  "benchmark",      lst(),
  "benchmark_metric",       "benchmark_overall_norm_correlation",           "Cell positions",           "funkyrect",  "benchmark",      lst(),
  "benchmark_metric",       "benchmark_overall_norm_featureimp_wcor",       "Features",                 "funkyrect",  "benchmark",      lst(),
  "benchmark_source",       "benchmark_source_real_gold",                   "Gold",                     "funkyrect",  "benchmark",      lst(),
  "benchmark_source",       "benchmark_source_real_silver",                 "Silver",                   "funkyrect",  "benchmark",      lst(),
  "benchmark_source",       "benchmark_source_synthetic_dyngen",            "dyngen",                   "funkyrect",  "benchmark",      lst(),
  "benchmark_source",       "benchmark_source_synthetic_dyntoy",            "dyntoy",                   "funkyrect",  "benchmark",      lst(),
  "benchmark_source",       "benchmark_source_synthetic_prosstt",           "PROSSTT",                  "funkyrect",  "benchmark",      lst(),
  "benchmark_source",       "benchmark_source_synthetic_splatter",          "Splatter",                 "funkyrect",  "benchmark",      lst(),
  "benchmark_execution",    "benchmark_overall_pct_errored_str",            "% Errored",                "text",       NA,               lst(hjust = 1),
  "benchmark_execution",    "benchmark_overall_error_reasons",              "Reason",                   "pie",        "error_reasons",  lst(),
  "scaling_predtime",       "scaling_pred_scoretime_cells1m_features100",   "1m \u00D7 100",            "rect",       "scaling",        lst(scale = FALSE),
  "scaling_predtime",       "scaling_pred_scoretime_cells1m_features100",   "",                         "text",       "white6black4",   lst(label = "scaling_pred_timestr_cells1m_features100", overlay = TRUE, size = 3, scale = FALSE),
  "scaling_predtime",       "scaling_pred_scoretime_cells100k_features1k",  "100k \u00D7 1k",           "rect",       "scaling",        lst(scale = FALSE),
  "scaling_predtime",       "scaling_pred_scoretime_cells100k_features1k",  "",                         "text",       "white6black4",   lst(label = "scaling_pred_timestr_cells100k_features1k", overlay = TRUE, size = 3, scale = FALSE),
  "scaling_predtime",       "scaling_pred_scoretime_cells10k_features10k",  "10k \u00D7 10k",           "rect",       "scaling",        lst(scale = FALSE),
  "scaling_predtime",       "scaling_pred_scoretime_cells10k_features10k",  "",                         "text",       "white6black4",   lst(label = "scaling_pred_timestr_cells10k_features10k", overlay = TRUE, size = 3, scale = FALSE),
  "scaling_predtime",       "scaling_pred_scoretime_cells1k_features100k",  "1k \u00D7 100k",           "rect",       "scaling",        lst(scale = FALSE),
  "scaling_predtime",       "scaling_pred_scoretime_cells1k_features100k",  "",                         "text",       "white6black4",   lst(label = "scaling_pred_timestr_cells1k_features100k", overlay = TRUE, size = 3, scale = FALSE),
  "scaling_predtime",       "scaling_pred_scoretime_cells100_features1m",   "100 \u00D7 1m",            "rect",       "scaling",        lst(scale = FALSE),
  "scaling_predtime",       "scaling_pred_scoretime_cells100_features1m",   "",                         "text",       "white6black4",   lst(label = "scaling_pred_timestr_cells100_features1m", overlay = TRUE, size = 3, scale = FALSE),
  "scaling_predtime",       "benchmark_overall_time_predcor_str",           "Cor. pred. vs. real",      "text",       NA,               lst(size = 3),
  "stability",              "stability_him",                                "Topology",                 "funkyrect",  "stability",      lst(),
  "stability",              "stability_F1_branches",                        "Branch assignment",        "funkyrect",  "stability",      lst(),
  "stability",              "stability_correlation",                        "Cell positions",           "funkyrect",  "stability",      lst(),
  "stability",              "stability_featureimp_wcor",                    "Features",                 "funkyrect",  "stability",      lst(),
  "qc_category",            "qc_cat_availability",                          "Availability",             "funkyrect",  "qc",             lst(),
  "qc_category",            "qc_cat_behaviour",                             "Behaviour",                "funkyrect",  "qc",             lst(),
  "qc_category",            "qc_cat_code_assurance",                        "Code assurance",           "funkyrect",  "qc",             lst(),
  "qc_category",            "qc_cat_code_quality",                          "Code quality",             "funkyrect",  "qc",             lst(),
  "qc_category",            "qc_cat_documentation",                         "Documentation",            "funkyrect",  "qc",             lst(),
  "qc_category",            "qc_cat_paper",                                 "Paper",                    "funkyrect",  "qc",             lst(),
  "qc_category",            "control_label",                                "",                         "text",       NA,               list(overlay = TRUE, width = -6)
)

# 将上述column_info的列,根据对应的group ,设置group的“Category”和大一级的Experiment信息
column_groups <- tribble(
  ~Experiment,    ~Category,                                      ~group,                   ~palette,
  "Method",       "\n",                                           "method_characteristic",  "overall",
  "Summary",      "Aggregated scores per experiment",             "score_overall",          "overall",
  "Accuracy",     "Per metric",                                   "benchmark_metric",       "benchmark",
  "Accuracy",     "Per dataset source",                           "benchmark_source",       "benchmark",
  "Accuracy",     "Errors",                                       "benchmark_execution",    "benchmark",
  "Scalability",  "Predicted time\n(#cells \u00D7 #features)",    "scaling_predtime",       "scaling",
  "Stability",    "Similarity\nbetween runs",                     "stability",              "stability",
  "Usability",    "Quality of\nsoftware and paper",               "qc_category",            "qc"
)

# 设置不同palette的颜色
error_reasons <- tibble(
  name = c("pct_memory_limit", "pct_time_limit", "pct_execution_error", "pct_method_error"),
  label = c("Memory limit exceeded", "Time limit exceeded", "Execution error", "Method error"),
  colour = RColorBrewer::brewer.pal(length(name), "Set3")
)

palettes <- tribble(
  ~palette,             ~colours,
  "overall",            grDevices::colorRampPalette(rev(RColorBrewer::brewer.pal(9, "Greys")[-1]))(101),
  "benchmark",          grDevices::colorRampPalette(rev(RColorBrewer::brewer.pal(9, "Blues") %>% c("#011636")))(101),
  "scaling",            grDevices::colorRampPalette(rev(RColorBrewer::brewer.pal(9, "Reds")[-8:-9]))(101),
  "stability",          grDevices::colorRampPalette(rev(RColorBrewer::brewer.pal(9, "YlOrBr")[-7:-9]))(101),
  "qc",                 grDevices::colorRampPalette(rev(RColorBrewer::brewer.pal(9, "Greens")[-1] %>% c("#00250f")))(101),
  "error_reasons",      error_reasons %>% select(label, colour) %>% deframe(),
  "white6black4",       c(rep("white", 3), rep("black", 7))
)


g <- funky_heatmap(
  data = data$data,
  column_info = column_info,
  column_groups = column_groups,
  row_info = row_info,
  row_groups = row_groups,
  palettes = palettes,
  col_annot_offset = 3.2
)

ggsave("/Users/shumin/Desktop/funkyheatmap-2.png", g, device = png, width = g$width, height = g$height)
funkyheatmap-2

参考:

  1. https://github.com/jokergoo/ComplexHeatmap
  2. https://github.com/DillonHammill/HeatmapR
  3. funkyheatmap |临床+组学+分组数据可视化“神器”,时髦的热图(http://www.360doc.com/content/23/0110/08/65403234_1063150291.shtml
上一篇下一篇

猜你喜欢

热点阅读