如果有 MAF 格式的文件,可以直接用 maftools 包的 oncoplot 函数绘制瀑布图,并且有多种展示和统计方式(参见《maftools | 从头开始绘制发表级oncoplot(瀑布图)》和《maftools | TCGA肿瘤突变数据的汇总,分析和可视化》)。如果只有多个样本的基因突变与否的 Excel 表格,也不用担心,可以用 ComplexHeatmap 包的 oncoPrint 函数绘制。
mat: The value should be a character matrix which encodes multiple alterations, or a list of matrices in which every matrix contains binary values representing whether the alteration is present or absent. When the value is a list, the names of the list represent alteration types. You can use unify_mat_list to make all matrices have the same row names and column names.
alter_fun: A single function or a list of functions which defines how to add graphics for different alterations. You can use alter_graphic to automatically generate for rectangles and points.
alter_fun_is_vectorized:Whether alter_fun is implemented vectorized. Internally the function will guess.
col:A vector of color for which names correspond to alteration types.
top_annotation:Annotation put on top of the oncoPrint. By default it is barplot which shows the number of genes with a certain alteration in each sample.
right_annotation:Annotation put on the right of the oncoPrint. By default it is barplot which shows the number of samples with a certain alteration in each gene.
left_annotation:Annotation put on the left of the oncoPrint.
bottom_annotation:Annotation put at the bottom of the oncoPrint.
heatmap_legend_param:pass to Heatmap.
1. 软件包安装
# Install each required package only when it is missing, then attach it.
# `require()` on its own leaves a freshly installed package unattached, so the
# functions used later (read.maf, oncoPrint, ...) would not be found on a
# first run. BiocManager is needed for the Bioconductor installs.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
for (pkg in c("maftools", "ComplexHeatmap")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    BiocManager::install(pkg)
  }
  library(pkg, character.only = TRUE)
}
2. 数据读取
举例的数据就是LAML的数据集,在TCGA数据库上也有,但是我这里是使用maftools自带的例子数据,方便一些,获取突变矩阵只需要在oncoplot函数设置参数writeMatrix = TRUE,就会自动生成一个文件名为“onco_matrix.txt”的突变矩阵文件了,简单吧,不过前提条件是我们有maf格式文件,如果没有自己搞一个突变矩阵吧,行名为基因,列名为样本即可。
# Path to the TCGA LAML example MAF that ships with maftools.
laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
# Path to the matching clinical table (survival information and histology).
# Supplying it to read.maf() is optional.
laml.clin <- system.file("extdata", "tcga_laml_annot.tsv", package = "maftools")
# Read the mutations and attach the clinical table in a single MAF object.
laml <- read.maf(clinicalData = laml.clin, maf = laml.maf)
## -Reading
## -Validating
## -Silent variants: 475
## -Summarizing
## -Processing clinical data
## -Finished in 3.190s elapsed (0.450s cpu)
# Export the mutation matrix from the MAF object if it has not been written
# yet: oncoplot(writeMatrix = TRUE) saves "onco_matrix.txt" in the working
# directory (row names = genes, column names = samples).
if (!file.exists("onco_matrix.txt")) {
  oncoplot(maf = laml, writeMatrix = TRUE)
}
# Keep cells as character strings so they can be recoded below.
matMut <- read.table("onco_matrix.txt", header = TRUE, check.names = FALSE,
                     sep = "\t", stringsAsFactors = FALSE)
matMut[1:3, 1:3]
## TCGA-AB-2945 TCGA-AB-2965 TCGA-AB-2993
## FLT3 Missense In-frame In-frame
## DNMT3A Missense Missense Truncating
## NPM1 Truncating Truncating Truncating
# Rename "In-frame" to "In_frame" so the value matches the names used in the
# `col` vector and the `alter_fun` list further down.
matMut[matMut == "In-frame"] <- "In_frame"
# Long format (one row per gene/sample pair), built with base R so no
# reshaping package has to be attached. Column layout: gene, variable (sample),
# Variant_Classification.
matMuttmp <- matMut
matMuttmp$gene <- row.names(matMuttmp)
sample_cols <- setdiff(colnames(matMuttmp), "gene")
mat_long <- data.frame(
  gene = rep(matMuttmp$gene, times = length(sample_cols)),
  variable = factor(rep(sample_cols, each = nrow(matMuttmp)),
                    levels = sample_cols),
  Variant_Classification = unlist(matMuttmp[sample_cols], use.names = FALSE),
  stringsAsFactors = FALSE
)
# List the alteration types present in the matrix ("" = no alteration).
sort(unique(mat_long$Variant_Classification))
## [1] "" "In_frame" "Missense" "Multi_Hit" "Truncating"
1. 临床数据整理
# Clinical annotations, restricted to samples present in the mutation matrix.
pdata <- getClinicalData(laml)
pdata <- subset(pdata, pdata$Tumor_Sample_Barcode %in% colnames(matMut))
pdata <- as.data.frame(pdata)
# Follow-up time: convert to numeric and replace -Inf with 0. Going through
# as.character() first keeps the values intact should the column be a factor.
followup <- as.numeric(as.character(pdata$days_to_last_followup))
followup[is.infinite(followup) & followup < 0] <- 0
pdata$days_to_last_followup <- followup
# Categorical annotations are stored as factors.
pdata$FAB_classification <- factor(pdata$FAB_classification)
pdata$Overall_Survival_Status <- factor(pdata$Overall_Survival_Status)
str(pdata)
## 'data.frame': 164 obs. of 4 variables:
## $ Tumor_Sample_Barcode : chr "TCGA-AB-2802" "TCGA-AB-2804" "TCGA-AB-2805" "TCGA-AB-2806" ...
## $ FAB_classification : Factor w/ 8 levels "M0","M1","M2",..: 5 4 1 2 2 3 4 3 3 5 ...
## $ days_to_last_followup : num 365 2557 577 945 181 ...
## $ Overall_Survival_Status: Factor w/ 2 levels "0","1": 2 1 2 2 2 1 2 2 2 2 ...
# Put the matrix columns in the same sample order as the rows of pdata.
# as.character() guards against indexing by factor codes; drop = FALSE keeps a
# data frame even when a single sample remains.
matMut <- matMut[, as.character(pdata$Tumor_Sample_Barcode), drop = FALSE]
2. 指定变异形状
# Drawing functions for oncoPrint(), one per alteration type plus the cell
# background. Each receives the cell centre (x, y) and size (w, h) and draws a
# rectangle shrunk by 0.5 mm so neighbouring cells are visually separated.
# `col` is looked up when the functions are called (at drawing time), so it
# only has to exist before oncoPrint() is run.
alter_fun <- list(
  background = function(x, y, w, h) {
    grid.rect(x, y, w - unit(0.5, "mm"), h - unit(0.5, "mm"),
              gp = gpar(fill = "white", col = NA))
  },
  In_frame = function(x, y, w, h) {
    grid.rect(x, y, w - unit(0.5, "mm"), h - unit(0.5, "mm"),
              gp = gpar(fill = col["In_frame"], col = NA))
  },
  Missense = function(x, y, w, h) {
    grid.rect(x, y, w - unit(0.5, "mm"), h - unit(0.5, "mm"),
              gp = gpar(fill = col["Missense"], col = NA))
  },
  Multi_Hit = function(x, y, w, h) {
    grid.rect(x, y, w - unit(0.5, "mm"), h - unit(0.5, "mm"),
              gp = gpar(fill = col["Multi_Hit"], col = NA))
  },
  Truncating = function(x, y, w, h) {
    grid.rect(x, y, w - unit(0.5, "mm"), h - unit(0.5, "mm"),
              gp = gpar(fill = col["Truncating"], col = NA))
  }
  # To add another type, append a comma above and uncomment:
  # Splice_Site = function(x, y, w, h) {
  #   grid.rect(x, y, w - unit(0.5, "mm"), h - unit(0.5, "mm"),
  #             gp = gpar(fill = col["Splice_Site"], col = NA))
  # }
)
3. 指定变异类型的标签,和数据中的类型对应
# Legend settings passed on to the heatmap: `at` lists the alteration types as
# they appear in the data, `labels` gives the text shown for each of them (same
# order and length as `at`).
heatmap_legend_param <- list(
  title = "Alternations",
  at = c("In_frame", "Missense", "Truncating", "Multi_Hit"),
  labels = c("In_frame", "Missense", "Truncating", "Multi_Hit")
)
4. 指定颜色
# Fill colour for each alteration type; change the colour values to restyle.
col <- c(In_frame = "purple", Missense = "orange", Multi_Hit = "black", Truncating = "blue")
# Colour mapping for the continuous annotation: white at 0 to red at 973.
# colorRamp2() lives in circlize (installed as a ComplexHeatmap dependency);
# it is namespace-qualified because circlize is never attached in this script.
# NOTE(review): 973 is taken as-is from the original; confirm it is the
# intended upper break for days_to_last_followup.
col_OS <- circlize::colorRamp2(c(0, 973), c("white", "red"))
5. 设置样本注释
# Per-sample annotation tracks: follow-up time (coloured with col_OS),
# survival status and FAB classification. Track names are shown in 7 pt.
ha <- HeatmapAnnotation(
  OS = pdata$days_to_last_followup,
  Status = pdata$Overall_Survival_Status,
  FAB_classification = pdata$FAB_classification,
  col = list(OS = col_OS),
  show_annotation_name = TRUE,
  annotation_name_gp = gpar(fontsize = 7)
)
6. 设定标题
# Title text passed to oncoPrint() as its column title.
column_title <- "This is Oncoplot "
7. 简单瀑布图
# Minimal oncoPrint: mutation matrix, drawing functions and colours only.
# alter_fun takes one cell at a time, hence alter_fun_is_vectorized = FALSE.
oncoPrint(
  matMut,
  alter_fun = alter_fun,
  col = col,
  alter_fun_is_vectorized = FALSE
)

8. 添加注释
# oncoPrint with the sample annotations, title and legend settings. The
# opening `oncoPrint(matMut,` and the closing parenthesis are restored so the
# argument list forms a complete call.
oncoPrint(
  matMut,
  bottom_annotation = ha, # annotation tracks drawn below the matrix
  # top_annotation = top_annotation,
  alter_fun = alter_fun,
  col = col,
  column_title = column_title,
  heatmap_legend_param = heatmap_legend_param,
  row_names_side = "left",
  pct_side = "right",
  # column_order = sample_order,
  # column_split = 3,
  alter_fun_is_vectorized = FALSE
)

9. 调整注释的位置
# Build the annotated oncoPrint without drawing it, so the legend positions
# can be chosen afterwards with draw(). The closing parenthesis of the call
# is restored.
oncoplot_anno <- oncoPrint(
  matMut,
  bottom_annotation = ha, # annotation tracks drawn below the matrix
  # top_annotation = top_annotation,
  alter_fun = alter_fun,
  col = col,
  column_title = "",
  heatmap_legend_param = heatmap_legend_param,
  row_names_side = "left",
  pct_side = "right",
  # column_order = sample_order,
  # column_split = 3,
  alter_fun_is_vectorized = FALSE
)
# Annotation legend on the left; heatmap legend at its default position.
# (The dangling trailing comma, which passed an empty argument, is removed.)
draw(oncoplot_anno, annotation_legend_side = "left")

# Annotation legend on the left, heatmap legend on the right.
draw(oncoplot_anno, annotation_legend_side = "left", heatmap_legend_side = "right")

# Annotation legend on the right, heatmap legend on the left.
# NOTE(review): the original call was cut off after heatmap_legend_side; it is
# closed here with the arguments that were visible.
draw(oncoplot_anno, annotation_legend_side = "right", heatmap_legend_side = "left")

# Annotation legend on the right, heatmap legend at the bottom, centred
# relative to the whole plot.
draw(oncoplot_anno, annotation_legend_side = "right", heatmap_legend_side = "bottom",
     align_heatmap_legend = "global_center")

