R语言做生信基因组数据绘图R

complexheatmap学习——oncoplot

2019-07-04  本文已影响41人  drlee_fc74

本文是对于ComplexHeatmap作者说明的学习笔记。如果想看原文可去:https://jokergoo.github.io/ComplexHeatmap-reference/book/a-list-of-heatmaps.html

oncoplot是对基因组改变(拷贝数改变或者突变)进行可视化的一种方式。很多工具都提供了绘制oncoplot的功能,例如maftools。在ComplexHeatmap当中也提供了oncoPrint函数用于绘制类似的图。相较于其他的工具,这个函数好的一点在于其可以和Heatmap图连用,绘制oncoplot + heatmap形式的图。

基本的设定

数据输入的形式

oncoplot作图的时候,需要输入一个字符矩阵,其中记录了每个基因在每个样本中发生的改变类型。

library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.0.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## ========================================
# Build a small gene-by-sample alteration matrix from inline CSV text.
# Rows are genes (g1..g3) and columns are samples (s1..s3); each cell lists
# the alteration types joined by ";" and is "" when nothing is recorded.
# Passing the data through `text =` lets read.table() open and close its own
# connection, so no textConnection is left open behind the call.
mat <- read.table(
  text = "s1,s2,s3
g1,snv;indel,snv,indel
g2,,snv;indel,snv
g3,snv,,indel;snv",
  row.names = 1, header = TRUE, sep = ",", stringsAsFactors = FALSE
)
# oncoPrint() is fed a character matrix below, so convert the data frame.
mat <- as.matrix(mat)
mat
##    s1          s2          s3         
## g1 "snv;indel" "snv"       "indel"    
## g2 ""          "snv;indel" "snv"      
## g3 "snv"       ""          "indel;snv"

通过上面的例子,我们可以看到:矩阵中的每个基因在某个样本中如果发生了基因组改变,就标注具体的改变类型;如果没有改变,则为空字符串。oncoPrint会自动识别以;:,|为分隔符的多种变异。

alter_fun自定义变异如何显示

alter_fun可以自定义不同的变异通过什么样子来进行显示。这个函数包括四个参数x,y,w,h分别代表变异的位置(x,y)以及高度(h)和宽度(w)。我们在得到所有变异的类型之后。在alter_fun函数里面要定义所有的变化特点。

# Fill colour per alteration type; the same vector is handed to oncoPrint().
col <- c(snv = "red", indel = "blue")

# List form of alter_fun: one drawing function per alteration type.
# Each function receives the cell position (x, y) and the cell width/height
# (w, h) and draws a rectangle scaled relative to the cell.
oncoPrint(
  mat,
  alter_fun = list(
    # snv: rectangle covering 90% of the cell in both directions
    snv = function(x, y, w, h) {
      grid.rect(x, y, width = w * 0.9, height = h * 0.9,
                gp = gpar(fill = col["snv"], col = NA))
    },
    # indel: same width, but only 40% of the cell height
    indel = function(x, y, w, h) {
      grid.rect(x, y, width = w * 0.9, height = h * 0.4,
                gp = gpar(fill = col["indel"], col = NA))
    }
  ),
  col = col
)
## All mutation types: snv, indel
image.png
# Function form of alter_fun: a single function draws every alteration of a
# cell. `v` is a named logical vector flagging which types are present.
oncoPrint(
  mat,
  alter_fun = function(x, y, w, h, v) {
    # number of alterations for the current gene in the current sample
    n_alt <- sum(v)
    usable_h <- h * 0.9
    if (n_alt > 0) {
      # `names(which(v))` keeps the colours aligned with the types in `v`
      present <- names(which(v))
      # split the usable height into n_alt equal slices, anchored at their tops
      slice_top <- y - usable_h * 0.5 + seq_len(n_alt) / n_alt * usable_h
      grid.rect(x, slice_top,
                width = w * 0.9, height = 1 / n_alt * usable_h,
                gp = gpar(fill = col[present], col = NA), just = "top")
    }
  },
  col = col
)
## All mutation types: snv, indel
image.png

背景设置

alter_fun当中不止可以设置各个变异的参数。同时也可以设置背景的颜色。默认的是没有背景颜色的。我们可以进行进一步的设置。

# Same per-type drawing functions as before, plus a `background` entry that
# paints a translucent green rectangle over the full cell.
oncoPrint(
  mat,
  alter_fun = list(
    background = function(x, y, w, h) {
      grid.rect(x, y, width = w, height = h, gp = gpar(fill = "#00FF0020"))
    },
    snv = function(x, y, w, h) {
      grid.rect(x, y, width = w * 0.9, height = h * 0.9,
                gp = gpar(fill = col["snv"], col = NA))
    },
    indel = function(x, y, w, h) {
      grid.rect(x, y, width = w * 0.9, height = h * 0.4,
                gp = gpar(fill = col["indel"], col = NA))
    }
  ),
  col = col
)
## All mutation types: snv, indel
image.png

如果不想要背景的话,可以通过background = function(...) NULL,来设置为没有背景。

更加复杂的设定

有时候我们不只是想要显示不同的颜色。同时还想要在不同变异上面进行标记。例如下面这个

# Simulate a 10 x 10 alteration matrix with two "primary" types (snv, indel)
# and two "secondary" marks (intronic, exonic).
set.seed(123)

# Primary alterations: a cell is never both snv and indel.
x1 <- sample(c("", "snv"), 100, replace = TRUE, prob = c(8, 2))
x2 <- sample(c("", "indel"), 100, replace = TRUE, prob = c(8, 2))
x2[x1 == "snv"] <- ""

# Secondary marks are drawn independently, then restricted below.
x3 <- sample(c("", "intronic"), 100, replace = TRUE, prob = c(5, 5))
x4 <- sample(c("", "exonic"), 100, replace = TRUE, prob = c(5, 5))

# A secondary mark only survives where a primary alteration exists, and
# intronic takes precedence over exonic.
no_primary <- x1 == "" & x2 == ""
x3[no_primary] <- ""
x4[no_primary | x3 == "intronic"] <- ""

# Collapse the four per-cell labels into one ";"-separated string.
labels <- cbind(x1, x2, x3, x4)
x <- vapply(
  seq_len(nrow(labels)),
  function(i) {
    cell <- labels[i, ]
    paste(cell[nzchar(cell)], collapse = ";")
  },
  character(1)
)

m <- matrix(
  x,
  nrow = 10, ncol = 10,
  dimnames = list(paste0("g", 1:10), paste0("s", 1:10))
)
# Drawing functions for the simulated matrix. snv/indel are shown as coloured
# rectangles; intronic/exonic are shown as symbols instead of colours.
alter_fun <- list(
  # light grey tile
  background = function(x, y, w, h) {
    grid.rect(x, y, width = w * 0.9, height = h * 0.9,
              gp = gpar(fill = "#CCCCCC", col = NA))
  },
  # red rectangle
  snv = function(x, y, w, h) {
    grid.rect(x, y, width = w * 0.9, height = h * 0.9,
              gp = gpar(fill = "red", col = NA))
  },
  # blue rectangle
  indel = function(x, y, w, h) {
    grid.rect(x, y, width = w * 0.9, height = h * 0.9,
              gp = gpar(fill = "blue", col = NA))
  },
  # filled dot at the cell position
  intronic = function(x, y, w, h) {
    grid.points(x, y, pch = 16)
  },
  # two crossed diagonal strokes spanning 80% of the cell
  exonic = function(x, y, w, h) {
    grid.segments(x0 = x - w * 0.4, y0 = y - h * 0.4,
                  x1 = x + w * 0.4, y1 = y + h * 0.4,
                  gp = gpar(lwd = 2))
    grid.segments(x0 = x + w * 0.4, y0 = y - h * 0.4,
                  x1 = x - w * 0.4, y1 = y + h * 0.4,
                  gp = gpar(lwd = 2))
  }
)
# Colours are given only for snv and indel; the captured output below shows
# oncoPrint() reporting intronic and exonic as having no colour defined.
ht <- oncoPrint(
  m,
  alter_fun = alter_fun,
  col = c(snv = "red", indel = "blue")
)
## All mutation types: snv, intronic, indel, exonic
## Colors are not defined for: intronic, exonic
# Add a separate points-type legend for the two symbol-only types.
# NOTE(review): pch 28 is not a base R symbol; presumably ComplexHeatmap's
# extended "crossed lines" code - confirm against the Legend() documentation.
draw(
  ht,
  heatmap_legend_list = list(
    Legend(labels = c("intronic", "exonic"), type = "points", pch = c(16, 28))
  )
)
image.png
# Print the top-left 4 x 4 corner of the simulated alteration matrix `m`.
m[1:4, 1:4]
##    s1    s2             s3             s4            
## g1 ""    "snv;intronic" "snv;intronic" "snv"         
## g2 ""    ""             ""             "snv;intronic"
## g3 ""    ""             ""             ""            
## g4 "snv" "indel;exonic" "snv"          ""

函数的其他参数

show_column_names可以用来定义是否显示列名;row_names_side定义行名的位置;pct_side定义突变百分比的位置;pct_digits定义突变百分比保留的小数位数;anno_oncoprint_barplot调整上面和右面barplot的具体参数;heatmap_legend_param定义图例的变化。

# Tune the two barplot annotations: a 1 cm high barplot on top, and a 4 cm
# wide barplot on the right with custom axis breaks and labels.
# The original call ended with a stray trailing comma, which passes an empty
# argument into `...`; it is removed so the call does not depend on how that
# empty argument is handled.
oncoPrint(
  mat,
  alter_fun = alter_fun, col = col,
  top_annotation = HeatmapAnnotation(
    cbar = anno_oncoprint_barplot(height = unit(1, "cm"))
  ),
  right_annotation = rowAnnotation(
    rbar = anno_oncoprint_barplot(
      width = unit(4, "cm"),
      axis_param = list(
        at = c(0, 2, 4),
        labels = c("zero", "two", "four"),
        side = "top",
        labels_rot = 0
      )
    )
  )
)
## All mutation types: snv, indel
image.png

如果我们想把右边的图放到左边的话,则可以

# Move the row barplot to the left: attach it as left_annotation with a
# reversed axis direction, and switch the default right-hand barplot off.
oncoPrint(
  mat,
  alter_fun = alter_fun,
  col = col,
  left_annotation = rowAnnotation(
    rbar = anno_oncoprint_barplot(axis_param = list(direction = "reverse"))
  ),
  right_annotation = NULL
)
## All mutation types: snv, indel
image.png

具体绘图演示

我们使用TCGA数据库当中一部分突变数据来进行绘图。首先加载数据

# Load the example TCGA lung adenocarcinoma table shipped with ComplexHeatmap.
mat <- read.table(
  system.file(
    "extdata",
    "tcga_lung_adenocarcinoma_provisional_ras_raf_mek_jnk_signalling.txt",
    package = "ComplexHeatmap"
  ),
  header = TRUE, stringsAsFactors = FALSE, sep = "\t"
)
# Missing cells become empty strings.
mat[is.na(mat)] <- ""
# The first column supplies the row names and is then dropped.
rownames(mat) <- mat[, 1]
mat <- mat[, -1]
# The last column is dropped as well.
# NOTE(review): presumably an empty trailing column in the file - confirm.
mat <- mat[, -ncol(mat)]
# Transpose so that genes are rows and samples are columns.
mat <- t(as.matrix(mat))
mat[1:3, 1:3]
##      TCGA-05-4384-01 TCGA-05-4390-01 TCGA-05-4425-01
## KRAS "  "            "MUT;"          "  "           
## HRAS "  "            "  "            "  "           
## BRAF "  "            "  "            "  "

我们发现数据当中存在三种变异分别是HOMDEL, AMP and MUT所以,我们对三种变异进行定义

# Fill colour for each of the three alteration types in the TCGA matrix.
col <- c(HOMDEL = "blue", AMP = "red", MUT = "#008000")

# Drawing functions: every rectangle is shrunk by 0.5 mm to leave a gap
# between neighbouring cells.
alter_fun <- list(
  # grey backdrop tile
  background = function(x, y, w, h) {
    grid.rect(x, y,
              width = w - unit(0.5, "mm"), height = h - unit(0.5, "mm"),
              gp = gpar(fill = "#CCCCCC", col = NA))
  },
  # big blue
  HOMDEL = function(x, y, w, h) {
    grid.rect(x, y,
              width = w - unit(0.5, "mm"), height = h - unit(0.5, "mm"),
              gp = gpar(fill = col["HOMDEL"], col = NA))
  },
  # big red
  AMP = function(x, y, w, h) {
    grid.rect(x, y,
              width = w - unit(0.5, "mm"), height = h - unit(0.5, "mm"),
              gp = gpar(fill = col["AMP"], col = NA))
  },
  # small green: one third of the cell height, so it can sit on top of a
  # full-size AMP/HOMDEL rectangle
  MUT = function(x, y, w, h) {
    grid.rect(x, y,
              width = w - unit(0.5, "mm"), height = h * 0.33,
              gp = gpar(fill = col["MUT"], col = NA))
  }
)
# Plot title and legend settings, defined once so later calls can reuse them.
column_title <- "OncoPrint for TCGA Lung Adenocarcinoma, genes in Ras Raf MEK JNK signalling"
# Legend title corrected from the misspelled "Alternations"; `at` lists the
# alteration codes and `labels` the readable names shown for them.
heatmap_legend_param <- list(
  title = "Alterations",
  at = c("HOMDEL", "AMP", "MUT"),
  labels = c("Deep deletion", "Amplification", "Mutation")
)
oncoPrint(
  mat,
  alter_fun = alter_fun, col = col,
  column_title = column_title,
  heatmap_legend_param = heatmap_legend_param
)
## All mutation types: MUT, AMP, HOMDEL
image.png

上图我们可以看到有很多样本都没有突变,这个时候我们就需要去掉这些没有突变的样本。remove_empty_columns和remove_empty_rows可以分别去掉没有突变的样本或者基因。

# Same plot, but dropping samples (columns) and genes (rows) that carry no
# alteration at all.
oncoPrint(
  mat,
  alter_fun = alter_fun,
  col = col,
  remove_empty_columns = TRUE,
  remove_empty_rows = TRUE,
  column_title = column_title,
  heatmap_legend_param = heatmap_legend_param
)
## All mutation types: MUT, AMP, HOMDEL
image.png

另外我们可以通过row_order以及column_order来调整分布的顺序

oncoplot注释

我们可以通过anno_oncoprint_barplot来自定义注释的内容。

# Restrict each barplot annotation to a subset of alteration types.
oncoPrint(
  mat,
  alter_fun = alter_fun, col = col,
  top_annotation = HeatmapAnnotation(
    # column barplot: only MUT, 4 cm high
    column_barplot = anno_oncoprint_barplot(
      "MUT",
      border = TRUE, height = unit(4, "cm")
    )
  ),
  right_annotation = rowAnnotation(
    # row barplot: only AMP and HOMDEL. A row annotation is sized by its
    # width (as in the earlier `rbar` example), so the 4 cm goes in `width`
    # rather than `height`.
    row_barplot = anno_oncoprint_barplot(
      c("AMP", "HOMDEL"),
      border = TRUE, width = unit(4, "cm"),
      axis_param = list(side = "bottom", labels_rot = 90)
    )
  ),
  remove_empty_columns = TRUE, remove_empty_rows = TRUE,
  column_title = column_title, heatmap_legend_param = heatmap_legend_param
)
## All mutation types: MUT, AMP, HOMDEL
image.png

另外由于oncoplot本身就是热图。所以我们可以在原始的基础上添加别的注释

# Combine the oncoPrint barplot with ordinary heatmap annotations.
# NOTE(review): 172 and 26 are hard-coded; presumably they are the numbers of
# samples and genes left after the empty columns/rows are removed - confirm
# before reusing this call with other data.
oncoPrint(
  mat,
  alter_fun = alter_fun,
  col = col,
  remove_empty_columns = TRUE,
  remove_empty_rows = TRUE,
  top_annotation = HeatmapAnnotation(
    cbar = anno_oncoprint_barplot(),
    foo1 = seq_len(172),
    bar1 = anno_points(seq_len(172))
  ),
  left_annotation = rowAnnotation(foo2 = seq_len(26)),
  right_annotation = rowAnnotation(bar2 = anno_barplot(seq_len(26))),
  column_title = column_title,
  heatmap_legend_param = heatmap_legend_param
)
## All mutation types: MUT, AMP, HOMDEL
image.png

oncoplot和heatmap连接

# Horizontal concatenation: the oncoPrint plus a 4 cm wide heatmap of random
# values, one heatmap row per row of `mat`.
ht_list <- oncoPrint(
  mat,
  alter_fun = alter_fun,
  col = col,
  column_title = column_title,
  heatmap_legend_param = heatmap_legend_param
) +
  Heatmap(
    matrix(rnorm(nrow(mat) * 10), ncol = 10),
    name = "expr",
    width = unit(4, "cm")
  )
## All mutation types: MUT, AMP, HOMDEL
# Split the rows of the combined plot into two randomly assigned groups.
draw(
  ht_list,
  row_split = sample(c("a", "b"), nrow(mat), replace = TRUE)
)
image.png
# Vertical concatenation with %v%: the oncoPrint on top and a 4 cm high
# heatmap of random values below, one heatmap column per column of `mat`.
ht_list <- oncoPrint(
  mat,
  alter_fun = alter_fun,
  col = col,
  column_title = column_title,
  heatmap_legend_param = heatmap_legend_param
) %v%
  Heatmap(
    matrix(rnorm(ncol(mat) * 10), nrow = 10),
    name = "expr",
    height = unit(4, "cm")
  )
## All mutation types: MUT, AMP, HOMDEL
draw(ht_list)
image.png
上一篇下一篇

猜你喜欢

热点阅读