TCGA-15.突变数据分析-mutation signafit
2020-01-29 本文已影响0人
小洁忘了怎么分身
花花写于2020-01-29 今天大年初四,在家各种姿势打滚,实在是无聊,遂决定起来学习。经评测,发明了最舒服的边休息边工作姿势,大概就是:
0.准备R包
懒得写安装代码啦,你长大了,要学会自己安装,参考https://www.jianshu.com/p/bba1ad57a144
rm(list=ls())
options(stringsAsFactors = F)
project='TCGA_KIRC'
library(maftools)
library(dplyr)
library(pheatmap)
library(ComplexHeatmap)
library(stringr)
require(deconstructSigs)
require(BSgenome)
require(BSgenome.Hsapiens.UCSC.hg38)
1.读取突变数据maf
是和上一篇一样的数据,从gdc下载的maf文件和临床信息,见:
https://www.jianshu.com/p/2d6cb5bd8771
laml = read.maf(maf = 'TCGA.KIRC.mutect.somatic.maf.gz')
#> -Reading
#> -Validating
#> -Silent variants: 8383
#> -Summarizing
#> --Mutiple centers found
#> BCM;BI--Possible FLAGS among top ten genes:
#> TTN
#> MUC16
#> HMCN1
#> -Processing clinical data
#> --Missing clinical data
#> -Finished in 4.134s elapsed (6.208s cpu)
laml
#> An object of class MAF
#> ID summary Mean Median
#> 1: NCBI_Build GRCh38 NA NA
#> 2: Center BCM;BI NA NA
#> 3: Samples 336 NA NA
#> 4: nGenes 9444 NA NA
#> 5: Frame_Shift_Del 1732 5.155 4
#> 6: Frame_Shift_Ins 1201 3.574 1
#> 7: In_Frame_Del 238 0.708 0
#> 8: In_Frame_Ins 350 1.042 0
#> 9: Missense_Mutation 12997 38.682 36
#> 10: Nonsense_Mutation 1259 3.747 2
#> 11: Nonstop_Mutation 18 0.054 0
#> 12: Splice_Site 490 1.458 1
#> 13: Translation_Start_Site 25 0.074 0
#> 14: total 18310 54.494 47
mut=laml@data
mut=mut[mut$Variant_Type=='SNP',]
#每行记录了一个突变,所以统计样本列的行数即为样本的突变数量
plot(table(mut[,16]),las = 2)
可以可视化看每个样本的突变数量
2.制作signatures的输入数据(96频谱)
关于什么是mutation
signature:http://www.bio-info-trainee.com/1619.html
关于96频谱:http://www.biotrainee.com/thread-832-1-1.html
a = mut[,c(16,5,6,12,13)]
colnames(a)=c( "Sample","chr", "pos","ref", "alt")
a$Sample=as.character(a$Sample)
sigs.input <- mut.to.sigs.input(mut.ref = a,
sample.id = "Sample",
chr = "chr",
pos = "pos",
ref = "ref",
alt = "alt",
bsg = BSgenome.Hsapiens.UCSC.hg38)
class(sigs.input)
#> [1] "data.frame"
#第一个样本的突变绘图
barplot(as.numeric(sigs.input[1,])~colnames(sigs.input))
sigs.input[1:4,1:4]
#> A[C>A]A A[C>A]C A[C>A]G A[C>A]T
#> TCGA-G6-A8L7-01A-11D-A36X-10 0 1 0 0
#> TCGA-B8-A54G-01A-11D-A25V-10 0 1 0 0
#> TCGA-BP-4995-01A-01D-1462-08 0 0 0 0
#> TCGA-B0-5100-01A-01D-1421-08 1 0 0 0
一顿操作猛如虎,生成signature与样本对应关系的矩阵
if(!file.exists(paste0(project,"w.Rdata"))){
w=lapply(unique(a$Sample), function(i){
## signatures.cosmic signatures.nature2013
sample_1 = whichSignatures(tumor.ref = sigs.input[,],
signatures.ref = signatures.cosmic,
sample.id = i,
contexts.needed = TRUE,
tri.counts.method = 'default')
print(c(i,which(unique(a$Sample)==i)))
return(sample_1$weights)
})
w=do.call(rbind,w)
save(w,file = paste0(project,"w.Rdata"))
}
load(paste0(project,"w.Rdata"))
mat = t(w)
3.可视化
矩阵的可视化--热图
3.1 简单版本的热图
pheatmap(mat,
cluster_rows = F,
cluster_cols = T,
show_colnames = F)
3.2 好看一点的热图
看到了这个图,氮素不知道用什么包画的,反正闲的没事干,不如自己模仿一波。如果你知道用的什么包,可以留言告诉我~感谢
热图上面放直方图,直方图内容是每个样本的突变数量。想到了ComplexHeatmap,可以将直方图作为注释添加上去。
难点是调整顺序,R语言神技能加持。
捋一捋:
直方图横纵坐标是样本和突变数量
热图输入数据mat横纵坐标是样本和signature
注释栏来源于clinical数据,与样本一一对应。
所以需要调整样本顺序,三者统一。
注释栏简化一下,用生死和性别;先排一下序,再按照排好的顺序给mat和直方图输入数据排序。
load("clinical.Rdata")
table( str_sub(colnames(mat),1,12)%in% cl_df$bcr_patient_barcode)
#>
#> TRUE
#> 336
#一一对应,只要mat列名对应的临床数据。
cl_df = cl_df[cl_df$bcr_patient_barcode %in% str_sub(colnames(mat),1,12),]
#按照性别和生死排序
cl_df = arrange(cl_df,gender,vital_status)
#mat排序
x = match(cl_df$bcr_patient_barcode,str_sub(colnames(mat),1,12))
mat = mat[,x]
#直方图输入数据排序
for_bar = laml@variants.per.sample$Variants[match(cl_df$bcr_patient_barcode,str_sub(laml@variants.per.sample$Tumor_Sample_Barcode,1,12))]
#确认顺序是否正确
cl_df$bcr_patient_barcode[1] == str_sub(colnames(mat),1,12)[1]
#> [1] TRUE
cl_df$bcr_patient_barcode[1] == for_bar[1]
#> [1] FALSE
annotation = HeatmapAnnotation(bar1 = anno_barplot(for_bar),
df = data.frame(gender = cl_df$gender,
vital = cl_df$vital_status)
)
# top_annotation参数在顶部添加注释信息
ht_list = Heatmap(mat, name = "mat",
#top_annotation = column_ha,
#right_annotation = row_ha,
cluster_rows = F,
cluster_columns = F,
#show_row_names = F,
show_column_names = F,
top_annotation = annotation)
draw(ht_list, heatmap_legend_side = "left", annotation_legend_side = "bottom")