TCGA

突变数据的处理及柱状图的使用

2021-01-19  本文已影响0人  碌碌无为的杰少

突变数据其实做的不多,代码什么的也较少,其实也比较简单,现在就整理了一下,然后得到了柱形图的代码,一并奉上。

下载数据

options(stringsAsFactors = F) 
require(maftools) 
require(dplyr)
COAD = read.maf(maf = 'TCGA.COAD.mutect.03652df4-6090-4f5a-a2ff-ee28a37f9301.DR-10.0.somatic.maf.gz')
info_merge=data.frame(id=COAD@clinical.data$Tumor_Sample_Barcode,
                      sample=substring(COAD@clinical.data$Tumor_Sample_Barcode,1,15))

自带的包

maf_df = COAD@data
plotmafSummary(maf = COAD, rmOutlier = TRUE,
               #showBarcodes = FALSE,
               addStat = 'median', dashboard = TRUE, titvRaw = FALSE)
 COAD@variants.per.sample
maf_df %>% filter(Hugo_Symbol=="EGFR") %>%
                    count(Variant_Classification,sort = T)
count(maf_df,Hugo_Symbol,sort = T)[1:30]

·#分组处理

dd1 <- Last1[,c('CIC','sampleID')]
colnames(dd1)[2] <- 'sample'
info_merge1 <- inner_join(info_merge,dd1,by='sample')



A.name=as.character(subset(info_merge1,CIC=="A")$id)
B.name=as.character(subset(info_merge1,CIC=="B")$id)
C.name=as.character(subset(info_merge1,CIC=="C")$id)
D.name=as.character(subset(info_merge1,CIC=="D")$id)

A=subsetMaf(maf = COAD,tsb=A.name,isTCGA = F)
B=subsetMaf(maf = COAD,tsb=B.name,isTCGA = F)
C=subsetMaf(maf = COAD,tsb=C.name,isTCGA = F)
D=subsetMaf(maf = COAD,tsb=D.name,isTCGA = F)
length(unique(A@data$Tumor_Sample_Barcode))
length(unique(B@data$Tumor_Sample_Barcode))
length(unique(C@data$Tumor_Sample_Barcode))
length(unique(D@data$Tumor_Sample_Barcode))


col = RColorBrewer::brewer.pal(n = 8, name = 'Paired')
names(col) = c('Frame_Shift_Del','Missense_Mutation', 'Nonsense_Mutation', 'Multi_Hit', 'Frame_Shift_Ins',
               'In_Frame_Ins', 'Splice_Site', 'In_Frame_Del')
oncoplot(maf = COAD ,writeMatrix = T,top=30, colors = col,fontSize = 1)
COAD1 = read.table("onco_matrix.txt", 
                  header = TRUE, 
                  stringsAsFactors = FALSE, sep = "\t")

oncoplot(maf = A , colors = col,fontSize = 0.6,genes = rownames(COAD1),keepGeneOrder= T)
oncoplot(maf = B , colors = col,fontSize = 0.6,genes = rownames(COAD1),keepGeneOrder= T)
oncoplot(maf = C , colors = col,fontSize = 0.6,genes = rownames(COAD1),keepGeneOrder= T)
oncoplot(maf = D , colors = col,fontSize = 0.6,genes = rownames(COAD1),keepGeneOrder= T)
图片.png

柱形图和TMB

tmp <- Last1$MSI.Status
tmp <- Last1[,c('CIC','MSI.Status','Stage')]
df <- data.frame()
col21 = c("#E11E24","#92D0E6","#F5949B","#FBB96F","#007AB8","#A2D184","#00A04E",
          "#BC5627","#0080BD","#EBC379","#A74D9D","#F57E21","#00998F","#F4E708",
          "#F184B3","#9B9A99","#6CC7C0","#BCB9DA","#F37C75","#5FB0D4","#F9AD62")

for(i in unique(tmp$CIC)){
df_i <- subset(tmp, tmp$CIC==i) %>% pull(MSI.Status) %>% table()
                    df_i <- data.frame(sample=rep(i, length(df_i)), value=df_i)
                    names(df_i) <- c("CIC", "MSI.Status", "value")
                    df <- rbind(df, df_i)
                     }

p1 <- ggplot(df, aes(x=CIC, y=value, fill=MSI.Status)) + 
                    geom_bar(stat= "identity", position = "fill") + 
                    scale_fill_manual(values = col21 ) +
                    labs(x = 'MSI.Status', y = 'Relative Abundance', title = 'Samples composition') +
                    theme(panel.grid = element_blank(), panel.background = element_blank(), 
                          axis.text.x = element_text(angle=45))
p1
pdf("p1.pdf",width = 6,height = 7)
print(p1)
dev.off()

tmp <- Last1$MSI.Status

tmp <- Last1[,c('CIC','MSI.Status','Stage')]
df <- data.frame()
for(i in unique(tmp$CIC)){
                    df_i <- subset(tmp, tmp$CIC==i) %>% pull(Stage) %>% table()
                    df_i <- data.frame(sample=rep(i, length(df_i)), value=df_i)
                    names(df_i) <- c("CIC", "Stage", "value")
                    df <- rbind(df, df_i)
}


p2 <- ggplot(df, aes(x=CIC, y=value, fill=Stage)) + 
                    geom_bar(stat= "identity", position = "fill") + 
                    scale_fill_manual(values = col21 ) +
                    labs(x = 'Stage', y = 'Relative Abundance', title = 'Samples composition') +
                    theme(panel.grid = element_blank(), panel.background = element_blank(), 
                          axis.text.x = element_text(angle=45))
p2
pdf("p2.pdf",width = 6,height = 7)
print(p2)
dev.off()

DD2 <- COAD@variants.per.sample
DD2$Variants <- DD2$Variants/35
colnames(DD2)[1] <- 'sample'
DD2$sample <- substring(DD2$sample,1,15)
info_merge2$group <- ifelse(info_merge2$TMB> 10,'high','low')
tmp <- info_merge2[,c('CIC','group')]
df <- data.frame()

for(i in unique(tmp$CIC)){
                    df_i <- subset(tmp, tmp$CIC==i) %>% pull(group) %>% table()
                    df_i <- data.frame(sample=rep(i, length(df_i)), value=df_i)
                    names(df_i) <- c("CIC", "group", "value")
                    df <- rbind(df, df_i)
}

p3 <- ggplot(df, aes(x=CIC, y=value, fill=group)) + 
                    geom_bar(stat= "identity", position = "fill") + 
                    scale_fill_manual(values = c('#E11E24',"#92D0E6") ) +
                    labs(x = 'MSI.Status', y = 'Relative Abundance', title = 'Samples composition') +
                    theme(panel.grid = element_blank(), panel.background = element_blank(), 
                          axis.text.x = element_text(angle=45))

p3
pdf("p3.pdf",width = 6,height = 7)
print(p3)
dev.off()
图片.png
图片.png

boxplot

my_comparisons <- list(
                    c("D", "B"), c("D", "C"),
                    c("D", "A")
)

# Create the box plot. Change colors by groups: Species
# Add jitter points and change the shape by groups
ggboxplot(
                    info_merge2, x = "CIC", y = "TMB",
                    color = "CIC", palette = 'nejm',
                    add = "jitter"
)+
                    #scale_y_continuous(limits=c(0, 90), breaks=c(0, 30, 60, 90))+
                    stat_compare_means(comparisons = my_comparisons, method = "t.test")


图片.png
上一篇下一篇

猜你喜欢

热点阅读