ggplot集锦

文章复现进程3

2021-12-13  本文已影响0人  jiarf

这篇一部分复现的文章是
Comprehensive Analysis of the Expression and Prognosis for E2Fs in Human Breast Cancer的图7图8图9

原图如图所示

一、Fig7 :A、 B 、C

由于上图网络图没有分析结果,所以此处用的gene list是E2Fs
使用GO分析结果,代码如下

#bioinformatics  bp
setwd('/data/jiarf/literiture')
rm(list=ls())
library(clusterProfiler)
library(org.Hs.eg.db)
bp <- read.delim('./chart_bp.txt')
cc <- read.delim('./chart_cc.txt')
genelist <- c('E2F1',
'E2F2',
'E2F3',
'E2F4',
'E2F5',
'E2F6',
'E2F7',
'E2F8')
geneinfo = select(org.Hs.eg.db, keys=keys(org.Hs.eg.db), columns = c('ENTREZID',"SYMBOL"))
geneinfo2 <- geneinfo[which(geneinfo$SYMBOL%in% genelist),]

ego_cc <- enrichGO(gene = geneinfo2$ENTREZID,
                   OrgDb=org.Hs.eg.db,
                   ont = "CC",##"MF", "BP", and "CC"
                   pAdjustMethod = "BH",
                   minGSSize = 1,
                   pvalueCutoff = 0.05)
ego_cc_result<-as.data.frame(ego_cc@result)
ego_cc_result <- ego_cc_result[ego_cc_result$pvalue<0.05,]
library(ggplot2)

dt <- ego_cc_result
dt$`-log10(pvalue)` <- -log10(dt$pvalue)
dt$Count<-as.numeric(gsub('/[0-9]+','',dt$GeneRatio))
dt$Description<-factor(dt$Description,levels = rev(unique(dt$Description)))


# p<- ggplot(dt,aes(`-log10(pvalue)`,y=reorder(Description,-pvalue)))+ #以pvalue和pathway为X轴和Y轴
#   geom_point(aes(size=Count,color=Change))+ #点图大小和颜色数据
#   labs(y='',x='-log10(Pvalue)')+
#   scale_size_area(name="Gene count")+
#   theme_bw()+
#   geom_vline(xintercept = -log10(0.05),linetype =2,colour = 'black')+
#   theme(axis.text=element_text(size=15,face = "bold", color="gray50"),
#         axis.title=element_text(size=16),
#         legend.text=element_text(size=16),legend.title = element_text(size=15))
# p

p <- ggplot(data=dt, aes(x=ID, y=Count,fill=`-log10(pvalue)`)) +
  geom_bar(stat="identity", width=0.8) + 
  coord_flip() +
  theme_bw() + 
  # scale_x_discrete(labels=rev(dt$Description)) +
  xlab("") +
  theme(axis.text=element_text(size=10,face = "plain", color="black"),
        axis.title=element_text(size=10),
        legend.text=element_text(size=10),legend.title = element_text(size=20)) +
  scale_fill_gradient2(mid = "white",low='red',high='blue',midpoint = (16.671628-1.347296)/2)
p
ggsave('./CC.pdf',p,width=8,height = 5)
write.table(dt,'./CC_data.tab',sep='\t',quote=F)
#### bp
ego_bp <- enrichGO(gene = geneinfo2$ENTREZID,
                   OrgDb=org.Hs.eg.db,
                   ont = "BP",##"MF", "BP", and "CC"
                   pAdjustMethod = "BH",
                   minGSSize = 1,
                   pvalueCutoff = 0.05)
ego_bp_result<-as.data.frame(ego_bp@result)
ego_bp_result <- ego_bp_result[ego_bp_result$pvalue<0.05,]
library(ggplot2)
dt <- ego_bp_result
dt$`-log10(pvalue)` <- -log10(dt$pvalue)
dt$Count<-as.numeric(gsub('/[0-9]+','',dt$GeneRatio))
dt$Description<-factor(dt$Description,levels = rev(unique(dt$Description)))
 # dt2 <- dt[dt$ID %in% c('GO:0006352','GO:0051726','GO:0045739','GO:0034644','GO:0007265',
 #                        'GO:0006281','GO:0006270','GO:0006260',
 #                          'GO:0000122','GO:0000082'),]
dt2 <- dt[order(dt$`-log10(pvalue)`,decreasing = T),][1:20,]
range(dt2$`-log10(pvalue)`)
p <- ggplot(data=dt2, aes(x=ID, y=Count,fill=`-log10(pvalue)`)) +
  geom_bar(stat="identity", width=0.8) + 
  coord_flip() +
  theme_bw() + 
  # scale_x_discrete(labels=rev(dt$Description)) +
  xlab("") +
  theme(axis.text=element_text(size=10,face = "plain", color="black"),
        axis.title=element_text(size=10),
        legend.text=element_text(size=10),legend.title = element_text(size=20)) +
  scale_fill_gradient2(mid = "white",low='red',high='blue',midpoint = (9.485309-7.154111)/2+7.154111)
p
ggsave('./BP.pdf',p,width=8,height = 5)
write.table(dt2,'./BP_data.tab',sep='\t',quote=F)
##### MF
ego_mf <- enrichGO(gene = geneinfo2$ENTREZID,
                   OrgDb=org.Hs.eg.db,
                   ont = "MF",##"MF", "BP", and "CC"
                   pAdjustMethod = "BH",
                   minGSSize = 1,
                   pvalueCutoff = 0.05)
ego_mf_result<-as.data.frame(ego_mf@result)
ego_mf_result <- ego_mf_result[ego_mf_result$pvalue<0.05,]
library(ggplot2)
dt <- ego_mf_result
dt$`-log10(pvalue)` <- -log10(dt$pvalue)
dt$Count<-as.numeric(gsub('/[0-9]+','',dt$GeneRatio))
dt$Description<-factor(dt$Description,levels = rev(unique(dt$Description)))
range(dt$`-log10(pvalue)`)
p <- ggplot(data=dt, aes(x=ID, y=Count,fill=`-log10(pvalue)`)) +
  geom_bar(stat="identity", width=0.8) + 
  coord_flip() +
  theme_bw() + 
  # scale_x_discrete(labels=rev(dt$Description)) +
  xlab("") +
  theme(axis.text=element_text(size=10,face = "plain", color="black"),
        axis.title=element_text(size=10),
        legend.text=element_text(size=10),legend.title = element_text(size=20)) +
  scale_fill_gradient2(mid = "white",low='red',high='blue',midpoint = (5.640599-1.667579)/2+1.667579)
p
ggsave('./MF.pdf',p,width=8,height = 5)
write.table(dt,'./MF_data.tab',sep='\t',quote=F)

图分别为


BP
CC
MF

由图中我们可以看出,E2Fs的BP富集到的terms为

GO:0000083 regulation of transcription involved in G1/S transition of mitotic cell cycle
GO:0006977 DNA damage response, signal transduction by p53 class mediator resulting in cell cycle arrest
GO:0072431 signal transduction involved in mitotic G1 DNA damage checkpoint
GO:1902400 intracellular signal transduction involved in G1 DNA damage checkpoint
GO:0072413 signal transduction involved in mitotic cell cycle checkpoint
GO:1902402 signal transduction involved in mitotic DNA damage checkpoint
GO:1902403 signal transduction involved in mitotic DNA integrity checkpoint
GO:0031571 mitotic G1 DNA damage checkpoint
GO:0044819 mitotic G1/S transition checkpoint
GO:0044783 G1 DNA damage checkpoint
GO:0072401 signal transduction involved in DNA integrity checkpoint
GO:0072422 signal transduction involved in DNA damage checkpoint
GO:0072395 signal transduction involved in cell cycle checkpoint
GO:0071158 positive regulation of cell cycle arrest
GO:0072331 signal transduction by p53 class mediator
GO:0000082 G1/S transition of mitotic cell cycle
GO:0044773 mitotic DNA damage checkpoint
GO:0044843 cell cycle G1/S phase transition
GO:0044774 mitotic DNA integrity checkpoint
GO:0030330 DNA damage response, signal transduction by p53 class mediator

与原文找到的通路有些出入,但是我们找到的结果中,已经包含了文章中提到的transcription, DNA-templatedDNA replication initiationDNA replication, G1/S transition of mitotic cell cycle, regulation of cell cycle, negative regulation of transcription from RNA polymerase II promoter,但是此处由于富集的 terms 条数太多,所以我们按照 -log10(pvalue) 选择 Top20 的通路进行了展示,
同理CC的结果如下:

GO:0090575 RNA polymerase II transcription regulator complex
GO:0005667 transcription regulator complex
GO:0035189 Rb-E2F complex
GO:0000790 nuclear chromatin
GO:0044665 MLL1/2 complex
GO:0071339 MLL1 complex
GO:0035097 histone methyltransferase complex
GO:0034708 methyltransferase complex

MF结果如下:

GO:0090575 RNA polymerase II transcription regulator complex
GO:0005667 transcription regulator complex
GO:0035189 Rb-E2F complex
GO:0000790 nuclear chromatin
GO:0044665 MLL1/2 complex
GO:0071339 MLL1 complex
GO:0035097 histone methyltransferase complex
GO:0034708 methyltransferase complex

也包括了文章提到的transcription factor complex和一些细胞周期依赖蛋白复合物

Fig8 kegg

代码如下:

#### kegg
ego_kegg <- enrichKEGG(gene = geneinfo2$ENTREZID,
                   
                   keyType = 'kegg',##"MF", "BP", and "CC"
                   pAdjustMethod = "BH",
                   minGSSize = 1,
                   pvalueCutoff = 0.05)
ego_kegg_result<-as.data.frame(ego_kegg@result)
ego_kegg_result <- ego_kegg_result[ego_kegg_result$pvalue<0.05,]
library(ggplot2)
dt <- ego_kegg_result
dt$`-log10(pvalue)` <- -log10(dt$pvalue)
dt$Count<-as.numeric(gsub('/[0-9]+','',dt$GeneRatio))
dt$Description<-factor(dt$Description,levels = rev(unique(dt$Description)))
range(dt$`-log10(pvalue)`)
p <- ggplot(data=dt, aes(x=Description, y=Count,fill=`-log10(pvalue)`)) +
  geom_bar(stat="identity", width=0.8) + 
  coord_flip() +
  theme_bw() + 
  # scale_x_discrete(labels=rev(dt$Description)) +
  xlab("") +
  theme(axis.text=element_text(size=10,face = "plain", color="black"),
        axis.title=element_text(size=10),
        legend.text=element_text(size=10),legend.title = element_text(size=20)) +
  scale_fill_gradient2(mid = "white",low='red',high='blue',midpoint = (9.078405-1.360482 )/2+1.360482 )
p
ggsave('./kegg.pdf',p,width=8,height = 5)
write.table(dt,'./kegg_data.tab',sep='\t',quote=F)

结果如图
原文只找到了15个通路,我们此处找到了24个通路,包括文章中提到的Cell cycle, TGF-beta signaling pathway,
还有好多cancer的通路 但是没有找到signaling pathway, and cfa04310:Wnt signaling pathway

Fig 9

kegg pathview
打开网站KEGG PATHWAY Database (genome.jp)

输入E2F,点击GO
点击第一个 cell cycle
选择参考基因组为人,点击go
得到如图所示结果,下载标圈即可
同理可以找到 在kegg结果第二页
接着把文件下载即可
上一篇下一篇

猜你喜欢

热点阅读