文章复现进程3
这篇一部分复现的文章是
Comprehensive Analysis of the Expression and Prognosis for E2Fs in Human Breast Cancer的图7图8图9
一、Fig7 :A、 B 、C
由于上图网络图没有分析结果,所以此处用的gene list是E2Fs
使用GO分析结果,代码如下
#bioinformatics bp
setwd('/data/jiarf/literiture')
rm(list=ls())
library(clusterProfiler)
library(org.Hs.eg.db)
bp <- read.delim('./chart_bp.txt')
cc <- read.delim('./chart_cc.txt')
genelist <- c('E2F1',
'E2F2',
'E2F3',
'E2F4',
'E2F5',
'E2F6',
'E2F7',
'E2F8')
geneinfo = select(org.Hs.eg.db, keys=keys(org.Hs.eg.db), columns = c('ENTREZID',"SYMBOL"))
geneinfo2 <- geneinfo[which(geneinfo$SYMBOL%in% genelist),]
ego_cc <- enrichGO(gene = geneinfo2$ENTREZID,
OrgDb=org.Hs.eg.db,
ont = "CC",##"MF", "BP", and "CC"
pAdjustMethod = "BH",
minGSSize = 1,
pvalueCutoff = 0.05)
ego_cc_result<-as.data.frame(ego_cc@result)
ego_cc_result <- ego_cc_result[ego_cc_result$pvalue<0.05,]
library(ggplot2)
dt <- ego_cc_result
dt$`-log10(pvalue)` <- -log10(dt$pvalue)
dt$Count<-as.numeric(gsub('/[0-9]+','',dt$GeneRatio))
dt$Description<-factor(dt$Description,levels = rev(unique(dt$Description)))
# p<- ggplot(dt,aes(`-log10(pvalue)`,y=reorder(Description,-pvalue)))+ #以pvalue和pathway为X轴和Y轴
# geom_point(aes(size=Count,color=Change))+ #点图大小和颜色数据
# labs(y='',x='-log10(Pvalue)')+
# scale_size_area(name="Gene count")+
# theme_bw()+
# geom_vline(xintercept = -log10(0.05),linetype =2,colour = 'black')+
# theme(axis.text=element_text(size=15,face = "bold", color="gray50"),
# axis.title=element_text(size=16),
# legend.text=element_text(size=16),legend.title = element_text(size=15))
# p
p <- ggplot(data=dt, aes(x=ID, y=Count,fill=`-log10(pvalue)`)) +
geom_bar(stat="identity", width=0.8) +
coord_flip() +
theme_bw() +
# scale_x_discrete(labels=rev(dt$Description)) +
xlab("") +
theme(axis.text=element_text(size=10,face = "plain", color="black"),
axis.title=element_text(size=10),
legend.text=element_text(size=10),legend.title = element_text(size=20)) +
scale_fill_gradient2(mid = "white",low='red',high='blue',midpoint = (16.671628-1.347296)/2)
p
ggsave('./CC.pdf',p,width=8,height = 5)
write.table(dt,'./CC_data.tab',sep='\t',quote=F)
#### bp
ego_bp <- enrichGO(gene = geneinfo2$ENTREZID,
OrgDb=org.Hs.eg.db,
ont = "BP",##"MF", "BP", and "CC"
pAdjustMethod = "BH",
minGSSize = 1,
pvalueCutoff = 0.05)
ego_bp_result<-as.data.frame(ego_bp@result)
ego_bp_result <- ego_bp_result[ego_bp_result$pvalue<0.05,]
library(ggplot2)
dt <- ego_bp_result
dt$`-log10(pvalue)` <- -log10(dt$pvalue)
dt$Count<-as.numeric(gsub('/[0-9]+','',dt$GeneRatio))
dt$Description<-factor(dt$Description,levels = rev(unique(dt$Description)))
# dt2 <- dt[dt$ID %in% c('GO:0006352','GO:0051726','GO:0045739','GO:0034644','GO:0007265',
# 'GO:0006281','GO:0006270','GO:0006260',
# 'GO:0000122','GO:0000082'),]
dt2 <- dt[order(dt$`-log10(pvalue)`,decreasing = T),][1:20,]
range(dt2$`-log10(pvalue)`)
p <- ggplot(data=dt2, aes(x=ID, y=Count,fill=`-log10(pvalue)`)) +
geom_bar(stat="identity", width=0.8) +
coord_flip() +
theme_bw() +
# scale_x_discrete(labels=rev(dt$Description)) +
xlab("") +
theme(axis.text=element_text(size=10,face = "plain", color="black"),
axis.title=element_text(size=10),
legend.text=element_text(size=10),legend.title = element_text(size=20)) +
scale_fill_gradient2(mid = "white",low='red',high='blue',midpoint = (9.485309-7.154111)/2+7.154111)
p
ggsave('./BP.pdf',p,width=8,height = 5)
write.table(dt2,'./BP_data.tab',sep='\t',quote=F)
##### MF
ego_mf <- enrichGO(gene = geneinfo2$ENTREZID,
OrgDb=org.Hs.eg.db,
ont = "MF",##"MF", "BP", and "CC"
pAdjustMethod = "BH",
minGSSize = 1,
pvalueCutoff = 0.05)
ego_mf_result<-as.data.frame(ego_mf@result)
ego_mf_result <- ego_mf_result[ego_mf_result$pvalue<0.05,]
library(ggplot2)
dt <- ego_mf_result
dt$`-log10(pvalue)` <- -log10(dt$pvalue)
dt$Count<-as.numeric(gsub('/[0-9]+','',dt$GeneRatio))
dt$Description<-factor(dt$Description,levels = rev(unique(dt$Description)))
range(dt$`-log10(pvalue)`)
p <- ggplot(data=dt, aes(x=ID, y=Count,fill=`-log10(pvalue)`)) +
geom_bar(stat="identity", width=0.8) +
coord_flip() +
theme_bw() +
# scale_x_discrete(labels=rev(dt$Description)) +
xlab("") +
theme(axis.text=element_text(size=10,face = "plain", color="black"),
axis.title=element_text(size=10),
legend.text=element_text(size=10),legend.title = element_text(size=20)) +
scale_fill_gradient2(mid = "white",low='red',high='blue',midpoint = (5.640599-1.667579)/2+1.667579)
p
ggsave('./MF.pdf',p,width=8,height = 5)
write.table(dt,'./MF_data.tab',sep='\t',quote=F)
图分别为
BP
CC
MF
由图中我们可以看出,E2Fs的BP富集到的terms为
GO:0000083
regulation of transcription involved in G1/S transition of mitotic cell cycle
GO:0006977
DNA damage response, signal transduction by p53 class mediator resulting in cell cycle arrest
GO:0072431
signal transduction involved in mitotic G1 DNA damage checkpoint
GO:1902400
intracellular signal transduction involved in G1 DNA damage checkpoint
GO:0072413
signal transduction involved in mitotic cell cycle checkpoint
GO:1902402
signal transduction involved in mitotic DNA damage checkpoint
GO:1902403
signal transduction involved in mitotic DNA integrity checkpoint
GO:0031571
mitotic G1 DNA damage checkpoint
GO:0044819
mitotic G1/S transition checkpoint
GO:0044783
G1 DNA damage checkpoint
GO:0072401
signal transduction involved in DNA integrity checkpoint
GO:0072422
signal transduction involved in DNA damage checkpoint
GO:0072395
signal transduction involved in cell cycle checkpoint
GO:0071158
positive regulation of cell cycle arrest
GO:0072331
signal transduction by p53 class mediator
GO:0000082
G1/S transition of mitotic cell cycle
GO:0044773
mitotic DNA damage checkpoint
GO:0044843
cell cycle G1/S phase transition
GO:0044774
mitotic DNA integrity checkpoint
GO:0030330
DNA damage response, signal transduction by p53 class mediator
与原文找到的通路有些出入,但是我们找到的结果中,已经包含了文章中提到的transcription, DNA-templated、DNA replication initiation、DNA replication, G1/S transition of mitotic cell cycle, regulation of cell cycle, negative regulation of transcription from RNA polymerase II promoter,但是此处由于富集的 terms 条数太多,所以我们按照 -log10(pvalue) 选择 Top20 的通路进行了展示,
同理CC的结果如下:
GO:0090575
RNA polymerase II transcription regulator complex
GO:0005667
transcription regulator complex
GO:0035189
Rb-E2F complex
GO:0000790
nuclear chromatin
GO:0044665
MLL1/2 complex
GO:0071339
MLL1 complex
GO:0035097
histone methyltransferase complex
GO:0034708
methyltransferase complex
MF结果如下:
GO:0090575
RNA polymerase II transcription regulator complex
GO:0005667
transcription regulator complex
GO:0035189
Rb-E2F complex
GO:0000790
nuclear chromatin
GO:0044665
MLL1/2 complex
GO:0071339
MLL1 complex
GO:0035097
histone methyltransferase complex
GO:0034708
methyltransferase complex
也包括了文章提到的transcription factor complex
和一些细胞周期依赖蛋白复合物
Fig8 kegg
代码如下:
#### kegg
ego_kegg <- enrichKEGG(gene = geneinfo2$ENTREZID,
keyType = 'kegg',##"MF", "BP", and "CC"
pAdjustMethod = "BH",
minGSSize = 1,
pvalueCutoff = 0.05)
ego_kegg_result<-as.data.frame(ego_kegg@result)
ego_kegg_result <- ego_kegg_result[ego_kegg_result$pvalue<0.05,]
library(ggplot2)
dt <- ego_kegg_result
dt$`-log10(pvalue)` <- -log10(dt$pvalue)
dt$Count<-as.numeric(gsub('/[0-9]+','',dt$GeneRatio))
dt$Description<-factor(dt$Description,levels = rev(unique(dt$Description)))
range(dt$`-log10(pvalue)`)
p <- ggplot(data=dt, aes(x=Description, y=Count,fill=`-log10(pvalue)`)) +
geom_bar(stat="identity", width=0.8) +
coord_flip() +
theme_bw() +
# scale_x_discrete(labels=rev(dt$Description)) +
xlab("") +
theme(axis.text=element_text(size=10,face = "plain", color="black"),
axis.title=element_text(size=10),
legend.text=element_text(size=10),legend.title = element_text(size=20)) +
scale_fill_gradient2(mid = "white",low='red',high='blue',midpoint = (9.078405-1.360482 )/2+1.360482 )
p
ggsave('./kegg.pdf',p,width=8,height = 5)
write.table(dt,'./kegg_data.tab',sep='\t',quote=F)
结果如图
原文只找到了15个通路,我们此处找到了24个通路,包括文章中提到的Cell cycle, TGF-beta signaling pathway,
还有好多cancer的通路 但是没有找到signaling pathway, and cfa04310:Wnt signaling pathway
Fig 9
kegg pathview
打开网站KEGG PATHWAY Database (genome.jp)
点击第一个 cell cycle
选择参考基因组为人,点击go
得到如图所示结果,下载标圈即可
同理可以找到 在kegg结果第二页
接着把文件下载即可