第二部:简单的GEO数据的GO分析
2020-05-24 本文已影响0人
碌碌无为的杰少
GO分析
GO分析有三个过程,GO_CC细胞组分,GO_BP生物过程, GO_MP分析功能,首先转换成ENTREZID
,然后利用clusterProfiler
函数。
rm(list = ls())
load(file = 'step4output.Rdata')
library(ggplot2)
library(clusterProfiler)
library(org.Hs.eg.db)
library(dplyr)
s2e <- bitr(deg$symbol,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)#人类
#其他物种http://bioconductor.org/packages/release/BiocViews.html#___OrgDb
deg <- inner_join(deg,s2e,by=c("symbol"="SYMBOL"))
#富集分析考验网速,因此给大家保存了Rdata
#上课运行示例数据无需修改,在做自己的数据时请注意把本行之后的load()去掉
library(clusterProfiler)
library(dplyr)
library(ggplot2)
source("kegg_plot_function.R")
#source表示运行整个kegg_plot_function.R脚本,里面是一个function
#以up_kegg和down_kegg为输入数据做图
#1.GO database analysis ----
#(1)输入数据
gene_up = deg[deg$change == 'up','ENTREZID']
gene_down = deg[deg$change == 'down','ENTREZID']
gene_diff = c(gene_up,gene_down)
gene_all = deg[,'ENTREZID']
#(2)GO分析,分三部分
#以下步骤耗时很长,实际运行时注意把if后面的括号里F改成T
if(T){
#细胞组分
ego_CC <- enrichGO(gene = gene_up,
OrgDb= org.Hs.eg.db,
ont = "CC",
pAdjustMethod = "BH",
minGSSize = 1,
pvalueCutoff = 0.01,
qvalueCutoff = 0.01,
readable = TRUE)
#生物过程
ego_BP <- enrichGO(gene = gene_up,
OrgDb= org.Hs.eg.db,
ont = "BP",
pAdjustMethod = "BH",
minGSSize = 1,
pvalueCutoff = 0.01,
qvalueCutoff = 0.01,
readable = TRUE)
#分子功能:
ego_MF <- enrichGO(gene = gene_up,
OrgDb= org.Hs.eg.db,
ont = "MF",
pAdjustMethod = "BH",
minGSSize = 1,
pvalueCutoff = 0.01,
qvalueCutoff = 0.01,
readable = TRUE)
save(ego_CC,ego_BP,ego_MF,file = "ego_GSE4107.Rdata")
}
ggg <- ego_BP@result
load(file = "ego_GSE4107.Rdata")
#(3)可视化
#条带图
dotplot(ego_BP,showCategory=20)
barplot(ego_BP)
image.png
三条图联合
三条图联合
if(F){
go <- enrichGO(gene = gene_up, OrgDb = "org.Hs.eg.db", ont="all")
}
##save(go,file="go.Rdata")
### 直接加载我保存的数据
library(ggplot2)
p <- dotplot(go, split="ONTOLOGY") +facet_grid(ONTOLOGY~., scale="free")
p
image.png
前五条通路的共同基因的circle图
geneList = deg$logFC
names(geneList)=deg$ENTREZID
geneList = sort(geneList,decreasing = T)
#(3)展示top5通路的共同基因,要放大看。
#Gene-Concept Network
cnetplot(ego_CC, categorySize="pvalue", foldChange=geneList,colorEdge = TRUE)
cnetplot(ego_CC, foldChange=geneList, circular = TRUE, colorEdge = TRUE)
image.png
基因富集结果可视化
emapplot(ego_CC)
image.png
通路之间的关系
goplot(ego_CC)
image.png
基因展示的heatmap
heatplot(ego_CC,foldChange = geneList)
#太多基因就会糊。可通过调整比例或者减少基因来控制。
pdf("heatplot.pdf",width = 14,height = 5)
heatplot(ego_CC,foldChange = geneList)
dev.off()
image.png