GEO数据挖掘富集分析

第二部:简单的GEO数据的GO分析

2020-05-24  本文已影响0人  碌碌无为的杰少

GO分析

GO分析有三个过程,GO_CC细胞组分,GO_BP生物过程, GO_MP分析功能,首先转换成ENTREZID,然后利用clusterProfiler函数。

rm(list = ls())  
load(file = 'step4output.Rdata')
library(ggplot2)
library(clusterProfiler)
library(org.Hs.eg.db)
library(dplyr)
s2e <- bitr(deg$symbol, 
            fromType = "SYMBOL",
            toType = "ENTREZID",
            OrgDb = org.Hs.eg.db)#人类
#其他物种http://bioconductor.org/packages/release/BiocViews.html#___OrgDb
deg <- inner_join(deg,s2e,by=c("symbol"="SYMBOL"))
#富集分析考验网速,因此给大家保存了Rdata
#上课运行示例数据无需修改,在做自己的数据时请注意把本行之后的load()去掉
library(clusterProfiler)
library(dplyr)
library(ggplot2)
source("kegg_plot_function.R")
#source表示运行整个kegg_plot_function.R脚本,里面是一个function
#以up_kegg和down_kegg为输入数据做图

#1.GO database analysis ----

#(1)输入数据
gene_up = deg[deg$change == 'up','ENTREZID'] 
gene_down = deg[deg$change == 'down','ENTREZID'] 
gene_diff = c(gene_up,gene_down)
gene_all = deg[,'ENTREZID']
#(2)GO分析,分三部分
#以下步骤耗时很长,实际运行时注意把if后面的括号里F改成T
if(T){
  #细胞组分
  ego_CC <- enrichGO(gene = gene_up,
                     OrgDb= org.Hs.eg.db,
                     ont = "CC",
                     pAdjustMethod = "BH",
                     minGSSize = 1,
                     pvalueCutoff = 0.01,
                     qvalueCutoff = 0.01,
                     readable = TRUE)
  #生物过程
  ego_BP <- enrichGO(gene = gene_up,
                     OrgDb= org.Hs.eg.db,
                     ont = "BP",
                     pAdjustMethod = "BH",
                     minGSSize = 1,
                     pvalueCutoff = 0.01,
                     qvalueCutoff = 0.01,
                     readable = TRUE)
  #分子功能:
  ego_MF <- enrichGO(gene = gene_up,
                     OrgDb= org.Hs.eg.db,
                     ont = "MF",
                     pAdjustMethod = "BH",
                     minGSSize = 1,
                     pvalueCutoff = 0.01,
                     qvalueCutoff = 0.01,
                     readable = TRUE)
  save(ego_CC,ego_BP,ego_MF,file = "ego_GSE4107.Rdata")
}
ggg <- ego_BP@result
load(file = "ego_GSE4107.Rdata")

#(3)可视化
#条带图
dotplot(ego_BP,showCategory=20)
barplot(ego_BP)
image.png

三条图联合

三条图联合
if(F){
  go <- enrichGO(gene = gene_up, OrgDb = "org.Hs.eg.db", ont="all")
}
##save(go,file="go.Rdata")
### 直接加载我保存的数据

library(ggplot2)
p <- dotplot(go, split="ONTOLOGY") +facet_grid(ONTOLOGY~., scale="free")
p
image.png

前五条通路的共同基因的circle图

geneList = deg$logFC
names(geneList)=deg$ENTREZID
geneList = sort(geneList,decreasing = T)
#(3)展示top5通路的共同基因,要放大看。
#Gene-Concept Network
cnetplot(ego_CC, categorySize="pvalue", foldChange=geneList,colorEdge = TRUE)
cnetplot(ego_CC, foldChange=geneList, circular = TRUE, colorEdge = TRUE)
image.png

基因富集结果可视化

emapplot(ego_CC)
image.png

通路之间的关系

goplot(ego_CC)
image.png

基因展示的heatmap

heatplot(ego_CC,foldChange = geneList)
#太多基因就会糊。可通过调整比例或者减少基因来控制。
pdf("heatplot.pdf",width = 14,height = 5)
heatplot(ego_CC,foldChange = geneList)
dev.off()
image.png
上一篇下一篇

猜你喜欢

热点阅读