六、GEO差异基因功能注释之KEGG和GO
2021-03-27 本文已影响0人
芋圆学徒
一、ENTREZID转换
进行功能注释,首先需要对差异基因deg数据集进行ENTREZID转换
#加ENTREZID列,用于富集分析(symbol转entrezid,然后inner_join)
library(clusterProfiler)
library(org.Hs.eg.db) #Hs代表人类
s2e <- bitr(uniqe(deg$symbol),
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)#人类
#其他物种http://bioconductor.org/packages/release/BiocViews.html#___OrgDb
dim(deg)
deg <- inner_join(deg,s2e,by=c("symbol"="SYMBOL"))
dim(deg)
length(unique(deg$symbol))
二、输入数据准备
接下来拿出up基因和down基因的ENTREZID,组成diff基因
#输入数据整理
gene_up = deg[deg$change == 'up','ENTREZID']
gene_down = deg[deg$change == 'down','ENTREZID']
gene_diff = c(gene_up,gene_down)
gene_all = deg[,'ENTREZID']
三、KEGG通路富集分析
1.首先加载所需要的包
library(clusterProfiler)
library(dplyr)
library(ggplot2)
library(stringr)
library(enrichplot)
2.KEGG富集分析
#####超几何分布来判断是否富集
#上调基因的通路富集
kk.up <- enrichKEGG(gene = gene_up,
organism = 'hsa',
universe = gene_all,
pvalueCutoff = 0.9,
qvalueCutoff = 0.05)
head(kk.up)[,1:6]
#下调基因的通路富集
kk.down <- enrichKEGG(gene = gene_down,
organism = 'hsa',
universe = gene_all,
pvalueCutoff = 0.9,
qvalueCutoff = 0.05)
head(kk.down )[,1:6]
3.理解数据
接着将得到的结果加上一列分组信息,以备后续作图使用
table(kk.up@result$p.adjust<0.05)
table(kk.down@result$p.adjust<0.05)
down_kegg <- kk.down@result %>%
filter(pvalue<0.05) %>% #筛选行
mutate(group=-1) #新增列
up_kegg <- kk.up@result %>%
filter(pvalue<0.05) %>%
mutate(group=1)
4.可视化kegg结果
#####绘图
#自定义绘图函数keggplot
kegg_plot <- function(up_kegg,down_kegg){
dat=rbind(up_kegg,down_kegg)
colnames(dat)
dat$pvalue = -log10(dat$pvalue)
dat$pvalue=dat$pvalue*dat$group
dat=dat[order(dat$pvalue,decreasing = F),]
g_kegg<- ggplot(dat, aes(x=reorder(Description,order(pvalue, decreasing = F)), y=pvalue, fill=group)) +
geom_bar(stat="identity") +
scale_fill_gradient(low="blue",high="red",guide = FALSE) +
scale_x_discrete(name ="Pathway names") +
scale_y_continuous(name ="-log10Pvalue") +
coord_flip() +
theme_bw()+
theme(plot.title = element_text(hjust = 0.5))+
ggtitle("Pathway Enrichment")
return(g_kegg)
}
##每次使用直接将代码复制放在project里面,用之前
source("functions.R")
##之后直接使用函数即可,绘制图像仅需要一句代码即可
g_kegg = kegg_plot(up_kegg,down_kegg)
print(g_kegg)
KEGG_barplot
四、GO富集分析
首先,第一步也是数据准备
输入数据
gene_up = deg[deg$change == 'up','ENTREZID']
gene_down = deg[deg$change == 'down','ENTREZID']
gene_diff = c(gene_up,gene_down)
gene_all = deg[,'ENTREZID']
g_list<- list(gene_up= gene_up,gene_down=gene_down,gene_diff= gene_diff)
第二步开始富集分析
ego <- enrichGO(gene = gene_diff,
OrgDb= org.Hs.eg.db,
ont = "ALL",
pAdjustMethod= "BH",
readable = TRUE,
universe = gene_all,
pvalueCutoff = 0.99,
qvalueCutoff = 0.99)
第三步,可视化富集结果
#条带图
barplot(ego)
#气泡图
dotplot(ego)
dotplot(ego, split = "ONTOLOGY", font.size = 10,
showCategory = 5) + facet_grid(ONTOLOGY ~ ., scale = "free") +
scale_y_discrete(labels = function(x) str_wrap(x, width = 45))
GO_barplot
GO_centplot
注意这里的图根据具体的数据而不同
最后,我们也可以通过clusterprofilter包做GO, KEGG的GSEA富集分析,谁让Y叔这么勤劳这么优秀呢
但我们需要整理一下输入数据,整理成GSEA输入数据类型
geneList=deg$logFC
names(geneList)=deg$ENTREZID
geneList=sort(geneList,decreasing = T)
接着一句代码搞定GSEA
KEGG
kk_gse <- gseKEGG(geneList = geneList,
organism = 'hsa',
verbose = FALSE)
#简单展示某一个
gseaplot2(ekk, geneSetID = 1:3)
###绘制条形图展示
down_kegg<-kk_gse[kk_gse$pvalue<0.05 & kk_gse$enrichmentScore < 0,];down_kegg$group=-1
up_kegg<-kk_gse[kk_gse$pvalue<0.05 & kk_gse$enrichmentScore > 0,];up_kegg$group=1
g2 = kegg_plot(up_kegg,down_kegg)
g2
GO
ego <- gseGO(geneList = geneList,
OrgDb = org.Hs.eg.db,
ont = "BP")
head(ego@result,3)
gseaplot2(ego, geneSetID = 1, title = ego$Description[1])
gseaplot2(ego, geneSetID = 1:3)