第三步:简单的GEO数据的KEGG分析和GSEA分析
2020-05-24 本文已影响0人
碌碌无为的杰少
KEGG分析
gene_up = deg[deg$change == 'up','ENTREZID']
gene_down = deg[deg$change == 'down','ENTREZID']
gene_diff = c(gene_up,gene_down)
gene_all = deg[,'ENTREZID']
#(2)对上调/下调/所有差异基因进行富集分析
#注意这里又有个F
if(T){
kk.up <- enrichKEGG(gene = gene_up,
organism = 'hsa',
universe = gene_all,
pvalueCutoff = 0.9,
qvalueCutoff = 0.9)
kk.down <- enrichKEGG(gene = gene_down,
organism = 'hsa',
universe = gene_all,
pvalueCutoff = 0.9,
qvalueCutoff =0.9)
kk.diff <- enrichKEGG(gene = gene_diff,
organism = 'hsa',
pvalueCutoff = 0.9)
save(kk.diff,kk.down,kk.up,file = "GSE4107kegg.Rdata")
}
load("GSE4107kegg.Rdata")
#(3)从富集结果中提取出结果数据框
kegg_diff_dt <- kk.up@result
#(4)按照pvalue筛选通路
#在enrichkegg时没有设置pvaluecutoff,在此处筛选
down_kegg <- kk.down@result %>%
filter(pvalue<0.01) %>% #筛选行
mutate(group=-1) #新增列
up_kegg <- kk.up@result %>%
filter(pvalue<0.01) %>%
mutate(group=1)
#(5)可视化
g_kegg <- kegg_plot(up_kegg,down_kegg)
g_kegg
g_kegg +scale_y_continuous(labels = c(15,10,5,0,5,10))
ggsave(g_kegg,filename = 'kegg_up_down.png')
image.png
按照count数目进行KEGG
count数目进行kegg,并且可视化
if(F){
x = kk.up
df = data.frame(x)
## 计算富集分数
x@result$richFactor =x@result$Count / as.numeric(sub("/\\d+", "", x@result$BgRatio))
y =x@result
library(dplyr)
library(ggplot2)
showCategory = 20
y %>%
arrange(p.adjust) %>%
slice(1:showCategory) %>%
ggplot(aes(richFactor,forcats::fct_reorder(Description, richFactor))) +
geom_segment(aes(xend=0, yend = Description)) +
geom_point(aes(color=p.adjust, size = Count)) +
## 调整颜色的区间,begin越大,整体颜色越明艳
scale_color_viridis_c(begin = 0.3, end = 1) +
## 调整泡泡的大小
scale_size_continuous(range=c(2, 10)) +
theme_minimal() +
xlab("rich factor") +
ylab(NULL) +
ggtitle("")
}
browseKEGG( kk.up, 'hsa04510')
image.png
image.png
GSEA分析
GSEA与KEGG不同的是,GSEA是所有基因
的增减,而kegg是争对 上调基因
和下调基因
library(GSEABase)
library(clusterProfiler)
library(DOSE)
library(org.Hs.eg.db)
library(ggplot2)
library(stringr)
options(stringsAsFactors = F)
data(geneList)
geneList = deg$logFC
names(geneList) = deg$ENTREZID
geneList = sort(geneList,decreasing = T)
#准备gmt文件
geneset <- read.gmt("h.all.v7.1.entrez.gmt")
geneset$ont = str_remove(geneset$ont,"HALLMARK_")
egmt <- GSEA(geneList, TERM2GENE=geneset,verbose=F)
#> Warning in fgsea(pathways = geneSets, stats = geneList, nperm = nPerm, minSize = minGSSize, : There are ties in the preranked stats (0.05% of the list).
#> The order of those tied genes will be arbitrary, which may produce unexpected results.
egmt2<- setReadable(egmt,OrgDb=org.Hs.eg.db, keyType = "ENTREZID")
y=data.frame(egmt2)
#气泡图,展示geneset被激活还是抑制
dotplot(egmt2,split=".sign")+facet_grid(~.sign)
#dotplot(egmt2,split=".sign")+facet_grid(~.sign)+
#scale_y_discrete(labels=function(x) str_wrap(str_replace_all(x,"_"," "), width=30))
# scale_y_discrete(labels=function(x) str_remove(x,"HALLMARK_"))
#dotplot(y,showCategory=20,split=".sign")+facet_grid(~.sign)
#山峦图,展示每个geneset的基因logFC分布
ridgeplot(egmt)
image.png
image.png
单通道和多通道的gsea
library(enrichplot)
gseaplot2(egmt, geneSetID = 1, title = egmt$Description[1])
#多个gnenset合并展示
gseaplot2(egmt, geneSetID = 1:3,pvalue_table = T)
#gseaplot2(egmt,2,color = "blue",pvalue_table = T)
image.png
image.png