基于mRNA表达表格做信号通路富集图
2021-11-06 本文已影响0人
千容安
KEGG
library(clusterProfiler)
setwd("C:\\Users\\Administrator.DESKTOP-4UQ3Q0K\\Desktop")
创建该geneList方法如下,
首先导入文件,第一列是不重复的gene_IDs,第二列是fold change。
按照如下方法创建自己的GeneList
d <- read.table("RNA-seq - 1.txt",header = T,row.names = 1,sep="\t",check.names = F)
## assume that 1st column is ID
## 2nd column is fold change
geneList <- d[,2]
geneList<-rownames(d) 这个
## feature 1: numeric vector
names(geneList) <- as.character(d[,1])
## feature 2: named vector(vector命名)
geneEntrezID <- bitr(geneList, fromType="SYMBOL", toType=c("ENTREZID","UNIPROT"), OrgDb="org.Mm.eg.db",)
kk <- enrichKEGG(gene= geneEntrezID[,2],organism= 'mouse',pvalueCutoff = 0.05)
head(kk)
geneList <- sort(geneList, decreasing = TRUE)
## feature 3: decreasing order(vector降序)
R包msigdbr已经整理打包了MsigDB基因集,可以直接被clusterProfile采用。
library(msigdbr)
msigdbr_show_species()
m_df <- msigdbr(species = "Mus musculus")
head(m_df, 2) %>% as.data.frame
Bar plot
以bar的颜色和高度体现enrichment scores(比如pvalue),gene counts或者ratio作为bar
library(DOSE)
data(geneList)
de <- names(geneList)[abs(geneList) > 2]
edo <- enrichDGN(de)
library(enrichplot)
barplot(edo, showCategory=20)

Dot plot
edo2 <- gseDO(geneList)
dotplot(edo, showCategory=30) + ggtitle("dotplot for ORA")
dotplot(edo2, showCategory=30) + ggtitle("dotplot for GSEA")

Gene-Concept Network
library(org.Hs.eg.db)
edox <- setReadable(edo, 'org.Mm.eg.db', 'ENTREZID')
p1 <- cnetplot(edox, foldChange=geneList)
p1

install.packages("ggnewscale")
library(ggnewscale)
p2 <- cnetplot(edox, categorySize="pvalue", foldChange=geneList)
p2

p3 <- cnetplot(edox, foldChange=geneList, circular = TRUE, colorEdge = TRUE)
p3

p1 <- cnetplot(edox, node_label="category")
p2 <- cnetplot(edox, node_label="gene")
p3 <- cnetplot(edox, node_label="all")
cowplot::plot_grid(p1, p2, p3, ncol=2, labels=LETTERS[1:3])
下面单独显示p1-4



gene category (A), gene name (B), both gene category and gene name (C, default)
Heatmap-like functional classification
p1 <- heatplot(edox)
p2 <- heatplot(edox, foldChange=geneList)


Heatmap plot of enriched terms. default (A), foldChange=geneList (B)
UnSet plot
library(ggupset)
upsetplot(edo)

GSEA结果表达分布的脊线图
library(ggridges)
library(pathview)
library(org.Mm.eg.db)
edo2 <- gseDO(geneList)
ridgeplot(edo2)
Tree plot
BiocManager::install("org.Mm.eg.db")
# 这个包下载不成功:这个包来自Bioconductor,下载zip,解压后直接library。
edox <- setReadable(edo, 'org.Mm.eg.db', 'ENTREZID')
# Error in .testForValidKeys(x, keys, keytype, fks) : None of the keys entered are valid keys for 'ENTREZID'. Please use the keys method to see a listing of valid arguments.
library(enrichplot)
edox2 <- pairwise_termsim(edox)
p1 <- treeplot(edox2)
p2 <- treeplot(edox2, hclust_method = "average")
p1


安装org.Mm.eg.db不成功,无法连接:
选中国区镜像chooseBioCmirror()
这个包来自Bioconductor,从这里下载zip文件,通过本地安装的方式给rstudio
Bioconductor - org.Mm.eg.db
- 同样方法:下载到本地
install.packages("/where/your/packages/org.Mm.eg.db_3.14.0.tar.gz")
来装
Enrichment MAP
edo <- pairwise_termsim(edo)
p1 <- emapplot(edo)
p3 <- emapplot(edo, layout="kk")
p2 <- emapplot(edo, cex_category=1.5) # cex_category=1.5将点变大
emapplot支持超几何检验和GSEA分析的结果。pie_scale 设置node大小;
layout 设布局。


Biological theme comparison
library(clusterProfiler)
data(gcSample)
xx <- compareCluster(gcSample, fun="enrichKEGG",
organism="hsa", pvalueCutoff=0.05)
xx <- pairwise_termsim(xx)
p1 <- emapplot(xx)
p2 <- emapplot(xx, legend_n=2)
p3 <- emapplot(xx, pie="count")
p4 <- emapplot(xx, pie="count", cex_category=1.5, layout="kk")
cowplot::plot_grid(p1, p2, p3, p4, ncol=2, labels=LETTERS[1:4])

ridgeline plot for expression distribution of GSEA result
ridgeplot(edo2)

KEGG的table
library(clusterProfiler)
data(geneList, package="DOSE")
geneList<-rownames(d)
geneEntrezID <- bitr(geneList, fromType="SYMBOL", toType=c("ENTREZID","UNIPROT"), OrgDb="org.Mm.eg.db",)
kk <- enrichKEGG(gene= geneEntrezID[,2],organism= 'mouse',pvalueCutoff = 0.05)
head(kk)
write.table (kk, file ="kk.txt", sep ="\t", row.names =TRUE, col.names =TRUE, quote =TRUE)
再用xlsx打开

用这个table可以得到KEGG信号通路富集图
browseKEGG(kk, 'mmu04061')
这个mmu04061是上面这个通路的ID
running score and preranked list of GSEA result
p1 <- gseaplot(edo2, geneSetID = 1, by = "runningScore", title = edo2$Description[1])

gseaplot2(edo2, geneSetID = 1, title = edo2$Description[1])

gseaplot2(edo2, geneSetID = 1:3)

gseaplot2(edo2, geneSetID = 1:3, pvalue_table = TRUE,
color = c("#E495A5", "#86B875", "#7DB0DD"), ES_geom = "dot")

## pubmed trend of enriched terms
terms <- edo$Description[1:5]
p <- pmcplot(terms, 2010:2020)
p2 <- pmcplot(terms, 2010:2020, proportion=FALSE)
