基于mRNA表达表格做信号通路富集图
2021-11-06 本文已影响0人
千容安
KEGG
library(clusterProfiler)
setwd("C:\\Users\\Administrator.DESKTOP-4UQ3Q0K\\Desktop")
创建该geneList方法如下,
首先导入文件,第一列是不重复的gene_IDs,第二列是fold change。
按照如下方法创建自己的GeneList
d <- read.table("RNA-seq - 1.txt",header = T,row.names = 1,sep="\t",check.names = F)
## assume that 1st column is ID
## 2nd column is fold change
geneList <- d[,2]
geneList<-rownames(d) 这个
## feature 1: numeric vector
names(geneList) <- as.character(d[,1])
## feature 2: named vector(vector命名)
geneEntrezID <- bitr(geneList, fromType="SYMBOL", toType=c("ENTREZID","UNIPROT"), OrgDb="org.Mm.eg.db",)
kk <- enrichKEGG(gene= geneEntrezID[,2],organism= 'mouse',pvalueCutoff = 0.05)
head(kk)
geneList <- sort(geneList, decreasing = TRUE)
## feature 3: decreasing order(vector降序)
R包msigdbr已经整理打包了MsigDB基因集,可以直接被clusterProfile采用。
library(msigdbr)
msigdbr_show_species()
m_df <- msigdbr(species = "Mus musculus")
head(m_df, 2) %>% as.data.frame
Bar plot
以bar的颜色和高度体现enrichment scores(比如pvalue),gene counts或者ratio作为bar
library(DOSE)
data(geneList)
de <- names(geneList)[abs(geneList) > 2]
edo <- enrichDGN(de)
library(enrichplot)
barplot(edo, showCategory=20)
data:image/s3,"s3://crabby-images/a6117/a611763d1779c04845accbe3fb5567421b3bdbd0" alt=""
Dot plot
edo2 <- gseDO(geneList)
dotplot(edo, showCategory=30) + ggtitle("dotplot for ORA")
dotplot(edo2, showCategory=30) + ggtitle("dotplot for GSEA")
data:image/s3,"s3://crabby-images/29937/29937f7fdf6dc98be4cd33cb556d45286db8d30b" alt=""
Gene-Concept Network
library(org.Hs.eg.db)
edox <- setReadable(edo, 'org.Mm.eg.db', 'ENTREZID')
p1 <- cnetplot(edox, foldChange=geneList)
p1
data:image/s3,"s3://crabby-images/6724f/6724fb344c6b9203c023ba95f5a5c7dd89f301c1" alt=""
install.packages("ggnewscale")
library(ggnewscale)
p2 <- cnetplot(edox, categorySize="pvalue", foldChange=geneList)
p2
data:image/s3,"s3://crabby-images/e68e8/e68e8b32a706efbc1dcb372972cd889887dc0f2e" alt=""
p3 <- cnetplot(edox, foldChange=geneList, circular = TRUE, colorEdge = TRUE)
p3
data:image/s3,"s3://crabby-images/eba3e/eba3e986e37871d7c817762724acd4c117df1336" alt=""
p1 <- cnetplot(edox, node_label="category")
p2 <- cnetplot(edox, node_label="gene")
p3 <- cnetplot(edox, node_label="all")
cowplot::plot_grid(p1, p2, p3, ncol=2, labels=LETTERS[1:3])
下面单独显示p1-4
data:image/s3,"s3://crabby-images/cdd1b/cdd1b1b62167e49a81e78f988d4106c6041dcbcd" alt=""
data:image/s3,"s3://crabby-images/4d14c/4d14c97b8c2efc802bd2faa44bd4ac0e7a1d2446" alt=""
data:image/s3,"s3://crabby-images/90752/90752157af183393d8a211f6b113c39629cd659a" alt=""
gene category (A), gene name (B), both gene category and gene name (C, default)
Heatmap-like functional classification
p1 <- heatplot(edox)
p2 <- heatplot(edox, foldChange=geneList)
data:image/s3,"s3://crabby-images/5b345/5b3458aca523a97a7eeea2fe435fb18681771a8b" alt=""
data:image/s3,"s3://crabby-images/e7b22/e7b224da0d9ad62c3d1d414cb23cfd4838de4d2d" alt=""
Heatmap plot of enriched terms. default (A), foldChange=geneList (B)
UnSet plot
library(ggupset)
upsetplot(edo)
data:image/s3,"s3://crabby-images/e73c2/e73c2508808e221a8f3d74d0881fa61497e73d1c" alt=""
GSEA结果表达分布的脊线图
library(ggridges)
library(pathview)
library(org.Mm.eg.db)
edo2 <- gseDO(geneList)
ridgeplot(edo2)
Tree plot
BiocManager::install("org.Mm.eg.db")
# 这个包下载不成功:这个包来自Bioconductor,下载zip,解压后直接library。
edox <- setReadable(edo, 'org.Mm.eg.db', 'ENTREZID')
# Error in .testForValidKeys(x, keys, keytype, fks) : None of the keys entered are valid keys for 'ENTREZID'. Please use the keys method to see a listing of valid arguments.
library(enrichplot)
edox2 <- pairwise_termsim(edox)
p1 <- treeplot(edox2)
p2 <- treeplot(edox2, hclust_method = "average")
p1
data:image/s3,"s3://crabby-images/627ed/627ed7bf6b62cf1c835effb4f71cf69339fa2582" alt=""
data:image/s3,"s3://crabby-images/54131/54131c1b76ec2fd48400a154b3db35aaf484d14c" alt=""
安装org.Mm.eg.db不成功,无法连接:
选中国区镜像chooseBioCmirror()
这个包来自Bioconductor,从这里下载zip文件,通过本地安装的方式给rstudio
Bioconductor - org.Mm.eg.db
- 同样方法:下载到本地
install.packages("/where/your/packages/org.Mm.eg.db_3.14.0.tar.gz")
来装
Enrichment MAP
edo <- pairwise_termsim(edo)
p1 <- emapplot(edo)
p3 <- emapplot(edo, layout="kk")
p2 <- emapplot(edo, cex_category=1.5) # cex_category=1.5将点变大
emapplot支持超几何检验和GSEA分析的结果。pie_scale 设置node大小;
layout 设布局。
data:image/s3,"s3://crabby-images/774f7/774f77821d7fa636af350435f0757f7c3b548cb8" alt=""
data:image/s3,"s3://crabby-images/b902f/b902f04ef08bd3e4ea40300246a2fef1b23175ec" alt=""
Biological theme comparison
library(clusterProfiler)
data(gcSample)
xx <- compareCluster(gcSample, fun="enrichKEGG",
organism="hsa", pvalueCutoff=0.05)
xx <- pairwise_termsim(xx)
p1 <- emapplot(xx)
p2 <- emapplot(xx, legend_n=2)
p3 <- emapplot(xx, pie="count")
p4 <- emapplot(xx, pie="count", cex_category=1.5, layout="kk")
cowplot::plot_grid(p1, p2, p3, p4, ncol=2, labels=LETTERS[1:4])
data:image/s3,"s3://crabby-images/41406/4140606a163e061a016068fa8a462225813b6c82" alt=""
ridgeline plot for expression distribution of GSEA result
ridgeplot(edo2)
data:image/s3,"s3://crabby-images/da3d5/da3d5b8a0ac485bc05a602e89bd8e403f4a85f36" alt=""
KEGG的table
library(clusterProfiler)
data(geneList, package="DOSE")
geneList<-rownames(d)
geneEntrezID <- bitr(geneList, fromType="SYMBOL", toType=c("ENTREZID","UNIPROT"), OrgDb="org.Mm.eg.db",)
kk <- enrichKEGG(gene= geneEntrezID[,2],organism= 'mouse',pvalueCutoff = 0.05)
head(kk)
write.table (kk, file ="kk.txt", sep ="\t", row.names =TRUE, col.names =TRUE, quote =TRUE)
再用xlsx打开
data:image/s3,"s3://crabby-images/fbbc8/fbbc842c94343f5b072362a0a470835240d235ae" alt=""
用这个table可以得到KEGG信号通路富集图
browseKEGG(kk, 'mmu04061')
这个mmu04061是上面这个通路的ID
running score and preranked list of GSEA result
p1 <- gseaplot(edo2, geneSetID = 1, by = "runningScore", title = edo2$Description[1])
data:image/s3,"s3://crabby-images/498f9/498f937f19bf3aaa9cd1afe46858c13f05f79e98" alt=""
gseaplot2(edo2, geneSetID = 1, title = edo2$Description[1])
data:image/s3,"s3://crabby-images/8d3bf/8d3bf6fcd4ecb4bfc3e57b11788981581f2fde68" alt=""
gseaplot2(edo2, geneSetID = 1:3)
data:image/s3,"s3://crabby-images/a363a/a363ae108eb132b56b8ca47752d17637caae374c" alt=""
gseaplot2(edo2, geneSetID = 1:3, pvalue_table = TRUE,
color = c("#E495A5", "#86B875", "#7DB0DD"), ES_geom = "dot")
data:image/s3,"s3://crabby-images/7fe7f/7fe7f86d5b713405d7abfd93132cd1a41f5dbf66" alt=""
## pubmed trend of enriched terms
terms <- edo$Description[1:5]
p <- pmcplot(terms, 2010:2020)
p2 <- pmcplot(terms, 2010:2020, proportion=FALSE)
data:image/s3,"s3://crabby-images/09a7e/09a7e1789150acc4f23df74de773220fd9f879ef" alt=""