转录组数据分析注释和富集转录组学

基于mRNA表达表格做信号通路富集图

2021-11-06  本文已影响0人  千容安

KEGG

library(clusterProfiler)
setwd("C:\\Users\\Administrator.DESKTOP-4UQ3Q0K\\Desktop")
创建该geneList方法如下,
首先导入文件,第一列是不重复的gene_IDs,第二列是fold change。
按照如下方法创建自己的GeneList
d <- read.table("RNA-seq - 1.txt",header = T,row.names = 1,sep="\t",check.names = F)  
## assume that 1st column is ID
## 2nd column is fold change

geneList <- d[,2]  
geneList<-rownames(d)   这个
## feature 1: numeric vector

names(geneList) <- as.character(d[,1])
## feature 2: named vector(vector命名)

geneEntrezID <- bitr(geneList, fromType="SYMBOL", toType=c("ENTREZID","UNIPROT"), OrgDb="org.Mm.eg.db",)
kk <- enrichKEGG(gene= geneEntrezID[,2],organism= 'mouse',pvalueCutoff = 0.05)
head(kk)

geneList <- sort(geneList, decreasing = TRUE)
## feature 3: decreasing order(vector降序)

R包msigdbr已经整理打包了MsigDB基因集,可以直接被clusterProfile采用。
library(msigdbr)
msigdbr_show_species() 
m_df <- msigdbr(species = "Mus musculus")
head(m_df, 2) %>% as.data.frame

Bar plot

以bar的颜色和高度体现enrichment scores(比如pvalue),gene counts或者ratio作为bar

library(DOSE)
data(geneList)
de <- names(geneList)[abs(geneList) > 2]
edo <- enrichDGN(de)
library(enrichplot)
barplot(edo, showCategory=20)

Dot plot

edo2 <- gseDO(geneList)
dotplot(edo, showCategory=30) + ggtitle("dotplot for ORA")
dotplot(edo2, showCategory=30) + ggtitle("dotplot for GSEA")

Gene-Concept Network

library(org.Hs.eg.db)
edox <- setReadable(edo, 'org.Mm.eg.db', 'ENTREZID')
p1 <- cnetplot(edox, foldChange=geneList)
p1
install.packages("ggnewscale")
library(ggnewscale)
p2 <- cnetplot(edox, categorySize="pvalue", foldChange=geneList)
p2
p3 <- cnetplot(edox, foldChange=geneList, circular = TRUE, colorEdge = TRUE)
p3
p1 <- cnetplot(edox, node_label="category") 
p2 <- cnetplot(edox, node_label="gene") 
p3 <- cnetplot(edox, node_label="all") 
cowplot::plot_grid(p1, p2, p3, ncol=2, labels=LETTERS[1:3])

下面单独显示p1-4


p1
p2
p3

gene category (A), gene name (B), both gene category and gene name (C, default)

Heatmap-like functional classification

p1 <- heatplot(edox)
p2 <- heatplot(edox, foldChange=geneList)
p1
p2

Heatmap plot of enriched terms. default (A), foldChange=geneList (B)

UnSet plot

library(ggupset)
upsetplot(edo)

GSEA结果表达分布的脊线图

library(ggridges)
library(pathview)
library(org.Mm.eg.db)
edo2 <- gseDO(geneList)
ridgeplot(edo2)

Tree plot

BiocManager::install("org.Mm.eg.db")
# 这个包下载不成功:这个包来自Bioconductor,下载zip,解压后直接library。
edox <- setReadable(edo, 'org.Mm.eg.db', 'ENTREZID')
# Error in .testForValidKeys(x, keys, keytype, fks) : None of the keys entered are valid keys for 'ENTREZID'. Please use the keys method to see a listing of valid arguments.
library(enrichplot)
edox2 <- pairwise_termsim(edox)
p1 <- treeplot(edox2)
p2 <- treeplot(edox2, hclust_method = "average")
p1
p1
p2

安装org.Mm.eg.db不成功,无法连接:
选中国区镜像chooseBioCmirror()
这个包来自Bioconductor,从这里下载zip文件,通过本地安装的方式给rstudio
Bioconductor - org.Mm.eg.db

Enrichment MAP

edo <- pairwise_termsim(edo)
p1 <- emapplot(edo)
p3 <- emapplot(edo, layout="kk")
p2 <- emapplot(edo, cex_category=1.5)  # cex_category=1.5将点变大

emapplot支持超几何检验和GSEA分析的结果。pie_scale 设置node大小;
layout 设布局。


p1
p3

Biological theme comparison

library(clusterProfiler)
data(gcSample)
xx <- compareCluster(gcSample, fun="enrichKEGG",
                     organism="hsa", pvalueCutoff=0.05)
xx <- pairwise_termsim(xx)                     
p1 <- emapplot(xx)
p2 <- emapplot(xx, legend_n=2) 
p3 <- emapplot(xx, pie="count")
p4 <- emapplot(xx, pie="count", cex_category=1.5, layout="kk")
cowplot::plot_grid(p1, p2, p3, p4, ncol=2, labels=LETTERS[1:4])
image.png

ridgeline plot for expression distribution of GSEA result

ridgeplot(edo2)

KEGG的table

library(clusterProfiler)
data(geneList, package="DOSE")
geneList<-rownames(d)
geneEntrezID <- bitr(geneList, fromType="SYMBOL", toType=c("ENTREZID","UNIPROT"), OrgDb="org.Mm.eg.db",)
kk <- enrichKEGG(gene= geneEntrezID[,2],organism= 'mouse',pvalueCutoff = 0.05)
head(kk)
write.table (kk, file ="kk.txt", sep ="\t", row.names =TRUE, col.names =TRUE, quote =TRUE)

再用xlsx打开


table

用这个table可以得到KEGG信号通路富集图
browseKEGG(kk, 'mmu04061')
这个mmu04061是上面这个通路的ID

running score and preranked list of GSEA result

p1 <- gseaplot(edo2, geneSetID = 1, by = "runningScore", title = edo2$Description[1])
gseaplot2(edo2, geneSetID = 1, title = edo2$Description[1])
gseaplot2(edo2, geneSetID = 1:3)
gseaplot2(edo2, geneSetID = 1:3, pvalue_table = TRUE,
          color = c("#E495A5", "#86B875", "#7DB0DD"), ES_geom = "dot")

## pubmed trend of enriched terms

terms <- edo$Description[1:5]
p <- pmcplot(terms, 2010:2020)
p2 <- pmcplot(terms, 2010:2020, proportion=FALSE)


上一篇 下一篇

猜你喜欢

热点阅读