R语言可视化转录组专题

bulk RNA-Seq(9)富集分析

2022-07-07  本文已影响0人  Bioinfor生信云

欢迎关注Bioinfor 生信云!

这一期我们来讲讲富集分析。

构建OrgDB

OrgDB是bioconductor中存储不同数据库基因ID之间的对应关系,以及基因与GO等注释的对应关系的R软件包。共有19个物种,可以在这里下载

Bioconductor - BiocViews

没有OrgDB的物种可以通过AnnotationForge自己构建

使用eggnog-mapper的注释结果构建OrgDB(私聊我获取构建OrgDB脚本)

#安装自己构建的OrgDB
dir.create('R_lib', recursive = T)
install.packages('org.My.eg.db_1.0.tar.gz',
                 repos = NULL,
                 lib = 'R_lib')
library(org.My.eg.db, lib = 'R_lib')

GO富集(自建OrgDB的物种)

gene <- filter(de_result, 
               abs(log2FoldChange) > 1 & padj < 0.05) %>%
  pull(id) 

geneList <- de_result$log2FoldChange
names(geneList) <- de_result$id
geneList <- sort(geneList, decreasing = T)

de_go <- enrichGO(gene = gene,
          OrgDb = org.My.eg.db,
          keyType = 'GID',
          ont = 'ALL',
          qvalueCutoff = 0.05,
          pvalueCutoff = 0.05)
de_go_df <- as.data.frame(de_go)
barplot(de_ego, showCategory = 10)
dotplot(de_ego, showCategory = 10)
cnetplot(de_ego, 
         foldChange = geneList, 
         showCategory = 5,
         node_label = "all", # category | gene | all | none
         circular = TRUE, 
         colorEdge = TRUE)

GO富集(已有OrgDB的物种,拟南芥为例)

library(org.At.tair.db)
de_go <- enrichGO(gene = gene,
          OrgDb = org.At.tair.db,
          keyType = 'TAIR',
          ont = 'ALL',
          qvalueCutoff = 0.05,
          pvalueCutoff = 0.05)

barplot(de_go, showCategory = 10)
dotplot(de_go, showCategory = 10)

test <- pairwise_termsim(de_go)
p1 <- treeplot(test, showCategory = 30)
p2 <- treeplot(test, hclust_method = "average")
plot_grid(p1, p2, lables = c('A', 'B'), nrow = 2)

emapplot(test)
emapplot_cluster(test)



KEGG富集分析(不常见物种)

emapper <- read_delim('query_seqs.fa.emapper.annotations', 
                      "\t", escape_double = FALSE, col_names = FALSE, 
                      comment = "#", trim_ws = TRUE) %>%
  dplyr::select(GID = X1, 
                KO = X9, 
                Pathway = X10)

pathway2gene <- dplyr::select(emapper, Pathway, GID) %>%
  separate_rows(Pathway, sep = ',', convert = F) %>%
  filter(str_detect(Pathway, 'ko')) %>%
  mutate(Pathway = str_remove(Pathway, 'ko'))

library(magrittr)
get_path2name <- function(){
  keggpathid2name.df <- clusterProfiler:::kegg_list("pathway")
  keggpathid2name.df[,1] %<>% gsub("path:map", "", .)
  colnames(keggpathid2name.df) <- c("path_id","path_name")
  return(keggpathid2name.df)
}
pathway2name <- get_path2name()

ibrary(clusterProfiler)
 de_kegg <- enricher(gene,
                 TERM2GENE = pathway2gene,
                 TERM2NAME = pathway2name,
                 pvalueCutoff = 0.05,
                 qvalueCutoff = 0.05)

de_kegg_df <- as.data.frame(de_kegg)
barplot(de_kegg, showCategory = 10)
dotplot(de_kegg, showCategory = 10)
cnetplot(de_kegg, 
         foldChange = geneList, 
         showCategory = 3,
         node_label = "category", # category | gene | all | none
         circular = TRUE, 
         colorEdge = TRUE)

KEGG富集分析(常见物种,拟南芥为例)

library(tidyverse)
library(clusterProfiler)
de_kegg <- enrichKEGG(gene,
                      keyType = 'kegg',
                      organism = 'ath',
                      pvalueCutoff = 0.05,
                      pAdjustMethod = 'BH')

barplot(de_kegg, showCategory = 10)
dotplot(de_kegg, showCategory = 10)
cnetplot(de_kegg, 
         foldChange = geneList, 
         showCategory = 3,
         node_label = "category", # category | gene | all | none
         circular = TRUE, 
         colorEdge = TRUE)

喜欢的话就点个赞吧

欢迎关注Bioinfor 生信云 微信公众号

上一篇下一篇

猜你喜欢

热点阅读