R语言 生信GEO数据挖掘生物信息分析:从入门到精通

富集分析不光有条带图和点图,还有...

2020-12-01  本文已影响0人  小洁忘了怎么分身

参考资料

南方医科大学网红博导Y叔的一本神奇的书
https://yulab-smu.github.io/clusterProfiler-book/chapter12.html#bar-plot

好像一直在更新,没怎么宣传过,不管了,拿来学习。

1.安装和加载包

if(!require(clusterProfiler)) BiocManager::install("clusterProfiler")
if(!require(dplyr)) install.packages("dplyr")
#> Loading required package: dplyr
#> 
#> Attaching package: 'dplyr'
#> The following object is masked from 'package:AnnotationDbi':
#> 
#>     select
#> The following objects are masked from 'package:IRanges':
#> 
#>     collapse, desc, intersect, setdiff, slice, union
#> The following objects are masked from 'package:S4Vectors':
#> 
#>     first, intersect, rename, setdiff, setequal, union
#> The following object is masked from 'package:Biobase':
#> 
#>     combine
#> The following objects are masked from 'package:BiocGenerics':
#> 
#>     combine, intersect, setdiff, union
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
if(!require(ggplot2)) install.packages("ggplot2")
library(clusterProfiler)
library(dplyr)
library(ggplot2)
library(enrichplot)
library(cowplot)

2.示例数据

使用limma差异分析的结果,在生信星球公众号回复“富集输入”即可获得

load("step4output.Rdata")
geneList=deg$logFC
names(geneList)=deg$ENTREZID
geneList=sort(geneList,decreasing = T)
geneList2 = geneList[abs(geneList) > 1.5]
gene <- names(geneList)[abs(geneList) > 1.5]

3.GO富集分析

(1)富集分析

ego <- enrichGO(gene          = gene,
                universe      = names(geneList),
                OrgDb         = "org.Hs.eg.db",
                ont           = "CC",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05,
                readable      = TRUE)
head(ego)
#>                    ID                    Description GeneRatio   BgRatio
#> GO:0042555 GO:0042555                    MCM complex     8/330  11/16112
#> GO:0044454 GO:0044454        nuclear chromosome part    34/330 436/16112
#> GO:0000228 GO:0000228             nuclear chromosome    35/330 472/16112
#> GO:0098687 GO:0098687             chromosomal region    28/330 320/16112
#> GO:0000785 GO:0000785                      chromatin    33/330 468/16112
#> GO:0000775 GO:0000775 chromosome, centromeric region    16/330 177/16112
#>                  pvalue     p.adjust       qvalue
#> GO:0042555 4.453098e-12 1.705536e-09 1.528116e-09
#> GO:0044454 2.276461e-11 4.359423e-09 3.905928e-09
#> GO:0000228 4.620969e-11 5.899437e-09 5.285740e-09
#> GO:0098687 1.037845e-10 9.937367e-09 8.903619e-09
#> GO:0000785 6.295599e-10 4.822429e-08 4.320769e-08
#> GO:0000775 7.480238e-07 4.774885e-05 4.278171e-05
#>                                                                                                                                                                                                                                           geneID
#> GO:0042555                                                                                                                                                                                             MCM3/MCM8/MCM2/MCM7/MCM5/MCM4/MCM6/MMS22L
#> GO:0044454                SNAI2/JUN/PPARGC1A/SATB1/ZEB2/MUC1/MCM3/CHAF1A/TIPIN/E2F1/MCM2/MCM7/HIST1H3A/HIST1H3J/SGO1/MCM5/MCM4/CDK1/MCM6/ASF1B/HIST1H3G/MMS22L/HIST1H1A/BRCA2/ORC1/RAD51AP1/HIST1H2BB/UHRF1/CDCA5/HIST1H1B/POLE2/ESCO2/BLM/RAD51
#> GO:0000228          SNAI2/JUN/PPARGC1A/SATB1/ZEB2/MUC1/MCM3/CHAF1A/TIPIN/HMGA2/E2F1/MCM2/MCM7/HIST1H3A/HIST1H3J/SGO1/MCM5/MCM4/CDK1/MCM6/ASF1B/HIST1H3G/MMS22L/HIST1H1A/BRCA2/ORC1/RAD51AP1/HIST1H2BB/UHRF1/CDCA5/HIST1H1B/POLE2/ESCO2/BLM/RAD51
#> GO:0098687                                                                         KAT2B/MCM3/ZWINT/TTK/NUF2/SKA3/MCM2/MCM7/CENPK/SGO1/DSCC1/MCM5/MAD2L1/MCM4/CDK1/MCM6/HELLS/SPC25/HJURP/SKA1/BRCA2/ORC1/HIST1H2BB/ERCC6L/CDCA5/ESCO2/BLM/RAD51
#> GO:0000785 SNAI2/JUN/PPARGC1A/MAF/SATB1/ZEB2/MUC1/CHAF1A/TIPIN/HMGA2/PHC2/E2F1/MCM2/MCM7/HIST1H3A/HIST1H3J/DSCC1/HIST1H2AB/HIST1H2AI/ASF1B/HELLS/HIST1H2BM/HIST1H3G/HIST1H2AK/WDR76/HIST1H1A/RAD51AP1/HIST1H2BB/UHRF1/CDCA5/HIST1H1B/ESCO2/RAD51
#> GO:0000775                                                                                                                                           KAT2B/ZWINT/TTK/NUF2/SKA3/CENPK/SGO1/DSCC1/MAD2L1/HELLS/SPC25/HJURP/SKA1/ERCC6L/CDCA5/ESCO2
#>            Count
#> GO:0042555     8
#> GO:0044454    34
#> GO:0000228    35
#> GO:0098687    28
#> GO:0000785    33
#> GO:0000775    16

(2)富集分析可视化

噼里啪啦8张图来了,为了省点地方可以做成拼图

#(1)条带图
p1=barplot(ego,showCategory=20)
#(2)点图
p2=dotplot(ego)
#> wrong orderBy parameter; set to default `orderBy = "x"`
#(3)展示top5通路的共同基因
#Gene-Concept Network
p3=cnetplot(ego, categorySize="pvalue", foldChange=geneList,colorEdge = TRUE)
p4=cnetplot(ego, foldChange=geneList, circular = TRUE, colorEdge = TRUE)
gg <- ggdraw() +     
    draw_plot(p1, x=0, y=0.5, width=0.5, height=0.5) +  
    draw_plot(p2, 0, 0, 0.5, 0.5) +   
    draw_plot(p3, 0.5, 0, 0.5, 0.5) + 
  draw_plot(p4,0.5, 0.5, 0.5, 0.5)
gg
#Enrichment Map
emapplot(ego)
#(4)展示通路关系
goplot(ego)
#(5)Heatmap-like functional classification
heatplot(ego,foldChange = geneList)

像最后的热图,不调整比例的话不好看

pdf("heatplot.pdf",width = 14,height = 5)
heatplot(ego,foldChange = geneList)
dev.off()
#> RStudioGD 
#>         2

4.gsea-GO分析

ego2 <- gseGO(geneList     = geneList2,
              OrgDb        = org.Hs.eg.db,
              ont          = "CC",
              nPerm        = 1000,
              minGSSize    = 100,
              maxGSSize    = 500,
              pvalueCutoff = 0.05,
              verbose      = FALSE)
#gsea
ridgeplot(ego2)
#> Picking joint bandwidth of 0.131
#gesa图
gseaplot2(ego2, geneSetID = 1, title = ego2$Description[1])
#多条
gseaplot2(ego2, geneSetID = 1:3)
#加table
gseaplot2(ego2, geneSetID = 2:4, pvalue_table = TRUE)
#上下三联

pp <- lapply(1:3, function(i) {
  anno <- ego2[i, c("NES", "pvalue", "p.adjust")]
  lab <- paste0(names(anno), "=",  round(anno, 3), collapse="\n")
  
  gsearank(ego2, i, ego2[i, 2]) + xlab(NULL) +ylab(NULL) +
    annotate("text", 0, ego2[i, "enrichmentScore"] * .9, label = lab, hjust=0, vjust=0)
})
plot_grid(plotlist=pp, ncol=1)

保存时调比例

ggsave("try.pdf",width = 15,height = 18)

公众号生信星球同步更新我的文章,欢迎大家关注!

上一篇下一篇

猜你喜欢

热点阅读