clusterProfiler

有参转录组学习七:基因功能注释及富集分析

2019-05-18  本文已影响0人  颤抖吧__小虫子

Author: ligc

Date: 19/5/18

# Load AnnotationHub (client for annotation resources) and clusterProfiler
# (enrichment analysis).
library(AnnotationHub)
library(clusterProfiler)
# Install a pinned, older version of a package (gridGraphics 0.3-0).
# The URL points at a Windows binary archive (.zip), so the install type must
# be "win.binary"; type = "source" would make R treat the zip as a source
# package.
# NOTE(review): this archive lives under the R 3.5 Windows binary tree, so it
# is only expected to work on a matching Windows R installation — confirm.
packageurl <- "https://cran.r-project.org/bin/windows/contrib/3.5/gridGraphics_0.3-0.zip"
install.packages(packageurl, repos = NULL, type = "win.binary")
# Create the hub client and print a summary of the available records.
AH <- AnnotationHub()
AH
AnnotationHub with 47474 records
# snapshotDate(): 2018-10-24 
# $dataprovider: BroadInstitute, Ensembl, UCSC, ftp://ftp.ncbi....
# $species: Homo sapiens, Mus musculus, Drosophila melanogaster...
# $rdataclass: GRanges, BigWigFile, FaFile, TwoBitFile, OrgDb, ...
# additional mcols(): taxonomyid, genome, description,
#   coordinate_1_based, maintainer, rdatadateadded,
#   preparerclass, tags, rdatapath, sourceurl, sourcetype 
# retrieve records with, e.g., 'object[["AH2"]]' 

            title                                               
  AH2     | Ailuropoda_melanoleuca.ailMel1.69.dna.toplevel.fa   
  AH3     | Ailuropoda_melanoleuca.ailMel1.69.dna_rm.toplevel.fa
  AH4     | Ailuropoda_melanoleuca.ailMel1.69.dna_sm.toplevel.fa
  AH5     | Ailuropoda_melanoleuca.ailMel1.69.ncrna.fa          
  AH6     | Ailuropoda_melanoleuca.ailMel1.69.pep.all.fa        
  ...       ...                                                 
  AH67895 | org.Vibrio_vulnificus.eg.sqlite                     
  AH67896 | org.Achromobacter_group_B.eg.sqlite                 
  AH67897 | org.Achromobacter_group_E.eg.sqlite                 
  AH67898 | org.Pannonibacter_phragmitetus.eg.sqlite            
  AH67899 | org.Salinispora_arenicola_CNS-205.eg.sqlite 
数据来源
# List the distinct data providers recorded in the hub metadata.
unique(AH$dataprovider)
[1] "Ensembl"                              
 [2] "UCSC"                                 
 [3] "RefNet"                               
 [4] "Inparanoid8"                          
 [5] "NHLBI"                                
 [6] "ChEA"                                 
 [7] "Pazar"                                
 [8] "NIH Pathway Interaction Database"     
 [9] "Haemcode"                             
[10] "BroadInstitute"                       
[11] "PRIDE"                                
[12] "Gencode"                              
[13] "CRIBI"                                
[14] "Genoscope"                            
[15] "MISO, VAST-TOOLS, UCSC"               
[16] "UWashington"                          
[17] "Stanford"                             
[18] "dbSNP"                                
[19] "BioMart"                              
[20] "GeneOntology"                         
[21] "KEGG"                                 
[22] "URGI"                                 
[23] "EMBL-EBI"                             
[24] "MicrosporidiaDB"                      
[25] "FungiDB"                              
[26] "TriTrypDB"                            
[27] "ToxoDB"                               
[28] "AmoebaDB"                             
[29] "PlasmoDB"                             
[30] "PiroplasmaDB"                         
[31] "CryptoDB"                             
[32] "TrichDB"                              
[33] "GiardiaDB"                            
[34] "ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/"
# Show the first few distinct species names recorded in the hub metadata.
head(unique(AH$species))
[1] "Ailuropoda melanoleuca" "Anolis carolinensis"    "Bos taurus"            
[4] "Caenorhabditis elegans" "Callithrix jacchus"     "Canis familiaris" 
# Look up the hub records for a specific species.
# The search pattern is written without trailing whitespace: a pattern such as
# "Mus musculus " only matches text where the name is followed by a space, so
# it can miss metadata values that end with the species name.
grs <- query(AH, "Mus musculus")
# Alternative filtering syntax (example: Arabidopsis OrgDb records):
#AH[AH$species == 'Arabidopsis thaliana' & AH$rdataclass == 'OrgDb']
#subset(AH, species == 'Arabidopsis thaliana' & rdataclass == 'OrgDb')
# Browse the hub graphically (interactive).
display(AH)
# List the distinct R data classes available in the hub.
unique(AH$rdataclass)
# Download the hub record AH53222 and keep the resulting annotation object.
mus <- AH[["AH53222"]]

# Inspect the object: its class, the columns it can return, and the key types
# it accepts.
class(mus)
columns(mus)
keytypes(mus)

# Peek at the first few gene symbols, then look up annotations for one symbol.
head(keys(mus, keytype = "SYMBOL"))
select(
  mus,
  keys    = "0610006L08Rik",
  columns = c("GENEID", "GO"),
  keytype = "SYMBOL"
)

# Draw 200 random Entrez IDs and run a GO enrichment on them as a smoke test
# (no p-value adjustment, loose cutoffs).
mus.sample <- sample(keys(mus, keytype = "ENTREZID"), 200)
test <- enrichGO(
  gene          = mus.sample,
  OrgDb         = mus,
  keyType       = "ENTREZID",
  pAdjustMethod = "none",
  pvalueCutoff  = 0.1,
  qvalueCutoff  = 0.2
)

# Map the sampled Entrez IDs to the "TXID" column.
mapIds(mus, keys = mus.sample, column = "TXID", keytype = "ENTREZID")
# ID conversion with an OrgDb package:
#   keys    - the original IDs,
#   columns - the ID types to convert to,
#   keytype - the type of the original IDs.
# Install the mouse annotation package only when it is not already available,
# so re-running the script does not reinstall it every time.
if (!requireNamespace("org.Mm.eg.db", quietly = TRUE)) {
  BiocManager::install("org.Mm.eg.db")
}
library(org.Mm.eg.db)
keytypes(org.Mm.eg.db)
columns(org.Mm.eg.db)
# Example lookup for a single Ensembl gene ID.
select(
  org.Mm.eg.db,
  keys    = "ENSMUSG00000001123",
  columns = c("SYMBOL", "GENENAME", "GO"),
  keytype = "ENSEMBL"
)
# Read the differentially expressed gene table; the first column becomes the
# row names, which are used below as Ensembl gene IDs.
mouse_DEG_list <- read.table(
  file      = "mouse_DEG_list",
  header    = TRUE,
  row.names = 1
)
mouse_DEG_list_name <- row.names(mouse_DEG_list)
# Full conversion table: one or more rows per input ID, with columns
# ENSEMBL, SYMBOL, GENENAME and ENTREZID.
mouse_DEG_list_transID <- select(
  org.Mm.eg.db,
  keys    = mouse_DEG_list_name,
  columns = c("SYMBOL", "GENENAME", "ENTREZID"),
  keytype = "ENSEMBL"
)
# Named vector mapping each Ensembl ID to a gene symbol.
mouse_DEG_list_transID2 <- mapIds(
  org.Mm.eg.db,
  keys    = mouse_DEG_list_name,
  column  = "SYMBOL",
  keytype = "ENSEMBL"
)
# GO enrichment (cellular component ontology) on the DEG Ensembl IDs.
ego <- enrichGO(
  gene    = row.names(mouse_DEG_list),
  OrgDb   = org.Mm.eg.db,
  keyType = "ENSEMBL",
  ont     = "CC"
)
# Flatten the enrichment result into a plain data frame for inspection/export.
ego_res <- as.data.frame(ego)
library(ggplot2)
library(dplyr)
# Dot (bubble) plot of the enriched terms.
dotplot(ego, font.size = 8)
# Enrichment map (term network).
# enrichMap() has been removed from DOSE/clusterProfiler; emapplot() is its
# replacement. Newer enrichplot releases require the term-similarity matrix to
# be computed first with pairwise_termsim(); older releases do not provide that
# function, so it is applied only when available.
# NOTE(review): the old vertex.label.cex / layout arguments have no direct
# emapplot() equivalent across versions and are left at their defaults.
ego_net <- ego
if (exists("pairwise_termsim", envir = asNamespace("enrichplot"),
           inherits = FALSE)) {
  ego_net <- enrichplot::pairwise_termsim(ego)
}
emapplot(ego_net)
# GO graph (DAG) of the enriched terms.
plotGOgraph(ego)
# KEGG pathway enrichment.
# Take the Entrez IDs by column name rather than by position, so the code keeps
# working if the columns requested from select() change order. Ensembl IDs
# without an Entrez mapping come back as NA, and one gene can appear on several
# rows, so drop missing values and duplicates before testing.
kegg_list <- mouse_DEG_list_transID[["ENTREZID"]]
kegg_list <- unique(kegg_list[!is.na(kegg_list)])
kegg <- enrichKEGG(
  gene          = kegg_list,
  keyType       = "kegg",
  organism      = "mmu",
  pvalueCutoff  = 0.05,
  pAdjustMethod = "BH",
  qvalueCutoff  = 0.1
)

# Dot (bubble) plot of the enriched pathways.
dotplot(kegg, font.size = 8)

# Open the given pathway in the browser.
browseKEGG(kegg, "mmu01100")
image.png image.png
image.png

参考文章

1.https://www.jianshu.com/p/fb15249200d7
2.https://www.jianshu.com/p/fb15249200d7
3.https://www.jianshu.com/p/ae94178918bc

上一篇下一篇

猜你喜欢

热点阅读