有参转录组学习七:基因功能注释及富集分析
2019-05-18 本文已影响0人
颤抖吧__小虫子
Author: ligc
Date: 19/5/18
library(AnnotationHub)
library(clusterProfiler)
## Install an older version of a package (gridGraphics 0.3-0) straight from a URL.
# The URL points at a pre-built Windows binary (.zip), so the package type has
# to be "win.binary"; "source" is meant for source tarballs.
packageurl <- "https://cran.r-project.org/bin/windows/contrib/3.5/gridGraphics_0.3-0.zip"
install.packages(packageurl, repos = NULL, type = "win.binary")
# Create the AnnotationHub object and print its summary.
AH <- AnnotationHub::AnnotationHub()
AH
AnnotationHub with 47474 records
# snapshotDate(): 2018-10-24
# $dataprovider: BroadInstitute, Ensembl, UCSC, ftp://ftp.ncbi....
# $species: Homo sapiens, Mus musculus, Drosophila melanogaster...
# $rdataclass: GRanges, BigWigFile, FaFile, TwoBitFile, OrgDb, ...
# additional mcols(): taxonomyid, genome, description,
# coordinate_1_based, maintainer, rdatadateadded,
# preparerclass, tags, rdatapath, sourceurl, sourcetype
# retrieve records with, e.g., 'object[["AH2"]]'
title
AH2 | Ailuropoda_melanoleuca.ailMel1.69.dna.toplevel.fa
AH3 | Ailuropoda_melanoleuca.ailMel1.69.dna_rm.toplevel.fa
AH4 | Ailuropoda_melanoleuca.ailMel1.69.dna_sm.toplevel.fa
AH5 | Ailuropoda_melanoleuca.ailMel1.69.ncrna.fa
AH6 | Ailuropoda_melanoleuca.ailMel1.69.pep.all.fa
... ...
AH67895 | org.Vibrio_vulnificus.eg.sqlite
AH67896 | org.Achromobacter_group_B.eg.sqlite
AH67897 | org.Achromobacter_group_E.eg.sqlite
AH67898 | org.Pannonibacter_phragmitetus.eg.sqlite
AH67899 | org.Salinispora_arenicola_CNS-205.eg.sqlite
数据来源
# Show the distinct values of the hub's `dataprovider` field.
unique(AH$dataprovider)
[1] "Ensembl"
[2] "UCSC"
[3] "RefNet"
[4] "Inparanoid8"
[5] "NHLBI"
[6] "ChEA"
[7] "Pazar"
[8] "NIH Pathway Interaction Database"
[9] "Haemcode"
[10] "BroadInstitute"
[11] "PRIDE"
[12] "Gencode"
[13] "CRIBI"
[14] "Genoscope"
[15] "MISO, VAST-TOOLS, UCSC"
[16] "UWashington"
[17] "Stanford"
[18] "dbSNP"
[19] "BioMart"
[20] "GeneOntology"
[21] "KEGG"
[22] "URGI"
[23] "EMBL-EBI"
[24] "MicrosporidiaDB"
[25] "FungiDB"
[26] "TriTrypDB"
[27] "ToxoDB"
[28] "AmoebaDB"
[29] "PlasmoDB"
[30] "PiroplasmaDB"
[31] "CryptoDB"
[32] "TrichDB"
[33] "GiardiaDB"
[34] "ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/"
# Show the first few distinct values of the hub's `species` field.
head(unique(AH$species))
[1] "Ailuropoda melanoleuca" "Anolis carolinensis" "Bos taurus"
[4] "Caenorhabditis elegans" "Callithrix jacchus" "Canis familiaris"
# Look up the hub records for a specific species.
# The search pattern must not carry trailing whitespace: "Mus musculus " would
# only match records where the name happens to be followed by a space, and
# could silently miss records whose species field ends right after the name.
grs <- query(AH, "Mus musculus")
# Alternative ways to filter the hub, e.g. for Arabidopsis OrgDb records:
#AH[AH$species == 'Arabidopsis thaliana' & AH$rdataclass == 'OrgDb']
#subset(AH, species == 'Arabidopsis thaliana' & rdataclass == 'OrgDb')
# Browse the hub in a graphical interface
display(AH)
# Show the distinct values of the hub's `rdataclass` field
unique(AH$rdataclass)
## Download a resource
# Retrieve hub record AH53222 and inspect what it offers.
mus <- AH[["AH53222"]]
class(mus)
columns(mus)
keytypes(mus)
head(keys(mus, keytype = "SYMBOL"))
# select() is called through its namespace on purpose: dplyr, which this script
# attaches later, also exports a select() and masks this one when the script is
# re-run in the same session.
AnnotationDbi::select(
  mus,
  keys = "0610006L08Rik",
  columns = c("GENEID", "GO"),
  keytype = "SYMBOL"
)
# Draw 200 random Entrez IDs as a toy gene list for a test enrichment run.
mus.sample <- sample(keys(mus, keytype = "ENTREZID"), 200)
# NOTE(review): the OrgDb argument presumably needs an OrgDb object -- confirm
# that AH53222 resolves to one (check the class(mus) output above).
test <- enrichGO(
  gene = mus.sample,
  OrgDb = mus,
  keyType = "ENTREZID",
  pAdjustMethod = "none",
  pvalueCutoff = 0.1,
  qvalueCutoff = 0.2
)
mapIds(mus, keys = mus.sample, column = "TXID", keytype = "ENTREZID")
### keys: the original IDs; column(s): the ID type(s) to convert to;
### keytype: the type of the original IDs
# Install the mouse annotation package only when it is missing, so re-running
# the script does not trigger a reinstall every time.
if (!requireNamespace("org.Mm.eg.db", quietly = TRUE)) {
  BiocManager::install("org.Mm.eg.db")
}
library(org.Mm.eg.db)
keytypes(org.Mm.eg.db)
columns(org.Mm.eg.db)
# select() is namespace-qualified because dplyr (attached later in this script)
# exports a function of the same name and would mask it on a re-run.
AnnotationDbi::select(
  org.Mm.eg.db,
  keys = "ENSMUSG00000001123",
  columns = c("SYMBOL", "GENENAME", "GO"),
  keytype = "ENSEMBL"
)
# Read the differentially expressed gene table; the first column supplies the
# row names, which are used below as ENSEMBL keys.
mouse_DEG_list <- read.table(
  file = "mouse_DEG_list",
  header = TRUE,
  row.names = 1
)
mouse_DEG_list_name <- row.names(mouse_DEG_list)
# Map the ENSEMBL keys to symbol, gene name and Entrez ID (data frame result).
mouse_DEG_list_transID <- AnnotationDbi::select(
  org.Mm.eg.db,
  keys = mouse_DEG_list_name,
  columns = c("SYMBOL", "GENENAME", "ENTREZID"),
  keytype = "ENSEMBL"
)
# Same keys mapped to the SYMBOL column only.
mouse_DEG_list_transID2 <- mapIds(
  org.Mm.eg.db,
  keys = mouse_DEG_list_name,
  column = "SYMBOL",
  keytype = "ENSEMBL"
)
## GO enrichment
# Run enrichGO on the DEG row names (ENSEMBL keys) with ont = "CC".
ego <- enrichGO(
  gene = row.names(mouse_DEG_list),
  OrgDb = org.Mm.eg.db,
  keyType = "ENSEMBL",
  ont = "CC"
)
ego_res <- as.data.frame(ego)
library(ggplot2)
library(dplyr)
## Bubble (dot) plot
dotplot(ego, font.size = 8)
## Network plot
# NOTE(review): enrichMap() may no longer be available in newer
# clusterProfiler/DOSE releases (emapplot() appears to be its successor) --
# confirm against the installed version.
enrichMap(ego, vertex.label.cex = 1.2, layout = igraph::layout.kamada.kawai)
## GO graph
plotGOgraph(ego)
## KEGG enrichment
# Pick the Entrez IDs by column name rather than by position, so the code does
# not depend on the column order of the conversion table, and drop unmapped
# (NA) and duplicated IDs before passing them to enrichKEGG.
kegg_list <- mouse_DEG_list_transID$ENTREZID
kegg_list <- unique(kegg_list[!is.na(kegg_list)])
kegg <- enrichKEGG(
  kegg_list,
  keyType = "kegg",
  organism = "mmu",
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  qvalueCutoff = 0.1
)
dotplot(kegg, font.size = 8)
browseKEGG(kegg, 'mmu01100')
image.png
image.png
image.png
参考文章
1.https://www.jianshu.com/p/fb15249200d7
2.https://www.jianshu.com/p/fb15249200d7
3.https://www.jianshu.com/p/ae94178918bc