2020-04-09重新学习Y叔的ChIPseeker系列
2020-04-09 本文已影响0人
程凉皮儿
参考学习资料
- CS0: ChIPseq从入门到放弃
- CS1: ChIPseq简介
- CS2: BED文件
- CS3: peak注释
- CS4:关于ChIPseq注释的几个问题
- CS5: 吃着火锅,唱着歌,还把分析给做了
- CS6: ChIPseeker的可视化方法(中秋节的视觉饕餮)
- CS7:Genomic coordination的富集性分析(1)
- CS8:Genomic coordination的富集性分析(2)
- CS9: GEO数据挖掘
- CS10: 八卦终结版
1.加载需要使用的R包
rm(list = ls())
library(ChIPseeker)
library(ggplot2)
library(dplyr)
library(Vennerable)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
2.获取示例数据
2.1.covplot,peakHeatmap,plotAvgProf2,upsetplot & vennpie
参数annoDb = "org.Hs.eg.db"
使结果更有可读性。
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
files <- getSampleFiles()
peak <- readPeakFile(files[[4]])
promoter <- getPromoters(TxDb=txdb,
upstream=3000, downstream=3000)
tagMatrix <- getTagMatrix(peak,
windows=promoter)
tagHeatmap(tagMatrix, xlim=c(-3000, 3000),
color="red")
![](https://img.haomeiwen.com/i19037128/9c42ebeb37fce252.png)
# peakHeatmap(files, weightCol="V5", TxDb=txdb,
# upstream=3000, downstream=3000,
# color=rainbow(length(files)))
covplot(files[[4]])
![](https://img.haomeiwen.com/i19037128/55a4a3a4ee4fea1c.png)
head(files)
#> $ARmo_0M
#> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1174480_ARmo_0M_peaks.bed.gz"
#>
#> $ARmo_1nM
#> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1174481_ARmo_1nM_peaks.bed.gz"
#>
#> $ARmo_100nM
#> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1174482_ARmo_100nM_peaks.bed.gz"
#>
#> $CBX6_BF
#> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1295076_CBX6_BF_ChipSeq_mergedReps_peaks.bed.gz"
#>
#> $CBX7_BF
#> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1295077_CBX7_BF_ChipSeq_mergedReps_peaks.bed.gz"
peak=GenomicRanges::GRangesList(CBX6=readPeakFile(files[[4]]),CBX7=readPeakFile(files[[5]]))
covplot(peak, weightCol="V5") + facet_grid(chr ~ .id)
#> Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector
#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector
#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector
![](https://img.haomeiwen.com/i19037128/ad9b5ed87db5f9bc.png)
covplot(peak, weightCol="V5", chrs=c("chr17", "chr18"), xlim=c(4e7, 5e7)) + facet_grid(chr ~ .id)
#> Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector
#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector
#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector
![](https://img.haomeiwen.com/i19037128/8c06c831c94493bf.png)
plotAvgProf(tagMatrix, xlim=c(-3000, 3000),
xlab="Genomic Region (5'->3')",
ylab = "Read Count Frequency")
![](https://img.haomeiwen.com/i19037128/d154aa4f0526d058.png)
plotAvgProf2(files[[4]], TxDb=txdb,
upstream=3000, downstream=3000,
xlab="Genomic Region (5'->3')",
ylab = "Read Count Frequency")
#> >> preparing promoter regions... 2020-04-09 20:05:39
#> >> preparing tag matrix... 2020-04-09 20:05:39
#> >> plotting figure... 2020-04-09 20:05:48
![](https://img.haomeiwen.com/i19037128/d46da3ee935245d0.png)
plotAvgProf(tagMatrix, xlim=c(-3000, 3000),
conf = 0.95, resample = 1000)
#> >> Running bootstrapping for tag matrix... 2020-04-09 20:05:56
![](https://img.haomeiwen.com/i19037128/94291578d3dbb677.png)
tagMatrixList <- lapply(files, getTagMatrix,
windows=promoter)
plotAvgProf(tagMatrixList, xlim=c(-3000, 3000))
![](https://img.haomeiwen.com/i19037128/0e97d752fe71aef0.png)
plotAvgProf(tagMatrixList, xlim=c(-3000, 3000),
conf=0.95,resample=500, facet="row")
#> >> Running bootstrapping for tag matrix... 2020-04-09 20:06:49
#> >> Running bootstrapping for tag matrix... 2020-04-09 20:06:51
#> >> Running bootstrapping for tag matrix... 2020-04-09 20:06:54
#> >> Running bootstrapping for tag matrix... 2020-04-09 20:06:59
#> >> Running bootstrapping for tag matrix... 2020-04-09 20:07:07
![](https://img.haomeiwen.com/i19037128/07fd28f9db936497.png)
peakAnno <- annotatePeak(files[[4]],
tssRegion=c(-3000, 3000),
TxDb=txdb, annoDb="org.Hs.eg.db")
#> >> loading peak file... 2020-04-09 20:07:10
#> >> preparing features information... 2020-04-09 20:07:10
#> >> identifying nearest features... 2020-04-09 20:07:10
#> >> calculating distance from peak to TSS... 2020-04-09 20:07:11
#> >> assigning genomic annotation... 2020-04-09 20:07:11
#> >> adding gene annotation... 2020-04-09 20:07:32
#> >> assigning chromosome lengths 2020-04-09 20:07:32
#> >> done... 2020-04-09 20:07:32
plotAnnoPie(peakAnno)
![](https://img.haomeiwen.com/i19037128/8282a27aa4899eab.png)
plotAnnoBar(peakAnno)
![](https://img.haomeiwen.com/i19037128/17f59fee0e93721c.png)
vennpie(peakAnno)
upsetplot(peakAnno)
upsetplot(peakAnno, vennpie=TRUE)
![](https://img.haomeiwen.com/i19037128/c933bb6f48dff4da.png)
![](https://img.haomeiwen.com/i19037128/8e40b268c5241d02.png)
plotDistToTSS(peakAnno,
title="Distribution of transcription factor-binding loci\nrelative to TSS")
![](https://img.haomeiwen.com/i19037128/4491b5233aed69fd.png)
peakAnnoList <- lapply(files, annotatePeak,
TxDb=txdb,tssRegion=c(-3000, 3000))
#> >> loading peak file... 2020-04-09 20:07:35
#> >> preparing features information... 2020-04-09 20:07:35
#> >> identifying nearest features... 2020-04-09 20:07:35
#> >> calculating distance from peak to TSS... 2020-04-09 20:07:35
#> >> assigning genomic annotation... 2020-04-09 20:07:35
#> >> assigning chromosome lengths 2020-04-09 20:07:38
#> >> done... 2020-04-09 20:07:38
#> >> loading peak file... 2020-04-09 20:07:38
#> >> preparing features information... 2020-04-09 20:07:38
#> >> identifying nearest features... 2020-04-09 20:07:38
#> >> calculating distance from peak to TSS... 2020-04-09 20:07:38
#> >> assigning genomic annotation... 2020-04-09 20:07:38
#> >> assigning chromosome lengths 2020-04-09 20:07:40
#> >> done... 2020-04-09 20:07:40
#> >> loading peak file... 2020-04-09 20:07:40
#> >> preparing features information... 2020-04-09 20:07:40
#> >> identifying nearest features... 2020-04-09 20:07:40
#> >> calculating distance from peak to TSS... 2020-04-09 20:07:41
#> >> assigning genomic annotation... 2020-04-09 20:07:41
#> >> assigning chromosome lengths 2020-04-09 20:07:43
#> >> done... 2020-04-09 20:07:43
#> >> loading peak file... 2020-04-09 20:07:43
#> >> preparing features information... 2020-04-09 20:07:43
#> >> identifying nearest features... 2020-04-09 20:07:43
#> >> calculating distance from peak to TSS... 2020-04-09 20:07:43
#> >> assigning genomic annotation... 2020-04-09 20:07:43
#> >> assigning chromosome lengths 2020-04-09 20:07:47
#> >> done... 2020-04-09 20:07:47
#> >> loading peak file... 2020-04-09 20:07:47
#> >> preparing features information... 2020-04-09 20:07:48
#> >> identifying nearest features... 2020-04-09 20:07:48
#> >> calculating distance from peak to TSS... 2020-04-09 20:07:50
#> >> assigning genomic annotation... 2020-04-09 20:07:50
#> >> assigning chromosome lengths 2020-04-09 20:07:52
#> >> done... 2020-04-09 20:07:52
plotAnnoBar(peakAnnoList)
![](https://img.haomeiwen.com/i19037128/ecb5c68103ea6589.png)
plotDistToTSS(peakAnnoList)
![](https://img.haomeiwen.com/i19037128/9a339a50dc7b8534.png)
genes <- lapply(peakAnnoList, function(i)
as.data.frame(i)$geneId)
vennplot(genes[2:4], by='Vennerable')
![](https://img.haomeiwen.com/i19037128/fca0846de0e0ab84.png)
f = getSampleFiles()[[4]]
#x = annotatePeak(f, tssRegion=c(-1000, 1000), TxDb=txdb)
#x
x = annotatePeak(f, tssRegion=c(-1000, 1000), TxDb=txdb,annoDb = "org.Hs.eg.db",
addFlankGeneInfo=TRUE, flankDistance=5000)
#> >> loading peak file... 2020-04-09 20:07:55
#> >> preparing features information... 2020-04-09 20:07:55
#> >> identifying nearest features... 2020-04-09 20:07:55
#> >> calculating distance from peak to TSS... 2020-04-09 20:07:55
#> >> assigning genomic annotation... 2020-04-09 20:07:55
#> >> adding gene annotation... 2020-04-09 20:07:57
#> >> adding flank feature information from peaks... 2020-04-09 20:07:57
#> >> assigning chromosome lengths 2020-04-09 20:07:58
#> >> done... 2020-04-09 20:07:58
as.GRanges(x) %>% head(3)
#> GRanges object with 3 ranges and 17 metadata columns:
#> seqnames ranges strand | V4 V5
#> <Rle> <IRanges> <Rle> | <factor> <numeric>
#> [1] chr1 815093-817883 * | MACS_peak_1 295.76
#> [2] chr1 1243288-1244338 * | MACS_peak_2 63.19
#> [3] chr1 2979977-2981228 * | MACS_peak_3 100.16
#> annotation geneChr geneStart geneEnd geneLength
#> <character> <integer> <integer> <integer> <integer>
#> [1] Distal Intergenic 1 803451 812182 8732
#> [2] Promoter 1 1243994 1247057 3064
#> [3] Promoter 1 2976181 2980350 4170
#> geneStrand geneId transcriptId distanceToTSS ENSEMBL
#> <integer> <character> <character> <numeric> <character>
#> [1] 2 284593 uc001abt.4 -2911 ENSG00000230368
#> [2] 1 126789 uc001aed.3 0 ENSG00000169972
#> [3] 2 440556 uc001aka.3 0 ENSG00000177133
#> SYMBOL GENENAME
#> <character> <character>
#> [1] FAM41C family with sequence similarity 41 member C
#> [2] PUSL1 pseudouridine synthase like 1
#> [3] PRDM16-DT PRDM16 divergent transcript
#> flank_txIds
#> <character>
#> [1] uc001abt.4
#> [2] uc001aea.2;uc001aeb.2;uc001aec.1;uc001aed.3;uc010nyi.2;uc009vjx.3;uc009vjy.1;uc001aee.2;uc001aef.2;uc001aeg.2;uc001aeh.2;uc001aei.2;uc001aek.2;uc009vjz.2;uc010nyj.2
#> [3] uc001aka.3;uc010nzg.1;uc001akc.3;uc001ake.3;uc001akf.3;uc009vlh.3
#> flank_geneIds
#> <character>
#> [1] 284593
#> [2] 116983;116983;116983;126789;126789;126789;54973;54973;54973;54973;54973;54973;54973;54973;54973
#> [3] 440556;440556;63976;63976;63976;63976
#> flank_gene_distances
#> <character>
#> [1] -2911
#> [2] -1979;-19;0;0;0;-128;8492;15729;15729;15729;15729;15729;15729;15729;15729
#> [3] 0;0;-4514;-4514;-4514;-4514
#> -------
#> seqinfo: 24 sequences from hg19 genome
2.2 组学间基因富集自定义分析
compareCluster:可对分簇后基因做富集分析可视化
library(ChIPseeker)
library(clusterProfiler)
#> Warning: package 'clusterProfiler' was built under R version 3.6.2
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
bedfile=getSampleFiles()
seq=lapply(bedfile, readPeakFile)
genes=lapply(seq, function(i)
seq2gene(i, c(-1000, 3000), 3000, TxDb=txdb))
cc = compareCluster(geneClusters = genes,
fun="enrichKEGG", organism="hsa")
dotplot(cc, showCategory=10)
![](https://img.haomeiwen.com/i19037128/3824e01450355642.png)
后面可以自行生成bed文件生成Grange对象,或geneClusters进行富集分析。