ChIP-seq

2020-04-09重新学习Y叔的ChIPseeker系列

2020-04-09  本文已影响0人  程凉皮儿

参考学习资料

1.加载需要使用的R包

rm(list = ls())
library(ChIPseeker)
library(ggplot2)
library(dplyr)
library(Vennerable)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)

2.获取示例数据

2.1.covplot,peakHeatmap,plotAvgProf2,upsetplot & vennpie

参数annoDb = "org.Hs.eg.db"使结果更有可读性。

txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
files <- getSampleFiles()
peak <- readPeakFile(files[[4]])
promoter <- getPromoters(TxDb=txdb, 
                         upstream=3000, downstream=3000)
tagMatrix <- getTagMatrix(peak, 
                          windows=promoter)
tagHeatmap(tagMatrix, xlim=c(-3000, 3000), 
           color="red")
image.png
# peakHeatmap(files, weightCol="V5", TxDb=txdb, 
#             upstream=3000, downstream=3000, 
#             color=rainbow(length(files)))
covplot(files[[4]])
image.png
head(files)
#> $ARmo_0M
#> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1174480_ARmo_0M_peaks.bed.gz"
#> 
#> $ARmo_1nM
#> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1174481_ARmo_1nM_peaks.bed.gz"
#> 
#> $ARmo_100nM
#> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1174482_ARmo_100nM_peaks.bed.gz"
#> 
#> $CBX6_BF
#> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1295076_CBX6_BF_ChipSeq_mergedReps_peaks.bed.gz"
#> 
#> $CBX7_BF
#> [1] "/Library/Frameworks/R.framework/Versions/3.6/Resources/library/ChIPseeker/extdata/GEO_sample_data/GSM1295077_CBX7_BF_ChipSeq_mergedReps_peaks.bed.gz"
peak=GenomicRanges::GRangesList(CBX6=readPeakFile(files[[4]]),CBX7=readPeakFile(files[[5]]))
covplot(peak, weightCol="V5") + facet_grid(chr ~ .id)
#> Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character
#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector

#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector

#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector
image.png
covplot(peak, weightCol="V5", chrs=c("chr17", "chr18"), xlim=c(4e7, 5e7)) + facet_grid(chr ~ .id)
#> Warning in bind_rows_(x, .id): Unequal factor levels: coercing to character

#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector

#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector

#> Warning in bind_rows_(x, .id): binding character and factor vector,
#> coercing into character vector
image.png

plotAvgProf(tagMatrix, xlim=c(-3000, 3000),
            xlab="Genomic Region (5'->3')", 
            ylab = "Read Count Frequency")
image.png
plotAvgProf2(files[[4]], TxDb=txdb, 
             upstream=3000, downstream=3000,
             xlab="Genomic Region (5'->3')", 
             ylab = "Read Count Frequency")
#> >> preparing promoter regions...  2020-04-09 20:05:39 
#> >> preparing tag matrix...        2020-04-09 20:05:39 
#> >> plotting figure...             2020-04-09 20:05:48
image.png
plotAvgProf(tagMatrix, xlim=c(-3000, 3000), 
            conf = 0.95, resample = 1000)
#> >> Running bootstrapping for tag matrix...        2020-04-09 20:05:56
image.png
tagMatrixList <- lapply(files, getTagMatrix, 
                        windows=promoter)
plotAvgProf(tagMatrixList, xlim=c(-3000, 3000))
image.png
plotAvgProf(tagMatrixList, xlim=c(-3000, 3000), 
            conf=0.95,resample=500, facet="row")
#> >> Running bootstrapping for tag matrix...        2020-04-09 20:06:49 
#> >> Running bootstrapping for tag matrix...        2020-04-09 20:06:51 
#> >> Running bootstrapping for tag matrix...        2020-04-09 20:06:54 
#> >> Running bootstrapping for tag matrix...        2020-04-09 20:06:59 
#> >> Running bootstrapping for tag matrix...        2020-04-09 20:07:07
image.png
peakAnno <- annotatePeak(files[[4]], 
                         tssRegion=c(-3000, 3000),
                         TxDb=txdb, annoDb="org.Hs.eg.db")
#> >> loading peak file...               2020-04-09 20:07:10 
#> >> preparing features information...      2020-04-09 20:07:10 
#> >> identifying nearest features...        2020-04-09 20:07:10 
#> >> calculating distance from peak to TSS...   2020-04-09 20:07:11 
#> >> assigning genomic annotation...        2020-04-09 20:07:11 
#> >> adding gene annotation...          2020-04-09 20:07:32 
#> >> assigning chromosome lengths           2020-04-09 20:07:32 
#> >> done...                    2020-04-09 20:07:32
plotAnnoPie(peakAnno)
image.png
plotAnnoBar(peakAnno)
image.png
vennpie(peakAnno)
upsetplot(peakAnno)
upsetplot(peakAnno, vennpie=TRUE)
image.png image.png
plotDistToTSS(peakAnno,
              title="Distribution of transcription factor-binding loci\nrelative to TSS")
image.png
peakAnnoList <- lapply(files, annotatePeak, 
                       TxDb=txdb,tssRegion=c(-3000, 3000))
#> >> loading peak file...               2020-04-09 20:07:35 
#> >> preparing features information...      2020-04-09 20:07:35 
#> >> identifying nearest features...        2020-04-09 20:07:35 
#> >> calculating distance from peak to TSS...   2020-04-09 20:07:35 
#> >> assigning genomic annotation...        2020-04-09 20:07:35 
#> >> assigning chromosome lengths           2020-04-09 20:07:38 
#> >> done...                    2020-04-09 20:07:38 
#> >> loading peak file...               2020-04-09 20:07:38 
#> >> preparing features information...      2020-04-09 20:07:38 
#> >> identifying nearest features...        2020-04-09 20:07:38 
#> >> calculating distance from peak to TSS...   2020-04-09 20:07:38 
#> >> assigning genomic annotation...        2020-04-09 20:07:38 
#> >> assigning chromosome lengths           2020-04-09 20:07:40 
#> >> done...                    2020-04-09 20:07:40 
#> >> loading peak file...               2020-04-09 20:07:40 
#> >> preparing features information...      2020-04-09 20:07:40 
#> >> identifying nearest features...        2020-04-09 20:07:40 
#> >> calculating distance from peak to TSS...   2020-04-09 20:07:41 
#> >> assigning genomic annotation...        2020-04-09 20:07:41 
#> >> assigning chromosome lengths           2020-04-09 20:07:43 
#> >> done...                    2020-04-09 20:07:43 
#> >> loading peak file...               2020-04-09 20:07:43 
#> >> preparing features information...      2020-04-09 20:07:43 
#> >> identifying nearest features...        2020-04-09 20:07:43 
#> >> calculating distance from peak to TSS...   2020-04-09 20:07:43 
#> >> assigning genomic annotation...        2020-04-09 20:07:43 
#> >> assigning chromosome lengths           2020-04-09 20:07:47 
#> >> done...                    2020-04-09 20:07:47 
#> >> loading peak file...               2020-04-09 20:07:47 
#> >> preparing features information...      2020-04-09 20:07:48 
#> >> identifying nearest features...        2020-04-09 20:07:48 
#> >> calculating distance from peak to TSS...   2020-04-09 20:07:50 
#> >> assigning genomic annotation...        2020-04-09 20:07:50 
#> >> assigning chromosome lengths           2020-04-09 20:07:52 
#> >> done...                    2020-04-09 20:07:52
plotAnnoBar(peakAnnoList)
image.png
plotDistToTSS(peakAnnoList)              
image.png
genes <- lapply(peakAnnoList, function(i) 
  as.data.frame(i)$geneId)

vennplot(genes[2:4], by='Vennerable')
image.png

f = getSampleFiles()[[4]]
#x = annotatePeak(f, tssRegion=c(-1000, 1000), TxDb=txdb)
#x
x = annotatePeak(f, tssRegion=c(-1000, 1000), TxDb=txdb,annoDb = "org.Hs.eg.db",
                 addFlankGeneInfo=TRUE, flankDistance=5000)
#> >> loading peak file...               2020-04-09 20:07:55 
#> >> preparing features information...      2020-04-09 20:07:55 
#> >> identifying nearest features...        2020-04-09 20:07:55 
#> >> calculating distance from peak to TSS...   2020-04-09 20:07:55 
#> >> assigning genomic annotation...        2020-04-09 20:07:55 
#> >> adding gene annotation...          2020-04-09 20:07:57 
#> >> adding flank feature information from peaks...     2020-04-09 20:07:57 
#> >> assigning chromosome lengths           2020-04-09 20:07:58 
#> >> done...                    2020-04-09 20:07:58
as.GRanges(x) %>% head(3)
#> GRanges object with 3 ranges and 17 metadata columns:
#>       seqnames          ranges strand |          V4        V5
#>          <Rle>       <IRanges>  <Rle> |    <factor> <numeric>
#>   [1]     chr1   815093-817883      * | MACS_peak_1    295.76
#>   [2]     chr1 1243288-1244338      * | MACS_peak_2     63.19
#>   [3]     chr1 2979977-2981228      * | MACS_peak_3    100.16
#>              annotation   geneChr geneStart   geneEnd geneLength
#>             <character> <integer> <integer> <integer>  <integer>
#>   [1] Distal Intergenic         1    803451    812182       8732
#>   [2]          Promoter         1   1243994   1247057       3064
#>   [3]          Promoter         1   2976181   2980350       4170
#>       geneStrand      geneId transcriptId distanceToTSS         ENSEMBL
#>        <integer> <character>  <character>     <numeric>     <character>
#>   [1]          2      284593   uc001abt.4         -2911 ENSG00000230368
#>   [2]          1      126789   uc001aed.3             0 ENSG00000169972
#>   [3]          2      440556   uc001aka.3             0 ENSG00000177133
#>            SYMBOL                                    GENENAME
#>       <character>                                 <character>
#>   [1]      FAM41C family with sequence similarity 41 member C
#>   [2]       PUSL1               pseudouridine synthase like 1
#>   [3]   PRDM16-DT                 PRDM16 divergent transcript
#>                                                                                                                                                                flank_txIds
#>                                                                                                                                                                <character>
#>   [1]                                                                                                                                                           uc001abt.4
#>   [2] uc001aea.2;uc001aeb.2;uc001aec.1;uc001aed.3;uc010nyi.2;uc009vjx.3;uc009vjy.1;uc001aee.2;uc001aef.2;uc001aeg.2;uc001aeh.2;uc001aei.2;uc001aek.2;uc009vjz.2;uc010nyj.2
#>   [3]                                                                                                    uc001aka.3;uc010nzg.1;uc001akc.3;uc001ake.3;uc001akf.3;uc009vlh.3
#>                                                                                         flank_geneIds
#>                                                                                           <character>
#>   [1]                                                                                          284593
#>   [2] 116983;116983;116983;126789;126789;126789;54973;54973;54973;54973;54973;54973;54973;54973;54973
#>   [3]                                                           440556;440556;63976;63976;63976;63976
#>                                                            flank_gene_distances
#>                                                                     <character>
#>   [1]                                                                     -2911
#>   [2] -1979;-19;0;0;0;-128;8492;15729;15729;15729;15729;15729;15729;15729;15729
#>   [3]                                               0;0;-4514;-4514;-4514;-4514
#>   -------
#>   seqinfo: 24 sequences from hg19 genome

2.2 组学间基因富集自定义分析

compareCluster:可对分簇后基因做富集分析可视化

library(ChIPseeker)
library(clusterProfiler)
#> Warning: package 'clusterProfiler' was built under R version 3.6.2
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene

bedfile=getSampleFiles()
seq=lapply(bedfile, readPeakFile)
genes=lapply(seq, function(i) 
  seq2gene(i, c(-1000, 3000), 3000, TxDb=txdb))

cc = compareCluster(geneClusters = genes, 
                    fun="enrichKEGG", organism="hsa")

dotplot(cc, showCategory=10)
image.png

后面可以自行生成bed文件生成Grange对象,或geneClusters进行富集分析。

上一篇 下一篇

猜你喜欢

热点阅读