ChIP-seqR语言ChipSeq数据分析

② valr实例计算ChIP-seq中TSS上下游reads的分

2018-11-26  本文已影响3人  热衷组培的二货潜

计算的思维导图

image.png
# Summarizing interval coverage across genomic features
bedfile <- valr_example("genes.hg19.chr22.bed.gz")
genomefile <- valr_example("hg19.chrom.sizes.gz")
bgfile  <- valr_example("hela.h3k4.chip.bg.gz")

genes <- read_bed(bedfile, n_fields = 6)
genome <- read_genome(genomefile)
y <- read_bedgraph(bgfile)
# generate 1 bp TSS intervals, "+" strand only
tss <- genes %>%
  filter(strand == "+") %>%
  mutate(end = start + 1)

# 1000 bp up and downstream
region_size <- 1000
# 50 bp windows
win_size <- 50

# add slop to the TSS, break into windows and add a group
# bed_slop函数扩展区域空间,延长区间上下游,这里延长上下游1KB
# bed_makewindows 扩展后的bed区间以50bp为一个bin进行划分
x <- tss %>%
  bed_slop(genome, both = region_size) %>%
  bed_makewindows(genome, win_size)
# map signals to TSS regions and calculate summary statistics.
# 使用bed_map函数统计每一个bin中的总reads数目
res <- bed_map(x, y, win_sum = sum(value, na.rm = TRUE)) %>%
  group_by(.win_id) %>%
  summarize(win_mean = mean(win_sum, na.rm = TRUE),
            win_sd = sd(win_sum, na.rm = TRUE))
## ggplot2进行可视化
library(ggplot2)

x_labels <- seq(-region_size, region_size, by = win_size * 5)
x_breaks <- seq(1, 41, by = 5)

sd_limits <- aes(ymax = win_mean + win_sd, ymin = win_mean - win_sd)

p <- ggplot(res, aes(x = .win_id, y = win_mean)) +
  geom_point(size = 0.25) + geom_pointrange(sd_limits, size = 0.1) +
  scale_x_continuous(labels = x_labels, breaks = x_breaks) +
  xlab("Position (bp from TSS)") + ylab("Signal") +
  theme_classic()

ggplot(res, aes(x = .win_id, y = win_mean)) +
  geom_point(size = 0.25) + geom_line(size = 1) + 
  scale_x_continuous(labels = x_labels, breaks = x_breaks) +
  xlab("Position (bp from TSS)") + ylab("Signal") +
  theme_bw()
image.png
上一篇下一篇

猜你喜欢

热点阅读