基因组学遗传学

BSA分析-实战笔记(五)绘图

2022-05-11  本文已影响0人  Akuooo

参考:
以水稻为例教你如何使用BSA方法进行遗传定位(下篇) - 简书 (jianshu.com)
使用QTLseqr进行BSA-seq分析 - 简书 (jianshu.com)

  1. 加上拟合线
    文献中有拟合线(黑色的那个)。下面按照每1Mb为一个窗口、每次移动100kb(即下面代码中step_size的默认值100000)的方式计算窗口均值,用来绘制拟合线
    以KY0DN1为例
#' Sliding-window mean of a per-position statistic.
#'
#' Windows start at 0, step_size, 2 * step_size, ... and each one spans the
#' half-open interval [start, start + window_size). Windows that contain no
#' positions are dropped from the result.
#'
#' @param pos Numeric vector of positions.
#' @param value Numeric vector of values, parallel to `pos`.
#' @param window_size Width of each window, in the same units as `pos`.
#' @param step_size Distance between two consecutive window starts.
#' @return A data.frame with columns `pos` (window midpoint) and `value`
#'   (mean of `value` over the positions falling inside that window).
calcValueByWindow <- function(pos, value,
                              window_size = 1000000,
                              step_size = 100000){
  stopifnot(length(pos) == length(value),
            window_size > 0,
            step_size > 0)

  # Nothing to average: max() / seq() would fail on empty input.
  if (length(pos) == 0) {
    return(data.frame(pos = numeric(0), value = numeric(0)))
  }

  max_pos <- max(pos)

  # Window starts advance by step_size; each window is window_size wide, so
  # consecutive windows overlap whenever step_size < window_size.
  window_start <- seq(0, max_pos + window_size, step_size)
  window_end <- window_start + window_size

  # Mean of the values inside each window. Half-open bounds make sure a
  # position lying exactly on a window edge is still counted.
  # which() drops NA comparisons, so NA positions are simply ignored.
  mean_value <- vapply(seq_along(window_start), function(j) {
    in_window <- which(pos >= window_start[j] & pos < window_end[j])
    mean(value[in_window])
  }, numeric(1))

  # mean() of an empty selection is NaN; drop those empty windows.
  keep <- !is.nan(mean_value)
  data.frame(pos   = ((window_start + window_end) / 2)[keep],
             value = mean_value[keep])
}
# Draw the twelve chromosome panels on a 3 x 4 grid.
par(mfrow = c(3, 4))

# Y-axis label shared by every panel.
delta_label <- expression(paste(Delta, " " ,"SNP index"))

for (i in sprintf("chr%02d", 1:12)) {

  # Keep the rows of freq2 whose row name contains this chromosome id.
  on_chrom <- grepl(i, row.names(freq2))
  freq_flt <- freq2[on_chrom, ]

  # The position is parsed from the 7th character of the row name onward
  # (presumably names look like "chrNN_<pos>" -- confirm against freq2).
  pos <- as.numeric(substring(row.names(freq_flt), 7))

  # Delta SNP index: first column minus second column.
  snp_index <- freq_flt[, 1] - freq_flt[, 2]

  # Windowed means used for the fitted line.
  df <- calcValueByWindow(pos = pos, value = snp_index)

  # Raw points first, then the windowed mean on top in red.
  plot(pos, snp_index,
       xlab = i,
       ylab = delta_label,
       ylim = c(-1, 1),
       pch = 20,
       cex = 0.2)
  lines(df$pos, df$value, col = "red")
}
lines

2.QTLseqr

devtools::install_github("bmansfeld/QTLseqr")
library(QTLseqr)
library(vcfR)

# Start from a clean workspace; leftover variables from earlier steps are an
# easy source of mistakes. Note: this removes every object in the global
# environment (attached packages stay loaded).
rm(list = ls())
# Set the working directory; the relative paths below depend on it.
setwd("~/workspace/BSA/practice/")
# Load the filtered SNP calls.
vcf <- read.vcfR("4.variants_filter/snps.vcf")
chrom <- getCHROM(vcf)
pos <- getPOS(vcf)
ref <- getREF(vcf)
alt <- getALT(vcf)

# Per-sample allelic depth (AD), split into its first and second element.
# NOTE(review): presumably element 1 is the REF depth and element 2 the ALT
# depth -- confirm against the VCF header.
ad <- extract.gt(vcf, "AD")
ref_split <- masplit(ad, record = 1, sort = 0)
alt_split <- masplit(ad, record = 2, sort = 0)
gt <- extract.gt(vcf, "GT")
# Build a data frame in the layout expected by QTLseqr::importFromTable().
# NOTE(review): columns 3 and 4 are assumed to be samples SRR6327817 and
# SRR6327818 -- verify with colnames(ad).
df <- data.frame(CHROM = chrom,
                 POS = pos,
                 REF = ref,
                 ALT = alt,
                 AD_REF.SRR6327817 = ref_split[,3],
                 AD_ALT.SRR6327817 = alt_split[,3],
                 AD_REF.SRR6327818 = ref_split[,4],
                 AD_ALT.SRR6327818 = alt_split[,4]
)

# Keep sites where SRR6327815 is not called "0/1" and SRR6327816 is "0/1".
# which() also drops sites where either genotype is NA.
mask <- which(gt[,"SRR6327815"] != "0/1" &  gt[,"SRR6327816"] == "0/1")
df <- df[mask,]
# Spell out FALSE: the shorthand F is an ordinary variable and can be
# reassigned.
write.table(df, file = "rice.tsv", sep = "\t",
            row.names = FALSE, quote = FALSE)
# Read the table back in QTLseqr's format.
chrom_ids <- sprintf("chr%02d", 1:12)
df <- importFromTable("rice.tsv",
                      highBulk = "SRR6327817",
                      lowBulk = "SRR6327818",
                      chromList = chrom_ids,
                      sep = "\t")

# Drop sites whose SNP index is NA in either bulk.
has_na_index <- is.na(df$SNPindex.LOW) | is.na(df$SNPindex.HIGH)
df <- df[!has_na_index, ]

# G' statistic.
df <- runGprimeAnalysis(
  SNPset = df,
  windowSize = 1e6,
  outlierFilter = "deltaSNP"
)

# Delta SNP index with confidence intervals.
df <- runQTLseqAnalysis(
  SNPset = df,
  windowSize = 1e6,
  popStruc = "RIL",
  bulkSize = c(20, 20)
)

# Plot G' with its threshold line.
plotQTLStats(SNPset = df, var = "Gprime", plotThreshold = TRUE, q = 0.01)

# Plot delta SNP index with its confidence intervals.
plotQTLStats(SNPset = df, var = "deltaSNP", plotIntervals = TRUE)

Gprime deltaSNP
  3. ggplot2绘图
    跟着文献里的图画的,努力在还原了…
# Genome-wide delta SNP index: one facet per chromosome, laid side by side.
ggplot(data = df, mapping = aes(x = POS, y = deltaSNP)) +
  # Points coloured by chromosome.
  geom_point(
    aes(color = as.factor(CHROM)),
    position = "jitter",
    size = 0.8,
    alpha = 0.8
  ) +
  # 12 columns, a free x scale per chromosome, strip labels at the bottom.
  facet_wrap(
    ~CHROM,
    ncol = 12,
    scales = "free_x",
    strip.position = "bottom"
  ) +
  # Fitted line; se = FALSE removes the confidence band around it.
  geom_smooth(
    method = "gam",
    se = FALSE,
    fullrange = TRUE,
    color = "black",
    size = 0.7
  ) +
  # Y-axis range.
  ylim(0, 1) +
  # Y-axis label.
  ylab(expression(paste(Delta, " " ,"SNP index"))) +
  theme(
    legend.position = "none",
    # No x-axis tick labels.
    axis.text.x = element_blank(),
    # No panel border and no vertical grid lines (major or minor).
    panel.border = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    # No horizontal gap between facets.
    panel.spacing.x = unit(0, "cm"),
    # Strip labels sit outside the axes, on a white background.
    strip.placement = "outside",
    strip.background.x = element_rect(color = "white", fill = "white")
  )
△SNP index

这里的拟合线我是直接用geom_smooth(method = 'gam')拟合的(不是lm),但感觉还是1里的那个线比较好。
但是1里是分了12条染色体进行绘制的,我再想想怎么样在这个图里加上1里的拟合线。

2022.5.17更新
对数据处理改了一下,上面那个是按照binmapr算出来的deltaSNP画的图
感觉结果与文献里不太符合,换了一下参数设置,(SRR17和SRR18换了个位置),然后就是加了一条y=0.5的水平虚线


calc_deltaSNP

至于那个文章中的拟合曲线,还没想到怎么样可以加上去。。待我再思考思考。。

上一篇下一篇

猜你喜欢

热点阅读