全基因组/外显子组测序分析生物信息分析:从入门到精通

Varscan+AnnotSV+RCircos 拷贝数变异分析

2019-03-25  本文已影响249人  五谷吃不完了

1.使用varscan软件检测肿瘤配对样本的拷贝数变异情况

1.1-1.2:官网上1.1-1.2的步骤如下:

#! /bin/sh
#PBS -q core24
#PBS -l mem=5gb,nodes=1:ppn=2,walltime=1000:00:00
#HSCHED -s rnaseq+hisat2+human

for i in sample1 sample2
do
genome=/pathway to reference genome/ucsc.hg19.genome.fa
normal=/pathway to normal sample/$i/normal_deduped.bam
tumor=/pathway to tumor sample/$i/tumor_deduped.bam

samtools mpileup -q 1 -f $genome $normal $tumor |\
awk -F"\t" '$4 > 0 && $7 > 0' |\
java -jar /pathway to varscan/VarScan.v2.4.0.jar copynumber  /pathway to VarScan_cnv/$i --mpileup 1

java -jar /pathway to varscan/VarScan.v2.4.0.jar  copyCaller  /pathway to VarScan_cnv/$i.copynumber --output-file /output way/VarScan_cnv/$i.varScan.cnv.called

done

1.3:用R包DNAcopy

library(DNAcopy)
cn <- read.table("your.cn.file",header=F)
CNA.object <-CNA( genomdat = cn[,6], chrom = cn[,1], maploc = cn[,2], data.type = 'logratio')
CNA.smoothed <- smooth.CNA(CNA.object)
segs <- segment(CNA.smoothed, verbose=0, min.width=2)
segs2 = segs$output
write.table(segs2[,2:6], file="out.file", row.names=F, col.names=F, quote=F, sep="\t")

1.4:如果上一步结果有问题,重复步骤3
1.5:将临近的,拷贝数变化相近的片段整合到一起

perl mergeSegments.pl out.copynumber.called.seg  --ref-arm-sizes armsize.txt --output out

以上:参考链接1 参考链接2 参考链接3

2.用AnnotSV软件注释拷贝数变异结果

该软件提供了在线注释的网站,速度很快,结果会以邮件的形式发送。

3.用RCircos包可视化拷贝数变异的结果

rm(list = ls())
setwd('your work place')

library(RCircos)
data(UCSC.HG19.Human.CytoBandIdeogram)
head(UCSC.HG19.Human.CytoBandIdeogram)
chr.exclude <- NULL
cyto.info <- UCSC.HG19.Human.CytoBandIdeogram
tracks.inside <- 2
tracks.outside <- 0
RCircos.Set.Core.Components(cyto.info, chr.exclude,tracks.inside, tracks.outside)
RCircos.Set.Plot.Area() #根据实际情况构建画图区域
RCircos.Chromosome.Ideogram.Plot() #一键画图

rcircos.params <- RCircos.Get.Plot.Parameters()
rcircos.params$heatmap.color <- 'BlueWhiteRed'
RCircos.Reset.Plot.Parameters(rcircos.params)

########################################
###########for sample1 ###############
########################################
data=read.table('sample1.annotSV.gene.tsv',header = TRUE,sep='\t',stringsAsFactors = F)
for (i in 1:length(data$Chromosome)){
  data[i,1]=paste('chr',data[i,1],sep = '')
}
data.col <- 5
track.num <- 1
side <- "in"
RCircos.Heatmap.Plot(data,data.col,track.num,side)

########################################
###########for sample2 ###############
########################################
data=read.table('sample2.annotSV.gene.tsv',header = TRUE,sep='\t',stringsAsFactors = F)
for (i in 1:length(data$Chromosome)){
  data[i,1]=paste('chr',data[i,1],sep = '')
}
data.col <- 5
track.num <- 2
side <- "in"
RCircos.Heatmap.Plot(data, data.col,track.num, side)

以上:参考链接1 参考链接2

最后附上一张结果图:

上一篇下一篇

猜你喜欢

热点阅读