Varscan+AnnotSV+RCircos 拷贝数变异分析
2019-03-25 本文已影响249人
五谷吃不完了
1.使用varscan软件检测肿瘤配对样本的拷贝数变异情况
1.1-1.2:官网上1.1-1.2的步骤如下:
#! /bin/sh
#PBS -q core24
#PBS -l mem=5gb,nodes=1:ppn=2,walltime=1000:00:00
#HSCHED -s rnaseq+hisat2+human
for i in sample1 sample2
do
genome=/pathway to reference genome/ucsc.hg19.genome.fa
normal=/pathway to normal sample/$i/normal_deduped.bam
tumor=/pathway to tumor sample/$i/tumor_deduped.bam
samtools mpileup -q 1 -f $genome $normal $tumor |\
awk -F"\t" '$4 > 0 && $7 > 0' |\
java -jar /pathway to varscan/VarScan.v2.4.0.jar copynumber /pathway to VarScan_cnv/$i --mpileup 1
java -jar /pathway to varscan/VarScan.v2.4.0.jar copyCaller /pathway to VarScan_cnv/$i.copynumber --output-file /output way/VarScan_cnv/$i.varScan.cnv.called
done
1.3:用R包DNAcopy
library(DNAcopy)
cn <- read.table("your.cn.file",header=F)
CNA.object <-CNA( genomdat = cn[,6], chrom = cn[,1], maploc = cn[,2], data.type = 'logratio')
CNA.smoothed <- smooth.CNA(CNA.object)
segs <- segment(CNA.smoothed, verbose=0, min.width=2)
segs2 = segs$output
write.table(segs2[,2:6], file="out.file", row.names=F, col.names=F, quote=F, sep="\t")
1.4:如果上一步结果有问题,重复步骤3
1.5:将临近的,拷贝数变化相近的片段整合到一起
perl mergeSegments.pl out.copynumber.called.seg --ref-arm-sizes armsize.txt --output out
2.用AnnotSV软件注释拷贝数变异结果
该软件提供了在线注释的网站,速度很快,结果会以邮件的形式发送。
3.用RCircos包可视化拷贝数变异的结果
rm(list = ls())
setwd('your work place')
library(RCircos)
data(UCSC.HG19.Human.CytoBandIdeogram)
head(UCSC.HG19.Human.CytoBandIdeogram)
chr.exclude <- NULL
cyto.info <- UCSC.HG19.Human.CytoBandIdeogram
tracks.inside <- 2
tracks.outside <- 0
RCircos.Set.Core.Components(cyto.info, chr.exclude,tracks.inside, tracks.outside)
RCircos.Set.Plot.Area() #根据实际情况构建画图区域
RCircos.Chromosome.Ideogram.Plot() #一键画图
rcircos.params <- RCircos.Get.Plot.Parameters()
rcircos.params$heatmap.color <- 'BlueWhiteRed'
RCircos.Reset.Plot.Parameters(rcircos.params)
########################################
###########for sample1 ###############
########################################
data=read.table('sample1.annotSV.gene.tsv',header = TRUE,sep='\t',stringsAsFactors = F)
for (i in 1:length(data$Chromosome)){
data[i,1]=paste('chr',data[i,1],sep = '')
}
data.col <- 5
track.num <- 1
side <- "in"
RCircos.Heatmap.Plot(data,data.col,track.num,side)
########################################
###########for sample2 ###############
########################################
data=read.table('sample2.annotSV.gene.tsv',header = TRUE,sep='\t',stringsAsFactors = F)
for (i in 1:length(data$Chromosome)){
data[i,1]=paste('chr',data[i,1],sep = '')
}
data.col <- 5
track.num <- 2
side <- "in"
RCircos.Heatmap.Plot(data, data.col,track.num, side)
最后附上一张结果图: