RNASeq 数据分析

GATK's Pipeline for calling

2018-02-01  本文已影响58人  晨语凡心

#!/usr/bin/sh

genome=/home/chenfan/mnt/sdc/RNAseq/hs37d5.fasta

refgene=/home/chenfan/mnt/sdc/RNAseq/hg19_Refgene.gtf

picard=/home/chenfan/mnt/sdc/RNAseq/picard.jar

gatk=/home/chenfan/mnt/sdc/RNAseq/GenomeAnalysisTK.jar

knownindel=/home/chenfan/VQSR/data/Mills_and_1000G_gold_standard.indels.b37.vcf

knownsnp=/home/chenfan/VQSR/data/1000G_phase1.snps.high_confidence.b37.vcf

result_path=/home/chenfan/mnt/sdc/RNAseq/result

#------------------------------STAR 2-pass alignment steps-----------------------------------------

echo "------------------------------STAR 2-pass alignment steps-----------------------------------------"

in_dir=${result_path}/1_bamtofastq_output

out_dir=${result_path}/2_star_output

cd ${out_dir}/1index

STAR --runMode genomeGenerate --genomeDir ${out_dir}/1index --genomeFastaFiles $genome --sjdbGTFfile $refgene --runThreadN 30

cd ${out_dir}/1pass

STAR --genomeDir ${out_dir}/1index --readFilesIn ${in_dir}/1.fastq  ${in_dir}/2.fastq --runThreadN 30

cd ${out_dir}/2index

STAR --runMode genomeGenerate --genomeDir ${out_dir}/2index --genomeFastaFiles $genome --sjdbFileChrStartEnd ${out_dir}/1pass/SJ.out.tab --sjdbOverhang 154 --runThreadN 30

cd ${out_dir}/2pass

STAR --genomeDir ${out_dir}/2index --readFilesIn ${in_dir}/1.fastq ${in_dir}/2.fastq  --runThreadN 30

#------------------------------Picard add read groups,sort,mark duplicates,and create index-----------------------------------------

echo "------------------------------Picard add read groups,sort,mark duplicates,and create index-----------------------------------------"

in_dir=${out_dir}

out_dir=${result_path}/3_picard_output

cd ${out_dir}

java -jar $picard AddOrReplaceReadGroups I=${in_dir}/2pass/Aligned.out.sam O=${out_dir}/sort_addRG_align.bam SO=coordinate RGID=hap1 RGLB=rnaseq RGPL=illumina RGPU=runname RGSM=C20

java -jar $picard MarkDuplicates I=${out_dir}/sort_addRG_align.bam O=${out_dir}/markdup_sort_addRG_align.bam  CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT M=${out_dir}/output.metrics

#-------------------------------SplitNCigarReads---------------------------------------

echo "------------------------------SplitNCigarReads-----------------------------------------"

in_dir=${out_dir}

out_dir=${result_path}/4_splitncigarReads_output

cd ${out_dir}

java -jar $gatk -T SplitNCigarReads -R $genome -I ${in_dir}/markdup_sort_addRG_align.bam -o ${out_dir}/split_markdup_sort_addRG_align.bam -rf ReassignOneMappingQuality -RMQF 255 -RMQT 60 -U ALLOW_N_CIGAR_READS

#-------------------------------Indel Realignment ---------------------------------------

echo "------------------------------Indel Realignment -----------------------------------------"

in_dir=${out_dir}

out_dir=${result_path}/5_indelrealign_output

cd ${out_dir}

java -jar $gatk \

-T RealignerTargetCreator \

-nt 32 \

-R $genome \

-I ${in_dir}/split_markdup_sort_addRG_align.bam \

-o ${out_dir}/realign_interval.list \

-known $knownindel

java -jar $gatk \

-T IndelRealigner \

-R $genome \

-I ${in_dir}/split_markdup_sort_addRG_align.bam \

-known $knownindel \

-o ${out_dir}/realign_split_markdup_sort_addRG_align.bam \

-targetIntervals ${out_dir}/realign_interval.list

#-------------------------------Base Recalibration---------------------------------------

echo "------------------------------Base Recalibration-----------------------------------------"

in_dir=${out_dir}

out_dir=${result_path}/6_bqsr_output

cd ${out_dir}

java -jar $gatk \

-T BaseRecalibrator \

-nct 32 \

-R $genome \

-I ${in_dir}/realign_split_markdup_sort_addRG_align.bam \

-knownSites $knownsnp \

-knownSites $knownindel \

-o ${out_dir}/recal_data.table

java -jar $gatk \

-T PrintReads \

-nct 32 \

-R $genome \

-I ${in_dir}/realign_split_markdup_sort_addRG_align.bam \

-BQSR ${out_dir}/recal_data.table \

-o ${out_dir}/bqsr_realign_split_markdup_sort_addRG_align.bam

#-------------------------------Variant calling---------------------------------------

echo "------------------------------Variant calling-----------------------------------------"

in_dir=${out_dir}

out_dir=${result_path}/7_callvariants_output

cd ${out_dir}

java -jar $gatk \

-T HaplotypeCaller \

-nct 32 \

-R $genome \

-ploidy 1 \

-I ${in_dir}/bqsr_realign_split_markdup_sort_addRG_align.bam \

-dontUseSoftClippedBases \

-stand_call_conf 20.0 \

-o ${out_dir}/Hap1_rnaseq.vcf

#-------------------------------Variant filtering---------------------------------------

echo "------------------------------Variant filtering-----------------------------------------"

in_dir=${out_dir}

out_dir=${result_path}/8_filtervariants_output

cd ${out_dir}

java -jar $gatk \

-T VariantFiltration \

-R $genome \

-V ${in_dir}/Hap1_rnaseq.vcf \

-window 35 \

-cluster 3 \

--filterName FS -filter "FS>30.0" \

--filterName QD -filter "QD<2.0" \

-o ${out_dir}/filtered_Hap1_rnaseq.vcf

java -jar $gatk \

-T SelectVariants \

-ef \

-R $genome \

-V ${out_dir}/filtered_Hap1_rnaseq.vcf \

-o ${out_dir}/ef_Hap1_C20_rnaseq.vcf

上一篇下一篇

猜你喜欢

热点阅读