snakemake-RNAseq
2022-01-15 本文已影响0人
球果假水晶蓝
# snakemake.py
# Experimental design: every library is named {sample}{time}_LD{rep},
# e.g. BLO_S1_LD1, built from the three lists below.
SAMPLE = ["BLO_S", "KID_S"]
TIME = list(range(1, 5))
REP = [str(n) for n in range(1, 4)]
# Path handed to Rscript by the quantification rule.
# NOTE(review): despite the name, this points at a .sh script, not a GTF file — confirm.
gtffile = "/public1/home/stu_zhangyixing/workspace/snakemake/RNASeq-ref/2.Quantification/script/run_Quantification.sh"
rule all:
    # Default target: one quantification result for every
    # sample / time point / replicate combination.
    input:
        expand(
            "2.Quantification/{sample}{time}_LD{rep}",
            sample=SAMPLE,
            time=TIME,
            rep=REP,
        )
rule hisat2_build:
    # Build the HISAT2 index for the reference genome.
    # Equivalent manual command:
    #   hisat2-build ../ref/genome.fasta ../ref/genome 1>hisat2-build.log 2>&1
    input:
        "ref/genome.fasta"
    # "ref/genome" is the index *prefix*: hisat2-build only writes
    # <prefix>.N.ht2 files, never a file with this exact name.
    output:
        "ref/genome"
    # Touch the prefix path after a successful build so the declared output
    # really exists; without it Snakemake reports the output as missing.
    shell:
        "hisat2-build {input} {output} && touch {output}"
rule hisat2:
    # Align one single-end library against the HISAT2 index and write SAM.
    # Equivalent manual command:
    #   hisat2 --new-summary -p 10 -x ../ref/genome -U ../data/BLO_S1_LD1.fq.gz -S BLO_S1_LD1.sam --rna-strandness R 1>BLO_S1_LD1.log 2>&1
    # Input files look like BLO_S1_LD1.fq.gz, BLO_S2_LD3.fq.gz.
    input:
        # input[0]: index prefix produced by hisat2_build; input[1]: reads.
        "ref/genome",
        "data/{sample}{time}_LD{rep}.fq.gz"
    output:
        "1.Mapping/{sample}{time}_LD{rep}.sam"
    # Declare the thread count so Snakemake's scheduler accounts for it
    # instead of treating each alignment as a single-core job.
    threads: 10
    shell:
        "hisat2 --new-summary -p {threads} -x {input[0]} -U {input[1]} -S {output} --rna-strandness R "
rule samtobam:
    # Sort the alignment and convert it from SAM to BAM.
    # Equivalent manual command:
    #   samtools sort -o BLO_S1_LD1.bam BLO_S1_LD1.sam
    input:
        sam="1.Mapping/{sample}{time}_LD{rep}.sam"
    output:
        bam="1.Mapping/{sample}{time}_LD{rep}.bam"
    shell:
        "samtools sort -o {output.bam} {input.sam}"
rule bam_build:
    # Index the sorted BAM file.
    # Equivalent manual command:
    #   samtools index BLO_S1_LD1.bam
    input:
        bam="1.Mapping/{sample}{time}_LD{rep}.bam"
    output:
        bai="1.Mapping/{sample}{time}_LD{rep}.bam.bai"
    # No output path is passed on the command line; the declared .bam.bai
    # is expected to appear next to the input BAM.
    shell:
        "samtools index {input.bam}"
rule quantification:
    # Quantify one library from its sorted, indexed BAM and the gene annotation.
    # Script used:
    #   /public1/home/stu_zhangyixing/workspace/snakemake/RNASeq-ref/2.Quantification/script/run_Quantification.sh
    input:
        bam="1.Mapping/{sample}{time}_LD{rep}.bam",
        gtf="ref/genes.gtf",
        # Not passed on the command line; listed so the index is built first.
        bai="1.Mapping/{sample}{time}_LD{rep}.bam.bai"
    output:
        "2.Quantification/{sample}{time}_LD{rep}"
    # This is a command line, so it must use `shell:`; `script:` expects the
    # path of a script file and would try to open this whole string as one.
    # NOTE(review): gtffile is a .sh path but is invoked through Rscript — confirm
    # it should not point at the underlying R script instead.
    shell:
        "Rscript {gtffile} -b {input.bam} -g {input.gtf} -o {output}"
rulegraph.png
# snakemake -np:-n 表示 dry-run(不实际执行任务),-p 表示打印将要执行的 shell 命令;-s 指定要使用的 Snakefile 文件,不加 -s 时默认使用当前目录下的 Snakefile
snakemake -np -s snakemake.py
image.png 下面的 job list 是按字母顺序排列的,而不是按任务的执行顺序。我原以为是按执行顺序排列的,所以一直在找并不存在的错误原因。