WDL

WDL-第5学

2021-03-31  本文已影响0人  MR来了

问题:如何用 bam2fastx 将 BAM 文件转换成 FASTQ(或 FASTA)格式?本文记录在这个过程中学到的 WDL 知识。

解决办法:

下面的脚本来源于 BioWDL 的一个例子(bam2fastx 任务):

version 1.0
# Task: convert one or more PacBio BAM files into one gzipped FASTA file by
# running the `bam2fasta` executable.
#
# Inputs:
#   bam              - input BAM file(s); the `+` requires at least one file.
#   bamIndex         - index file(s) for the BAMs; linked next to the BAMs.
#   outputPrefix     - output directory path + file name prefix.
#   compressionLevel - forwarded to bam2fasta as `-c`.
#   splitByBarcode   - when true, `--split-barcodes` is added to the command.
#   preCommand       - shell snippet executed verbatim right before bam2fasta
#                      (e.g. environment activation).
#   seqIdPrefix      - optional; forwarded as `--seqid-prefix` when defined.
#   timeMinutes      - declared for interface compatibility; this task has no
#                      runtime section, so the value is currently not used.
# Output:
#   fastaFile        - outputPrefix + ".fasta.gz".
task Bam2Fasta {
    input {
        # At least one file is required.
        Array[File]+ bam
        Array[File]+ bamIndex
        String outputPrefix
        Int compressionLevel = 1
        Boolean splitByBarcode = false
        String preCommand = ''

        String? seqIdPrefix
        Int timeMinutes = 15
    }
    command <<<
        set -e
        # Work inside the output directory so the BAM files and their indexes
        # can be linked side by side in the current folder.
        mkdir -p "$(dirname "~{outputPrefix}")"
        cd "$(dirname "~{outputPrefix}")"

        # Link the BAM files and collect their local names for the command line.
        bamFiles=""
        for bamFile in ~{sep=" " bam}
        do
            bamName="$(basename "$bamFile")"
            # Skip the link when it already points at the same file (re-run, or
            # a sibling task sharing this output directory); a plain ln would
            # fail with "File exists" and abort the script under set -e.
            [ "$bamFile" -ef "$bamName" ] || ln -f "$bamFile" "$bamName"
            bamFiles="$bamFiles $bamName"
        done

        # Link the index files next to the BAM files.
        for index in ~{sep=" " bamIndex}
        do
            indexName="$(basename "$index")"
            [ "$index" -ef "$indexName" ] || ln -f "$index" "$indexName"
        done

        ~{preCommand}

        # The shell is already inside the directory part of outputPrefix, so
        # only the basename is passed. Passing the full prefix would nest the
        # directory twice whenever outputPrefix is a relative path.
        bam2fasta \
        --output "$(basename "~{outputPrefix}")" \
        -c ~{compressionLevel} \
        ~{true="--split-barcodes" false="" splitByBarcode} \
        ~{"--seqid-prefix " + seqIdPrefix} \
        $bamFiles
    >>>
    output {
        File fastaFile = outputPrefix + ".fasta.gz"
    }
}
# Task: convert one or more PacBio BAM files into one gzipped FASTQ file by
# running the `bam2fastq` executable.
#
# Inputs:
#   bam              - input BAM file(s); the `+` requires at least one file.
#   bamIndex         - index file(s) for the BAMs; linked next to the BAMs.
#   outputPrefix     - output directory path + file name prefix.
#   compressionLevel - forwarded to bam2fastq as `-c`.
#   splitByBarcode   - when true, `--split-barcodes` is added to the command.
#   preCommand       - shell snippet executed verbatim right before bam2fastq
#                      (e.g. environment activation).
#   seqIdPrefix      - optional; forwarded as `--seqid-prefix` when defined.
# Output:
#   fastqFile        - outputPrefix + ".fastq.gz".
task Bam2Fastq {
    input {
        Array[File]+ bam
        Array[File]+ bamIndex
        String outputPrefix
        Int compressionLevel = 1
        Boolean splitByBarcode = false
        String preCommand = ''
        String? seqIdPrefix
    }
    command <<<
        set -e
        # Work inside the output directory so the BAM files and their indexes
        # can be linked side by side in the current folder.
        mkdir -p "$(dirname "~{outputPrefix}")"
        cd "$(dirname "~{outputPrefix}")"

        # Link the BAM files and collect their local names for the command line.
        bamFiles=""
        for bamFile in ~{sep=" " bam}
        do
            bamName="$(basename "$bamFile")"
            # Skip the link when it already points at the same file (re-run, or
            # a sibling task sharing this output directory); a plain ln would
            # fail with "File exists" and abort the script under set -e.
            [ "$bamFile" -ef "$bamName" ] || ln -f "$bamFile" "$bamName"
            bamFiles="$bamFiles $bamName"
        done

        # Link the index files next to the BAM files.
        for index in ~{sep=" " bamIndex}
        do
            indexName="$(basename "$index")"
            [ "$index" -ef "$indexName" ] || ln -f "$index" "$indexName"
        done

        ~{preCommand}

        # The shell is already inside the directory part of outputPrefix, so
        # only the basename is passed. Passing the full prefix would nest the
        # directory twice whenever outputPrefix is a relative path.
        bam2fastq \
        --output "$(basename "~{outputPrefix}")" \
        -c ~{compressionLevel} \
        ~{true="--split-barcodes" false="" splitByBarcode} \
        ~{"--seqid-prefix " + seqIdPrefix} \
        $bamFiles
    >>>
    output {
        File fastqFile = outputPrefix + ".fastq.gz"
    }
}
# Workflow: convert PacBio BAM file(s) to FASTQ and/or FASTA.
#
# Two independent switches select which conversion task runs:
#   bam2fastq_exec -> call Bam2Fastq (enabled by default)
#   bam2fasta_exec -> call Bam2Fasta (disabled by default)
# All remaining inputs are forwarded unchanged to whichever task is called.
#
# The workflow declares no output section; the results are the call outputs
# Bam2Fastq.fastqFile and Bam2Fasta.fastaFile. Each call sits inside an `if`,
# so its output is optional and is null when the switch is false.
workflow wf_bam2fastx {
    input {
        Boolean bam2fastq_exec = true
        Boolean bam2fasta_exec = false
        Array[File]+ bam
        Array[File]+ bamIndex
        String outputPrefix
        Int compressionLevel = 1
        Boolean splitByBarcode = false
        String preCommand = ''
        String? seqIdPrefix
    }

    # BAM -> FASTQ
    if (bam2fastq_exec) {
        call Bam2Fastq {
            input:
                bam = bam,
                bamIndex = bamIndex,
                outputPrefix = outputPrefix,
                compressionLevel = compressionLevel,
                splitByBarcode = splitByBarcode,
                seqIdPrefix = seqIdPrefix,
                preCommand = preCommand
        }
    }

    # BAM -> FASTA
    if (bam2fasta_exec) {
        call Bam2Fasta {
            input:
                bam = bam,
                bamIndex = bamIndex,
                outputPrefix = outputPrefix,
                compressionLevel = compressionLevel,
                splitByBarcode = splitByBarcode,
                seqIdPrefix = seqIdPrefix,
                preCommand = preCommand
        }
    }

    # Every key below corresponds to an input declared by this workflow.
    # The task outputs (fastqFile, fastaFile) are not workflow parameters and
    # therefore are not listed here.
    parameter_meta {
        # inputs
        bam2fastq_exec: {description: "Run the Bam2Fastq task (BAM to FASTQ).", category: "common"}
        bam2fasta_exec: {description: "Run the Bam2Fasta task (BAM to FASTA).", category: "common"}
        bam: {description: "The input pacbio bam file(s).", category: "required"}
        bamIndex: {description: "The .pbi index for the input file(s).", category: "required"}
        outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
        compressionLevel: {description: "Gzip compression level [1-9].", category: "advanced"}
        splitByBarcode: {description: "Split output into multiple fastq files, by barcode pairs.", category: "advanced"}
        preCommand: {description: "Shell command(s) executed in each task right before the conversion tool, e.g. to set up the environment.", category: "advanced"}
        seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"}
    }
}
{
  "wf_bam2fastx.preCommand": "unset PYTHONPATH && export PATH=\"/your_path/bin:$PATH\" && source activate /your_path/envs/ccs_env",
  "wf_bam2fastx.compressionLevel": 1,
  "wf_bam2fastx.bam": ["/your_path/PM.1029.ccs.bam", "/your_path/PM.0033.ccs.bam"],
  "wf_bam2fastx.splitByBarcode": false,
  "wf_bam2fastx.outputPrefix": "/your_path/bam2fastx_out/CCS",
  "wf_bam2fastx.bamIndex": ["/your_path/PM.1029.ccs.bam.pbi", "/your_path/PM.0033.ccs.bam.pbi"],
  "wf_bam2fastx.bam2fastq_exec": true,
  "wf_bam2fastx.bam2fasta_exec": false
}
java -jar cromwell-57.jar run bam2fastx.wdl --inputs bam2fastx.wdl.json
{
  "wf_bam2fastx.Bam2Fastq.fastqFile": "/your_path/bam2fastx_out/CCS.fastq.gz",
  "wf_bam2fastx.Bam2Fasta.fastaFile": null
}
[2021-03-31 16:03:40,49] [info] WorkflowManagerActor WorkflowActor-6f9f888d-fefa-4c20-8199-820645949ec0 is in a terminal state: WorkflowSucceededState
[2021-03-31 16:03:45,36] [info] SingleWorkflowRunnerActor workflow finished with status 'Succeeded'.
{
  "outputs": {
    "wf_bam2fastx.Bam2Fasta.fastaFile": null,
    "wf_bam2fastx.Bam2Fastq.fastqFile": "/your_path/bam2fastx_out/CCS.fastq.gz"
  },
  "id": "6f9f888d-fefa-4c20-8199-820645949ec0"
}

上一篇下一篇

猜你喜欢

热点阅读