🔧 batch running adjustments; more docs

kids-first · Sep 7, 2022 · 2f92e5f · 2f92e5f
1 parent 703a229
commit 2f92e5f
Show file tree

Hide file tree

Showing 2 changed files with 39 additions and 21 deletions.
diff --git a/docs/GERMLINE_SV_README.md b/docs/GERMLINE_SV_README.md
@@ -1,4 +1,4 @@
-# Kids First Data Resource Center Germline Structural Variant Caller Workflow 
+# Kids First Data Resource Center Germline Structural Variant Caller Workflow
 
 <p align="center">
   <img src="https://github.com/d3b-center/d3b-research-workflows/raw/master/doc/kfdrc-logo-sm.png">
@@ -65,7 +65,7 @@ potential pathogenicity and ii) filter out SV potential false positives.
 ## Input Files
 
 At the moment the workflow uses only a few inputs:
-- `input_bams`: The germline BAM input that has been aligned to a reference
+- `germline_bam`: The germline BAM input that has been aligned to a reference
   genome.
 - `indexed_reference_fasta`: The reference genome fasta (and associated
   indices) to which the germline BAM was aligned.
@@ -75,7 +75,7 @@ At the moment the workflow uses only a few inputs:
 these are the annotations installed with v3.1.1 of the software. Newer or older
 annotations can be slotted in here as needed.
 - `annotsv_genome_build`: The genome build of the reference fasta. AnnotSV is
-  capable of annotating the following genomes: "GRCh37","GRCh38","mm9","mm10". 
+  capable of annotating the following genomes: "GRCh37","GRCh38","mm9","mm10".
 - `output_basename`: Basename to use for the outputs.
 
 ## Output Files

diff --git a/workflows/kfdrc-germline-sv-wf.cwl b/workflows/kfdrc-germline-sv-wf.cwl
@@ -70,7 +70,7 @@ doc: |
   ## Input Files
 
   At the moment the workflow uses only a few inputs:
-  - `input_bams`: The germline BAM input that has been aligned to a reference
+  - `germline_bam`: The germline BAM input that has been aligned to a reference
     genome.
   - `indexed_reference_fasta`: The reference genome fasta (and associated
     indices) to which the germline BAM was aligned.
@@ -124,6 +124,8 @@ inputs:
     - {pattern: '.64.bwt', required: true}
     - {pattern: '.64.pac', required: true}
     - {pattern: '.64.sa', required: true}
+    doc: |
+      The reference genome fasta (and associated indices) to which the germline BAM was aligned.
     "sbg:fileTypes": "FASTA, FA"
     "sbg:suggestedValue": {class: File, path: 60639014357c3a53540ca7a3, name: Homo_sapiens_assembly38.fasta,
       secondaryFiles: [{class: File, path: 60639019357c3a53540ca7e7, name: Homo_sapiens_assembly38.dict},
@@ -134,20 +136,22 @@ inputs:
         {class: File, path: 6063901d357c3a53540ca81e, name: Homo_sapiens_assembly38.fasta.64.bwt},
         {class: File, path: 6063901c357c3a53540ca801, name: Homo_sapiens_assembly38.fasta.64.pac},
         {class: File, path: 60639015357c3a53540ca7a9, name: Homo_sapiens_assembly38.fasta.64.sa}]}
-  input_bams: {type: 'File[]', secondaryFiles: [{pattern: '^.bai', required: false},
+  germline_bam: {type: 'File', secondaryFiles: [{pattern: '^.bai', required: false},
       {pattern: '.bai', required: false}], doc: "Input BAM file", "sbg:fileTypes": "BAM"}
 
-  annotsv_annotations_dir: {type: 'File', doc: "TAR.GZ'd Directory containing annotations",
-    "sbg:fileTypes": "TAR, TAR.GZ, TGZ", "sbg:suggestedValue": {class: File, path: 6245fde8274f85577d646da0,
-      name: annotsv_311_annotations_dir.tgz}}
+  annotsv_annotations_dir: {type: 'File', doc: "TAR.GZ'd Directory containing AnnotSV\
+      \ annotations", "sbg:fileTypes": "TAR, TAR.GZ, TGZ", "sbg:suggestedValue": {
+      class: File, path: 6245fde8274f85577d646da0, name: annotsv_311_annotations_dir.tgz}}
   annotsv_genome_build:
     type:
     - 'null'
     - type: enum
       name: annotsv_genome_build
       symbols: ["GRCh37", "GRCh38", "mm9", "mm10"]
+    doc: |
+      The genome build of the reference fasta. AnnotSV is capable of annotating the following genomes: "GRCh37","GRCh38","mm9","mm10".
 
-  output_basename: {type: 'string', doc: "String value to use as basename for outputs"}
+  output_basename: {type: 'string?', doc: "String value to use as basename for outputs"}
 
   # Resource Requirements
   svaba_cpu: {type: 'int?', doc: "CPUs to allocate to SVaba"}
@@ -156,24 +160,33 @@ inputs:
   manta_ram: {type: 'int?', doc: "GB of RAM to allocate to Manta"}
 
 outputs:
-  svaba_indels: {type: 'File', outputSource: svaba/germline_indel_vcf_gz}
-  svaba_svs: {type: 'File', outputSource: svaba/germline_sv_vcf_gz}
-  svaba_annotated_svs: {type: 'File?', outputSource: annotsv_svaba/annotated_calls}
-  svaba_unannotated_svs: {type: 'File?', outputSource: annotsv_svaba/unannotated_calls}
-  manta_indels: {type: 'File', outputSource: manta/small_indels}
-  manta_svs: {type: 'File', outputSource: manta/output_sv}
-  manta_annotated_svs: {type: 'File?', outputSource: annotsv_manta/annotated_calls}
-  manta_unannotated_svs: {type: 'File?', outputSource: annotsv_manta/unannotated_calls}
+  svaba_indels: {type: 'File', outputSource: svaba/germline_indel_vcf_gz, doc: "VCF\
+      \ containing INDEL variants called by SvABA"}
+  svaba_svs: {type: 'File', outputSource: svaba/germline_sv_vcf_gz, doc: "VCF containing\
+      \ SV called by SvABA"}
+  svaba_annotated_svs: {type: 'File?', outputSource: annotsv_svaba/annotated_calls,
+    doc: "TSV containing annotated variants from the svaba_svs output"}
+  manta_indels: {type: 'File', outputSource: manta/small_indels, doc: "VCF containing\
+      \ INDEL variants called by Manta"}
+  manta_svs: {type: 'File', outputSource: manta/output_sv, doc: "VCF containing SV\
+      \ called by Manta"}
+  manta_annotated_svs: {type: 'File?', outputSource: annotsv_manta/annotated_calls,
+    doc: "TSV containing annotated variants from the manta_svs output"}
 
 steps:
   svaba:
     run: ../tools/svaba.cwl
     in:
-      tumor_bams: input_bams
+      tumor_bams:
+        source: germline_bam
+        valueFrom: $([self])
       reference_genome: indexed_reference_fasta
       germline:
         valueFrom: $(1 == 1)
-      output_basename: output_basename
+      output_basename:
+        source: output_basename
+        valueFrom: |
+          $(self != null ? self : inputs.tumor_bams[0].basename.split('.')[0])
       cores: svaba_cpu
       ram: svaba_ram
     out: [alignments, bps, contigs, log, germline_indel_vcf_gz, germline_indel_unfiltered_vcf_gz,
@@ -183,8 +196,13 @@ steps:
     run: ../tools/manta.cwl
     in:
       reference: indexed_reference_fasta
-      input_normal_reads: input_bams
-      output_basename: output_basename
+      input_normal_reads:
+        source: germline_bam
+        valueFrom: $([self])
+      output_basename:
+        source: output_basename
+        valueFrom: |
+          $(self != null ? self : inputs.input_normal_reads[0].basename.split('.')[0])
       cores: manta_cpu
       ram: manta_ram
     out: [output_sv, small_indels]