From 25d1e192b00d6128f7d5c8ddfe16ee8cf823f969 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Thu, 22 Aug 2024 17:23:14 -0700 Subject: [PATCH 1/7] Add computed parameter for liftover direction --- config/methods.config | 14 ++++++++++++ module/snv_workflow.nf | 51 ++++++++++++++++++++++++++++++------------ 2 files changed, 51 insertions(+), 14 deletions(-) diff --git a/config/methods.config b/config/methods.config index 05cf337..8708a2a 100644 --- a/config/methods.config +++ b/config/methods.config @@ -37,6 +37,18 @@ methods { } } + determine_liftover_direction = { + if (params.funcotator_data.src_reference_id == params.funcotator_data.dest_reference_id) { + throw new IllegalArgumentException("params.funcotator_data.src_reference_id and params.funcotator_data.dest_reference_id must be different!") + } + + if (![params.funcotator_data.src_reference_id, params.funcotator_data.dest_reference_id].contains("hg38")) { + throw new IllegalArgumentException("One of params.funcotator_data.src_reference_id and params.funcotator_data.dest_reference_id must be 'hg38'!") + } + + params.liftover_forward = params.funcotator_data.dest_reference_id == "hg38" + } + setup = { schema.load_custom_types("${projectDir}/config/custom_schema_types.config") schema.validate() @@ -49,6 +61,8 @@ methods { methods.setup_docker_cpus() methods.setup_process_afterscript() + methods.determine_liftover_direction() + json_extractor.store_object_as_json( params, new File("${params.log_output_dir}/nextflow-log/params.json") diff --git a/module/snv_workflow.nf b/module/snv_workflow.nf index f04de2d..7e63673 100644 --- a/module/snv_workflow.nf +++ b/module/snv_workflow.nf @@ -86,24 +86,47 @@ workflow workflow_extract_snv_annotations { main: - // Step 1: Liftover - run_liftover_BCFtools( - vcf_with_sample_id, - src_sequence, - dest_sequence, - chain_file - ) - - // Step 2: Annotate - workflow_apply_snv_annotations( - run_liftover_BCFtools.out.liftover_vcf_with_index, - dest_sequence - ) + // We want 
to do all of the annotating with the GRCh38 / hg38 reference. If + // the liftover is going from hg38 to hg19, defer until after annotations + if (params.liftover_forward) { + // Step 1: Liftover + run_liftover_BCFtools( + vcf_with_sample_id, + src_sequence, + dest_sequence, + chain_file + ) + + // Step 2: Annotate + workflow_apply_snv_annotations( + run_liftover_BCFtools.out.liftover_vcf_with_index, + dest_sequence + ) + + workflow_apply_snv_annotations.out.annotated_vcf.set { annotated_vcf } + + } else { + // Step 1: Annotate + workflow_apply_snv_annotations( + vcf_with_sample_id, + dest_sequence + ) + + // Step 2: Liftover + run_liftover_BCFtools( + workflow_apply_snv_annotations.out.annotated_vcf, + src_sequence, + dest_sequence, + chain_file + ) + + run_liftover_BCFtools.out.liftover_vcf_with_index.set { annotated_vcf } + } // Step 3: Extract features // FIXME Parallelize HaplotypeCaller extract_VCF_features_StableLift( - workflow_apply_snv_annotations.out.annotated_vcf + annotated_vcf ) emit: From 9af66e8372a1f88902ed4d1d9f0eee34aec4b3eb Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Mon, 26 Aug 2024 08:34:48 -0700 Subject: [PATCH 2/7] Write index file for consistency --- module/snv_annotations.nf | 6 ++++-- module/snv_workflow.nf | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/module/snv_annotations.nf b/module/snv_annotations.nf index 78ea248..141d863 100644 --- a/module/snv_annotations.nf +++ b/module/snv_annotations.nf @@ -117,7 +117,7 @@ process annotate_trinucleotide_BCFtools { container params.docker_image_bcftools publishDir path: "${params.output_dir_base}/intermediate/${task.process.replace(':', '/')}", - pattern: "output.vcf.gz", + pattern: "output.vcf.gz{,.tbi}", mode: "copy", enabled: params.save_intermediate_files, saveAs: { "Trinucleotide-annotated-${sample_id}.vcf.gz" } @@ -129,7 +129,7 @@ process annotate_trinucleotide_BCFtools { path(tsv_tbi, stageAs: 'inputs/*') output: - tuple val(sample_id), 
path('output.vcf.gz'), emit: trinucleotide_vcf + tuple val(sample_id), path('output.vcf.gz'), path('output.vcf.gz.tbi'), emit: trinucleotide_vcf script: """ @@ -137,6 +137,7 @@ process annotate_trinucleotide_BCFtools { --annotations ${tsv} \ --columns CHROM,POS,TRINUCLEOTIDE \ --header-lines <(echo '##INFO=') \ + --write-index=tbi \ --output output.vcf.gz \ ${vcf} """ @@ -144,6 +145,7 @@ process annotate_trinucleotide_BCFtools { stub: """ touch "output.vcf.gz" + touch "output.vcf.gz.tbi" """ } diff --git a/module/snv_workflow.nf b/module/snv_workflow.nf index 7e63673..34bc347 100644 --- a/module/snv_workflow.nf +++ b/module/snv_workflow.nf @@ -57,7 +57,7 @@ process extract_VCF_features_StableLift { saveAs: { "StableLift-${sample_id}.Rds" } input: - tuple val(sample_id), path(vcf) + tuple val(sample_id), path(vcf), path(index) output: tuple val(sample_id), path('features.Rds'), emit: r_annotations From f39b012a67a7a895c157df32931edd226f14187f Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Mon, 26 Aug 2024 08:35:54 -0700 Subject: [PATCH 3/7] Add NFTest case for reverse case --- nftest.yml | 7 +++++++ test/snv-backward.yaml | 4 ++++ 2 files changed, 11 insertions(+) create mode 100644 test/snv-backward.yaml diff --git a/nftest.yml b/nftest.yml index 6cbca4f..81c4baa 100644 --- a/nftest.yml +++ b/nftest.yml @@ -13,6 +13,13 @@ cases: skip: false verbose: true + - name: SNV-backward + nf_script: ./main.nf + nf_config: test/backward.config + params_file: test/snv-backward.yaml + skip: false + verbose: true + - name: SV nf_script: ./main.nf nf_config: test/sv.config diff --git a/test/snv-backward.yaml b/test/snv-backward.yaml new file mode 100644 index 0000000..83a448c --- /dev/null +++ b/test/snv-backward.yaml @@ -0,0 +1,4 @@ +--- +sample_id: ExampleID +input: + vcf: /hot/project/method/AlgorithmEvaluation/BNCH-000142-GRCh37v38/validation/TCGA-SARC_WXS/GRCh38/Mutect2/TCGA-SARC_WXS_Mutect2_merge.vcf.gz From b54932cd6b4ee67d31205bfa05c078312f95f7ad Mon Sep 17 
00:00:00 2001 From: Nicholas Wiltsie Date: Mon, 26 Aug 2024 08:37:19 -0700 Subject: [PATCH 4/7] Bugfix with reference genome selection --- module/snv_workflow.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/module/snv_workflow.nf b/module/snv_workflow.nf index 34bc347..55b1449 100644 --- a/module/snv_workflow.nf +++ b/module/snv_workflow.nf @@ -97,7 +97,7 @@ workflow workflow_extract_snv_annotations { chain_file ) - // Step 2: Annotate + // Step 2: Annotate with GRCh38 workflow_apply_snv_annotations( run_liftover_BCFtools.out.liftover_vcf_with_index, dest_sequence @@ -106,10 +106,10 @@ workflow workflow_extract_snv_annotations { workflow_apply_snv_annotations.out.annotated_vcf.set { annotated_vcf } } else { - // Step 1: Annotate + // Step 1: Annotate with GRCh38 workflow_apply_snv_annotations( vcf_with_sample_id, - dest_sequence + src_sequence ) // Step 2: Liftover From e68b07a1e30cb66255fc30cf1b6c3b99b7ed35c0 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Mon, 26 Aug 2024 11:42:23 -0700 Subject: [PATCH 5/7] Update diagram with note about reversing step order --- docs/pipeline.mmd | 27 ++++++++++++++++++--------- docs/pipeline.mmd.svg | 2 +- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/docs/pipeline.mmd b/docs/pipeline.mmd index f0bba92..0bc792b 100644 --- a/docs/pipeline.mmd +++ b/docs/pipeline.mmd @@ -50,15 +50,24 @@ flowchart TD sv_vs_snv --> bcftools_liftover subgraph SNV ["`**SNV**`"] - funcotator_sources([funcotator_sources]):::input - chain_file([chain_file]):::input - repeat_bed([repeat_bed]):::input - - bcftools_liftover[bcftools +liftover]:::bcftools - ---> gatk_func[gatk Funcotator]:::gatk - --> bcftools_annotate["`bcftools annotate*RepeatMasker*`"]:::bcftools - --> bcftools_annotate2["`bcftools annotate*Trinucleotide*`"]:::bcftools - --> r_extract_snv[extract-VCF-features.R]:::R + subgraph SNV_liftover["`**Liftover**`"] + chain_file([chain_file]):::input + bcftools_liftover[bcftools 
+liftover]:::bcftools + end + + subgraph SNV_annotation["`**Annotation**`"] + funcotator_sources([funcotator_sources]):::input + repeat_bed([repeat_bed]):::input + + gatk_func[gatk Funcotator]:::gatk + --> bcftools_annotate["`bcftools annotate*RepeatMasker*`"]:::bcftools + --> bcftools_annotate2["`bcftools annotate*Trinucleotide*`"]:::bcftools + end + + blocknote["`**Note:** Annotation is performed before Liftover when lifting backward`"] + + bcftools_liftover ---> gatk_func + bcftools_annotate2 --> r_extract_snv[extract-VCF-features.R]:::R end funcotator_sources .-> gatk_func diff --git a/docs/pipeline.mmd.svg b/docs/pipeline.mmd.svg index 714aef7..8983e1b 100644 --- a/docs/pipeline.mmd.svg +++ b/docs/pipeline.mmd.svg @@ -1 +1 @@ -         Predict StabilitySNVSVbcftools annotateStabilitypredict-liftover-stability.Rrf_modelfuncotator_sourceschain_filerepeat_bedextract-VCF-features.Rbcftools annotateTrinucleotidebcftools annotateRepeatMaskergatk Funcotatorbcftools +liftoverheader_contigschain_filegnomad_rdsextract-VCF-features-SV.Rliftover-Delly2-vcf.RLegendNodesInput FileParameterized InputOutput fileProcessesGATKbcftoolsRscriptGeneric LinuxInput VCFpipevalVariant Type?Output VCFs \ No newline at end of file +         Predict StabilitySNVSVLiftoverAnnotationbcftools annotateStabilitypredict-liftover-stability.Rrf_modelNote: Annotation isperformed beforeLiftover when liftingbackwardextract-VCF-features.Rfuncotator_sourcesrepeat_bedbcftools annotateTrinucleotidebcftools annotateRepeatMaskergatk Funcotatorchain_filebcftools +liftoverheader_contigschain_filegnomad_rdsextract-VCF-features-SV.Rliftover-Delly2-vcf.RLegendNodesInput FileParameterized InputOutput fileProcessesGATKbcftoolsRscriptGeneric LinuxInput VCFpipevalVariant Type?Output VCFs \ No newline at end of file From 6e38e94c47a546898c46533513bf6f3a727edf7a Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Mon, 26 Aug 2024 11:43:22 -0700 Subject: [PATCH 6/7] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file 
changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bc93c82..11aa2a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Add workflow for SNV callers (Mutect2, HaplotypeCaller, Strelka2, Muse2, SomaticSniper) - Add workflow for SV caller (Delly2) - Add pipeline diagram +- Add reverse liftover (GRCh38 -> GRCh37) for SNV branch ### Changed From 6a99793dad82495f4efc4c69812028b3c7b1361a Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Mon, 26 Aug 2024 11:55:44 -0700 Subject: [PATCH 7/7] Harmonize process outputs --- module/snv_workflow.nf | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/module/snv_workflow.nf b/module/snv_workflow.nf index 55b1449..f298b7e 100644 --- a/module/snv_workflow.nf +++ b/module/snv_workflow.nf @@ -103,7 +103,7 @@ workflow workflow_extract_snv_annotations { dest_sequence ) - workflow_apply_snv_annotations.out.annotated_vcf.set { annotated_vcf } + workflow_apply_snv_annotations.out.annotated_vcf.set { annotated_vcf_with_index } } else { // Step 1: Annotate with GRCh38 @@ -120,17 +120,23 @@ workflow workflow_extract_snv_annotations { chain_file ) - run_liftover_BCFtools.out.liftover_vcf_with_index.set { annotated_vcf } + run_liftover_BCFtools.out.liftover_vcf_with_index.set { annotated_vcf_with_index } } // Step 3: Extract features // FIXME Parallelize HaplotypeCaller extract_VCF_features_StableLift( - annotated_vcf + annotated_vcf_with_index ) + // For consistency with the SV branch, remove the index file from the + // output VCF channel + annotated_vcf_with_index + .map { sample_id, vcf, index -> [sample_id, vcf] } + .set { annotated_vcf } + emit: - liftover_vcf = workflow_apply_snv_annotations.out.annotated_vcf + liftover_vcf = annotated_vcf r_annotations = extract_VCF_features_StableLift.out.r_annotations }