Skip to content

Commit

Permalink
Merge pull request #15 from uclahs-cds/nwiltsie-liftover-backward
Browse files Browse the repository at this point in the history
Add support for GRCh38->GRCh37
  • Loading branch information
nkwang24 authored Aug 28, 2024
2 parents dcab30a + 6a99793 commit 5c2bc28
Show file tree
Hide file tree
Showing 8 changed files with 94 additions and 28 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
- Add workflow for SNV callers (Mutect2, HaplotypeCaller, Strelka2, Muse2, SomaticSniper)
- Add workflow for SV caller (Delly2)
- Add pipeline diagram
- Add reverse liftover (GRCh38 -> GRCh37) for SNV branch

### Changed

Expand Down
14 changes: 14 additions & 0 deletions config/methods.config
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,18 @@ methods {
}
}

// Validate the source/destination reference IDs configured under
// params.funcotator_data and record the liftover direction.
//
// Throws IllegalArgumentException when the two IDs are equal, or when
// neither of them is 'hg38'. Otherwise sets params.liftover_forward to
// true if the destination reference is 'hg38' and to false if it is not.
determine_liftover_direction = {
    def src_id = params.funcotator_data.src_reference_id
    def dest_id = params.funcotator_data.dest_reference_id

    // Lifting a reference onto itself is rejected outright
    if (src_id == dest_id) {
        throw new IllegalArgumentException("params.funcotator_data.src_reference_id and params.funcotator_data.dest_reference_id must be different!")
    }

    // At least one side of the liftover has to be hg38
    def reference_ids = [src_id, dest_id]
    def involves_hg38 = reference_ids.contains("hg38")
    if (!involves_hg38) {
        throw new IllegalArgumentException("One of params.funcotator_data.src_reference_id and params.funcotator_data.dest_reference_id must be 'hg38'!")
    }

    // "Forward" means the destination is hg38
    params.liftover_forward = (dest_id == "hg38")
}

setup = {
schema.load_custom_types("${projectDir}/config/custom_schema_types.config")
schema.validate()
Expand All @@ -49,6 +61,8 @@ methods {
methods.setup_docker_cpus()
methods.setup_process_afterscript()

methods.determine_liftover_direction()

json_extractor.store_object_as_json(
params,
new File("${params.log_output_dir}/nextflow-log/params.json")
Expand Down
27 changes: 18 additions & 9 deletions docs/pipeline.mmd
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,24 @@ flowchart TD
sv_vs_snv --> bcftools_liftover

subgraph SNV ["`**SNV**`"]
funcotator_sources([funcotator_sources]):::input
chain_file([chain_file]):::input
repeat_bed([repeat_bed]):::input

bcftools_liftover[bcftools +liftover]:::bcftools
---> gatk_func[gatk Funcotator]:::gatk
--> bcftools_annotate["`bcftools annotate*RepeatMasker*`"]:::bcftools
--> bcftools_annotate2["`bcftools annotate*Trinucleotide*`"]:::bcftools
--> r_extract_snv[extract-VCF-features.R]:::R
subgraph SNV_liftover["`**Liftover**`"]
chain_file([chain_file]):::input
bcftools_liftover[bcftools +liftover]:::bcftools
end

subgraph SNV_annotation["`**Annotation**`"]
funcotator_sources([funcotator_sources]):::input
repeat_bed([repeat_bed]):::input

gatk_func[gatk Funcotator]:::gatk
--> bcftools_annotate["`bcftools annotate*RepeatMasker*`"]:::bcftools
--> bcftools_annotate2["`bcftools annotate*Trinucleotide*`"]:::bcftools
end

blocknote["`**Note:** Annotation is performed before Liftover when lifting backward`"]

bcftools_liftover ---> gatk_func
bcftools_annotate2 --> r_extract_snv[extract-VCF-features.R]:::R
end

funcotator_sources .-> gatk_func
Expand Down
2 changes: 1 addition & 1 deletion docs/pipeline.mmd.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 changes: 4 additions & 2 deletions module/snv_annotations.nf
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ process annotate_trinucleotide_BCFtools {
container params.docker_image_bcftools

publishDir path: "${params.output_dir_base}/intermediate/${task.process.replace(':', '/')}",
pattern: "output.vcf.gz",
pattern: "output.vcf.gz{,.tbi}",
mode: "copy",
enabled: params.save_intermediate_files,
saveAs: { "Trinucleotide-annotated-${sample_id}.vcf.gz" }
Expand All @@ -129,21 +129,23 @@ process annotate_trinucleotide_BCFtools {
path(tsv_tbi, stageAs: 'inputs/*')

output:
tuple val(sample_id), path('output.vcf.gz'), emit: trinucleotide_vcf
tuple val(sample_id), path('output.vcf.gz'), path('output.vcf.gz.tbi'), emit: trinucleotide_vcf

script:
"""
bcftools annotate \
--annotations ${tsv} \
--columns CHROM,POS,TRINUCLEOTIDE \
--header-lines <(echo '##INFO=<ID=TRINUCLEOTIDE,Number=1,Type=String,Description="Trinucleotide Context">') \
--write-index=tbi \
--output output.vcf.gz \
${vcf}
"""

stub:
"""
touch "output.vcf.gz"
touch "output.vcf.gz.tbi"
"""
}

Expand Down
61 changes: 45 additions & 16 deletions module/snv_workflow.nf
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ process extract_VCF_features_StableLift {
saveAs: { "StableLift-${sample_id}.Rds" }

input:
tuple val(sample_id), path(vcf)
tuple val(sample_id), path(vcf), path(index)

output:
tuple val(sample_id), path('features.Rds'), emit: r_annotations
Expand Down Expand Up @@ -86,28 +86,57 @@ workflow workflow_extract_snv_annotations {

main:

// Step 1: Liftover
run_liftover_BCFtools(
vcf_with_sample_id,
src_sequence,
dest_sequence,
chain_file
)

// Step 2: Annotate
workflow_apply_snv_annotations(
run_liftover_BCFtools.out.liftover_vcf_with_index,
dest_sequence
)
// We want to do all of the annotating with the GRCh38 / hg38 reference. If
// the liftover is going from hg38 to hg19, defer it until after annotation.
if (params.liftover_forward) {
// Step 1: Liftover
run_liftover_BCFtools(
vcf_with_sample_id,
src_sequence,
dest_sequence,
chain_file
)

// Step 2: Annotate with GRCh38
workflow_apply_snv_annotations(
run_liftover_BCFtools.out.liftover_vcf_with_index,
dest_sequence
)

workflow_apply_snv_annotations.out.annotated_vcf.set { annotated_vcf_with_index }

} else {
// Step 1: Annotate with GRCh38
workflow_apply_snv_annotations(
vcf_with_sample_id,
src_sequence
)

// Step 2: Liftover
run_liftover_BCFtools(
workflow_apply_snv_annotations.out.annotated_vcf,
src_sequence,
dest_sequence,
chain_file
)

run_liftover_BCFtools.out.liftover_vcf_with_index.set { annotated_vcf_with_index }
}

// Step 3: Extract features
// FIXME Parallelize HaplotypeCaller
extract_VCF_features_StableLift(
workflow_apply_snv_annotations.out.annotated_vcf
annotated_vcf_with_index
)

// For consistency with the SV branch, remove the index file from the
// output VCF channel
annotated_vcf_with_index
.map { sample_id, vcf, index -> [sample_id, vcf] }
.set { annotated_vcf }

emit:
liftover_vcf = workflow_apply_snv_annotations.out.annotated_vcf
liftover_vcf = annotated_vcf
r_annotations = extract_VCF_features_StableLift.out.r_annotations
}

7 changes: 7 additions & 0 deletions nftest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ cases:
skip: false
verbose: true

- name: SNV-backward
nf_script: ./main.nf
nf_config: test/backward.config
params_file: test/snv-backward.yaml
skip: false
verbose: true

- name: SV
nf_script: ./main.nf
nf_config: test/sv.config
Expand Down
4 changes: 4 additions & 0 deletions test/snv-backward.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
sample_id: ExampleID
input:
vcf: /hot/project/method/AlgorithmEvaluation/BNCH-000142-GRCh37v38/validation/TCGA-SARC_WXS/GRCh38/Mutect2/TCGA-SARC_WXS_Mutect2_merge.vcf.gz

0 comments on commit 5c2bc28

Please sign in to comment.