-
Notifications
You must be signed in to change notification settings - Fork 863
gatk4/selectvariants: Update documentation and module improvements #7907 #8121
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -2,57 +2,62 @@ process GATK4_SELECTVARIANTS { | |||||||||
tag "$meta.id" | ||||||||||
label 'process_single' | ||||||||||
|
||||||||||
|
||||||||||
conda "${moduleDir}/environment.yml" | ||||||||||
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? | ||||||||||
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b28daf5d9bb2f0d129dcad1b7410e0dd8a9b087aaf3ec7ced929b1f57624ad98/data': | ||||||||||
'community.wave.seqera.io/library/gatk4_gcnvkernel:e48d414933d188cd' }" | ||||||||||
'https://depot.galaxyproject.org/singularity/gatk4:4.6.1.0--py310hdfd78af_0': | ||||||||||
'biocontainers/gatk4:4.6.1.0--py310hdfd78af_0' }" | ||||||||||
|
||||||||||
input: | ||||||||||
tuple val(meta), path(vcf), path(vcf_idx), path (intervals) | ||||||||||
tuple val(meta), path(vcf), path(vcf_index) | ||||||||||
path fasta | ||||||||||
path fasta_fai | ||||||||||
path dict | ||||||||||
val intervals | ||||||||||
|
||||||||||
output: | ||||||||||
tuple val(meta), path("*.vcf.gz") , emit: vcf | ||||||||||
tuple val(meta), path("*.vcf.gz.tbi") , emit: tbi | ||||||||||
path "versions.yml" , emit: versions | ||||||||||
path "versions.yml", emit: versions | ||||||||||
Comment on lines
-16
to
+21
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please leave as is :) |
||||||||||
|
||||||||||
when: | ||||||||||
task.ext.when == null || task.ext.when | ||||||||||
|
||||||||||
script: | ||||||||||
def args = task.ext.args ?: '' | ||||||||||
def prefix = task.ext.prefix ?: "${meta.id}" | ||||||||||
def interval = intervals ? "--intervals ${intervals}" : "" | ||||||||||
|
||||||||||
def avail_mem = 3072 | ||||||||||
if (!task.memory) { | ||||||||||
log.info '[GATK SelectVariants] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' | ||||||||||
} else { | ||||||||||
avail_mem = (task.memory.mega*0.8).intValue() | ||||||||||
def interval_command = intervals ? "--intervals ${intervals}" : "" | ||||||||||
def avail_mem = 3 | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this value hardcoded? |
||||||||||
if (task.memory) { | ||||||||||
avail_mem = task.memory.toGiga() | ||||||||||
} | ||||||||||
|
||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
""" | ||||||||||
gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\ | ||||||||||
gatk --java-options "-Xmx${avail_mem}g" \\ | ||||||||||
SelectVariants \\ | ||||||||||
--variant $vcf \\ | ||||||||||
--variant ${vcf} \\ | ||||||||||
--reference ${fasta} \\ | ||||||||||
--output ${prefix}.vcf.gz \\ | ||||||||||
$interval \\ | ||||||||||
--tmp-dir . \\ | ||||||||||
$args | ||||||||||
${interval_command} \\ | ||||||||||
${args} | ||||||||||
|
||||||||||
|
||||||||||
cat <<-END_VERSIONS > versions.yml | ||||||||||
"${task.process}": | ||||||||||
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') | ||||||||||
gatk4: "\$(gatk --version 2>&1 | sed -n 's/^.*(GATK) v\\([^ ]*\\).*/\\1/p')" | ||||||||||
END_VERSIONS | ||||||||||
""" | ||||||||||
|
||||||||||
stub: | ||||||||||
def prefix = task.ext.prefix ?: "${meta.id}" | ||||||||||
|
||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
""" | ||||||||||
touch ${prefix}.vcf.gz | ||||||||||
touch ${prefix}.vcf.gz.tbi | ||||||||||
Comment on lines
55
to
56
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These won't work; do
Suggested change
|
||||||||||
|
||||||||||
cat <<-END_VERSIONS > versions.yml | ||||||||||
"${task.process}": | ||||||||||
gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//') | ||||||||||
END_VERSIONS | ||||||||||
"${task.process}": | ||||||||||
gatk4: "\$(gatk --version 2>&1 | sed -n 's/^.*(GATK) v\\([^ ]*\\).*/\\1/p')" | ||||||||||
END_VERSIONS | ||||||||||
""" | ||||||||||
} |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -1,41 +1,53 @@ | ||||||
name: gatk4_selectvariants | ||||||
description: Select a subset of variants from a VCF file | ||||||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json | ||||||
name: "gatk4_selectvariants" | ||||||
description: It is used to select a subset of variants from a VCF file | ||||||
keywords: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
- gatk4 | ||||||
- selectvariants | ||||||
- vcf | ||||||
tools: | ||||||
- gatk4: | ||||||
description: | | ||||||
Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools | ||||||
with a primary focus on variant discovery and genotyping. Its powerful processing engine | ||||||
and high-performance computing features make it capable of taking on projects of any size. | ||||||
homepage: https://gatk.broadinstitute.org/hc/en-us | ||||||
documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360036362532-SelectVariants | ||||||
tool_dev_url: https://github.com/broadinstitute/gatk | ||||||
doi: 10.1158/1538-7445.AM2017-3590 | ||||||
licence: ["Apache-2.0"] | ||||||
- "gatk4": | ||||||
description: "It was developed in the Data Sciences Platform at the Broad Institute, | ||||||
the toolkit offers a wide array of tools with a primary focus on variant discovery | ||||||
and genotyping. Its powerful processing engine and high-performance computing | ||||||
features make it capable of taking on projects of any size." | ||||||
homepage: "https://gatk.broadinstitute.org/hc/en-us" | ||||||
documentation: "https://gatk.broadinstitute.org/hc/en-us/articles/360037055952-SelectVariants" | ||||||
tool_dev_url: "https://github.com/broadinstitute/gatk" | ||||||
doi: "10.1158/1538-7445.AM2017-3590" | ||||||
licence: ["BSD-3-clause"] | ||||||
identifier: "" | ||||||
|
||||||
input: | ||||||
# Only when we have meta | ||||||
- - meta: | ||||||
type: map | ||||||
description: | | ||||||
Groovy Map containing sample information | ||||||
e.g. [ id:'test'] | ||||||
e.g. `[ id:'sample1', single_end:false ]` | ||||||
- vcf: | ||||||
type: list | ||||||
description: VCF(.gz) file | ||||||
type: file | ||||||
description: Input VCF file | ||||||
pattern: "*.{vcf,vcf.gz}" | ||||||
- vcf_idx: | ||||||
type: list | ||||||
description: VCF file index | ||||||
pattern: "*.{idx,tbi}" | ||||||
- intervals: | ||||||
- vcf_index: | ||||||
type: file | ||||||
description: Index file of the input VCF | ||||||
pattern: "*.{tbi,idx}" | ||||||
- - fasta: | ||||||
type: file | ||||||
description: The reference fasta file | ||||||
- - fasta_fai: | ||||||
type: file | ||||||
description: The index of the reference fasta file | ||||||
- - dict: | ||||||
type: file | ||||||
description: The sequence dictionary file | ||||||
- - intervals: | ||||||
type: string | ||||||
description: One or more genomic intervals over which to operate | ||||||
pattern: ".intervals" | ||||||
output: | ||||||
- vcf: | ||||||
#Only when we have meta | ||||||
- meta: | ||||||
type: map | ||||||
description: | | ||||||
|
@@ -55,14 +67,14 @@ output: | |||||
type: file | ||||||
description: Tabix index file | ||||||
pattern: "*.vcf.gz.tbi" | ||||||
|
||||||
- versions: | ||||||
- versions.yml: | ||||||
- "versions.yml": | ||||||
type: file | ||||||
description: File containing software versions | ||||||
pattern: "versions.yml" | ||||||
|
||||||
authors: | ||||||
- "@mjcipriano" | ||||||
- "@ramprasadn" | ||||||
- "@Alveen-o" | ||||||
maintainers: | ||||||
- "@mjcipriano" | ||||||
- "@ramprasadn" | ||||||
- "@Alveen-o" |
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This should not be done here but instead in a setup block in the actual test. You should be able to just pass the path in general. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
process DOWNLOAD_TEST_DATA { | ||
output: | ||
path "*" | ||
|
||
script: | ||
""" | ||
wget -q https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz | ||
wget -q https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi | ||
wget -q https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta | ||
wget -q https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta.fai | ||
""" | ||
} | ||
|
||
workflow { | ||
DOWNLOAD_TEST_DATA() | ||
} |
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -1,64 +1,69 @@ | ||||||||||||||||||||||||
nextflow_process { | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
name "Test Process GATK4_SELECTVARIANTS" | ||||||||||||||||||||||||
script "modules/nf-core/gatk4/selectvariants/main.nf" | ||||||||||||||||||||||||
script "../main.nf" | ||||||||||||||||||||||||
process "GATK4_SELECTVARIANTS" | ||||||||||||||||||||||||
tag "modules" | ||||||||||||||||||||||||
tag "modules_nfcore" | ||||||||||||||||||||||||
tag "gatk4" | ||||||||||||||||||||||||
tag "gatk4/selectvariants" | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
test("selectvariants - vcf input") { | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
test("gatk4 selectvariants - vcf") { | ||||||||||||||||||||||||
tag "gatk4" | ||||||||||||||||||||||||
tag "modules_nfcore" | ||||||||||||||||||||||||
tag "modules" | ||||||||||||||||||||||||
tag "gatk4/selectvariants" | ||||||||||||||||||||||||
when { | ||||||||||||||||||||||||
params { | ||||||||||||||||||||||||
// define parameters here. Example: | ||||||||||||||||||||||||
// outdir = "tests/results" | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
process { | ||||||||||||||||||||||||
""" | ||||||||||||||||||||||||
input[0] = [ | ||||||||||||||||||||||||
input[0] = Channel.of([ | ||||||||||||||||||||||||
[ id:'test' ], // meta map | ||||||||||||||||||||||||
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf', checkIfExists: true), | ||||||||||||||||||||||||
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.idx', checkIfExists: true), | ||||||||||||||||||||||||
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.interval_list', checkIfExists: true) | ||||||||||||||||||||||||
Comment on lines
-22
to
-24
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It should work like this. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe change from idx to tbi. |
||||||||||||||||||||||||
] | ||||||||||||||||||||||||
file("${baseDir}/test-data/test.vcf.gz"), | ||||||||||||||||||||||||
file("${baseDir}/test-data/test.vcf.gz.tbi") | ||||||||||||||||||||||||
]) | ||||||||||||||||||||||||
input[1] = Channel.of([ | ||||||||||||||||||||||||
[ id:'test' ], | ||||||||||||||||||||||||
file("${baseDir}/test-data/genome.fasta"), | ||||||||||||||||||||||||
file("${baseDir}/test-data/genome.fasta.fai") | ||||||||||||||||||||||||
]) | ||||||||||||||||||||||||
""" | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
then { | ||||||||||||||||||||||||
assertAll( | ||||||||||||||||||||||||
{assert process.success}, | ||||||||||||||||||||||||
{assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2")} | ||||||||||||||||||||||||
{ assert process.success }, | ||||||||||||||||||||||||
{ assert snapshot( | ||||||||||||||||||||||||
file(process.out.vcf.get(0).get(1)).name, | ||||||||||||||||||||||||
file(process.out.vcf.get(0).get(2)).name, | ||||||||||||||||||||||||
file(process.out.versions).text | ||||||||||||||||||||||||
).match() } | ||||||||||||||||||||||||
Comment on lines
+31
to
+36
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||
) | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
|
||||||||||||||||||||||||
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
test("selectvariants - gz input") { | ||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You removed a whole test. Why? |
||||||||||||||||||||||||
test("gatk4 selectvariants - vcf - stub") { | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
options "-stub" | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
when { | ||||||||||||||||||||||||
process { | ||||||||||||||||||||||||
""" | ||||||||||||||||||||||||
input[0] = [ | ||||||||||||||||||||||||
[ id:'test' ], // meta map | ||||||||||||||||||||||||
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), | ||||||||||||||||||||||||
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), | ||||||||||||||||||||||||
[] | ||||||||||||||||||||||||
] | ||||||||||||||||||||||||
input[0] = Channel.of([ | ||||||||||||||||||||||||
[ id:'test' ], | ||||||||||||||||||||||||
file("${baseDir}/test-data/test.vcf.gz"), | ||||||||||||||||||||||||
file("${baseDir}/test-data/test.vcf.gz.tbi") | ||||||||||||||||||||||||
]) | ||||||||||||||||||||||||
input[1] = Channel.of([ | ||||||||||||||||||||||||
[ id:'test' ], | ||||||||||||||||||||||||
file("${baseDir}/test-data/genome.fasta"), | ||||||||||||||||||||||||
file("${baseDir}/test-data/genome.fasta.fai") | ||||||||||||||||||||||||
]) | ||||||||||||||||||||||||
""" | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
then { | ||||||||||||||||||||||||
assertAll( | ||||||||||||||||||||||||
{assert process.success}, | ||||||||||||||||||||||||
{assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2")} | ||||||||||||||||||||||||
{ assert process.success }, | ||||||||||||||||||||||||
{ assert snapshot(process.out).match() } | ||||||||||||||||||||||||
) | ||||||||||||||||||||||||
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
} | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
{ | ||
"versions": { | ||
"content": [ | ||
[ | ||
|
||
] | ||
], | ||
"meta": { | ||
"nf-test": "0.9.2", | ||
"nextflow": "24.10.5" | ||
}, | ||
"timestamp": "2025-03-25T16:59:21.50748" | ||
}, | ||
"vcf": { | ||
"content": [ | ||
[ | ||
|
||
] | ||
], | ||
"meta": { | ||
"nf-test": "0.9.2", | ||
"nextflow": "24.10.5" | ||
}, | ||
"timestamp": "2025-03-25T16:59:21.48226" | ||
}, | ||
"tbi": { | ||
"content": [ | ||
[ | ||
|
||
] | ||
], | ||
"meta": { | ||
"nf-test": "0.9.2", | ||
"nextflow": "24.10.5" | ||
}, | ||
"timestamp": "2025-03-25T16:59:21.498775" | ||
}, | ||
"gatk4 selectvariants - vcf - stub": { | ||
"content": [ | ||
{ | ||
"0": [ | ||
|
||
], | ||
"1": [ | ||
|
||
], | ||
"2": [ | ||
|
||
], | ||
"tbi": [ | ||
|
||
], | ||
"vcf": [ | ||
|
||
], | ||
"versions": [ | ||
|
||
] | ||
} | ||
], | ||
"meta": { | ||
"nf-test": "0.9.2", | ||
"nextflow": "24.10.5" | ||
}, | ||
"timestamp": "2025-03-25T17:00:34.591574" | ||
} | ||
} |
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You should not need this here; please remove it. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
{ | ||
"name": "nf-core/gatk4/selectvariants", | ||
"homePage": "https://gatk.broadinstitute.org/hc/en-us/articles/360037055952-SelectVariants", | ||
"tags": { | ||
"gatk4-selectvariants": { | ||
"test_data": { | ||
"test_vcf": "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/test.vcf.gz", | ||
"test_vcf_tbi": "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/vcf/test.vcf.gz.tbi" | ||
} | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please pass them with metas