Skip to content

MarkDuplicates Input specificity and input refactor. #8118

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 16 additions & 17 deletions modules/nf-core/gatk4/markduplicates/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,24 @@ process GATK4_MARKDUPLICATES {
path fasta_fai

output:
tuple val(meta), path("*cram"), emit: cram, optional: true
tuple val(meta), path("*bam"), emit: bam, optional: true
tuple val(meta), path("*.crai"), emit: crai, optional: true
tuple val(meta), path("*.bai"), emit: bai, optional: true
tuple val(meta), path("*.metrics"), emit: metrics
path "versions.yml", emit: versions
tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true
tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true
tuple val(meta), path("${prefix}*crai"), emit: crai, optional: true
tuple val(meta), path("${prefix}*bai"), emit: bai, optional: true
tuple val(meta), path("*.metrics"), emit: metrics
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}.bam"

// If the extension is CRAM, then change it to BAM
prefix_bam = prefix.tokenize('.')[-1] == 'cram' ? "${prefix.substring(0, prefix.lastIndexOf('.'))}.bam" : prefix
prefix = task.ext.prefix ?: "${meta.id}.md"
suffix = task.ext.suffix ?: "bam"

def input_list = bam.collect{"--INPUT $it"}.join(' ')
def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : ""
file_extension = bam.first().getExtension()
reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : ""

def avail_mem = 3072
if (!task.memory) {
Expand All @@ -46,17 +45,17 @@ process GATK4_MARKDUPLICATES {
gatk --java-options "-Xmx${avail_mem}M -XX:-UsePerfData" \\
MarkDuplicates \\
$input_list \\
--OUTPUT ${prefix_bam} \\
--OUTPUT ${prefix}.bam \\
--METRICS_FILE ${prefix}.metrics \\
--TMP_DIR . \\
${reference} \\
$args

# If CRAM output is requested, then run samtools to convert the BAM
if [[ ${prefix} == *.cram ]]; then
samtools view -Ch -T ${fasta} -o ${prefix} ${prefix_bam}
rm ${prefix_bam}
samtools index ${prefix}
if [[ ${suffix} == cram ]]; then
samtools view -Ch -T ${fasta} -o ${prefix}.cram ${prefix}.bam
rm ${prefix}.bam
samtools index ${prefix}.cram
fi

cat <<-END_VERSIONS > versions.yml
Expand Down
43 changes: 29 additions & 14 deletions modules/nf-core/gatk4/markduplicates/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,49 +43,64 @@ output:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- "*cram":
type: file
description: Marked duplicates CRAM file
pattern: "*.{cram}"
- ${prefix}.cram:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
pattern: "*.{cram}"
- bam:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- "*bam":
type: file
description: Marked duplicates BAM file
pattern: "*.{bam}"
- ${prefix}.bam:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
pattern: "*.{bam}"
- crai:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- "*.crai":
type: file
description: CRAM index file
pattern: "*.{cram.crai}"
- ${prefix}*crai:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
pattern: "*.{cram.crai}"
- bai:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- "*.bai":
type: file
description: BAM index file
pattern: "*.{bam.bai}"
- ${prefix}*bai:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
pattern: "*.{bam.bai}"
- metrics:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
pattern: "*.{metrics.txt}"
- "*.metrics":
type: file
description: Duplicate metrics file generated by GATK
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
pattern: "*.{metrics.txt}"
- versions:
- versions.yml:
Expand Down
1 change: 0 additions & 1 deletion modules/nf-core/gatk4/markduplicates/tests/bam.config
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ process {

withName: GATK4_MARKDUPLICATES {
ext.args = '--CREATE_INDEX true'
ext.prefix = { "${meta.id}.bam" }
}

}
2 changes: 1 addition & 1 deletion modules/nf-core/gatk4/markduplicates/tests/cram.config
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ process {

withName: GATK4_MARKDUPLICATES {
ext.args = '--CREATE_INDEX true'
ext.prefix = { "${meta.id}.cram" }
ext.suffix = "cram"
}

}
6 changes: 3 additions & 3 deletions modules/nf-core/gatk4/markduplicates/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out.bam).match("bam") },
{ assert snapshot(bam(process.out.bam[0][1]).getReadsMD5())},
{ assert snapshot(process.out.bai).match("bai") },
{ assert snapshot(process.out.versions).match("versions") },
{ assert snapshot(file(process.out.metrics[0][1]).name).match("test.metrics") }
Expand Down Expand Up @@ -58,8 +58,8 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out.bam).match("multi bam") },
{ assert snapshot(process.out.bai).match("multi bai") },
{ assert snapshot(bam(process.out.bam[0][1]).getReadsMD5())},
{ assert snapshot(file(process.out.bai[0][1]).name).match("multi bai") },
{ assert snapshot(process.out.versions).match("multi versions") },
{ assert snapshot(file(process.out.metrics[0][1]).name).match("multi test.metrics") }
)
Expand Down
118 changes: 37 additions & 81 deletions modules/nf-core/gatk4/markduplicates/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,49 +1,23 @@
{
"multi bam": {
"content": [
[
[
{
"id": "test",
"single_end": false
},
"test.bam:md5,8a808b1a94d2627c4d659a2151c4cb9f"
]
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.0"
},
"timestamp": "2024-02-13T15:21:36.059923"
},
"multi crai": {
"content": [
"test.cram.crai"
"test.md.cram.crai"
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.0"
"nf-test": "0.9.2",
"nextflow": "24.10.5"
},
"timestamp": "2023-12-12T17:43:37.780426007"
"timestamp": "2025-03-28T14:12:43.343356201"
},
"multi bai": {
"content": [
[
[
{
"id": "test",
"single_end": false
},
"test.bai:md5,38b99c5f771895ecf5324c3186b9d452"
]
]
"test.md.bai"
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.0"
"nf-test": "0.9.2",
"nextflow": "24.10.5"
},
"timestamp": "2024-02-13T15:21:36.09642"
"timestamp": "2025-03-28T14:12:29.424717865"
},
"versions": {
"content": [
Expand All @@ -52,20 +26,20 @@
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.0"
"nf-test": "0.9.2",
"nextflow": "24.10.5"
},
"timestamp": "2024-02-13T15:21:08.710549"
"timestamp": "2025-03-28T14:12:15.803964407"
},
"multi test.metrics": {
"content": [
"test.bam.metrics"
"test.md.metrics"
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.0"
"nf-test": "0.9.2",
"nextflow": "24.10.5"
},
"timestamp": "2023-12-12T17:43:11.732892667"
"timestamp": "2025-03-28T14:12:29.438583878"
},
"bai": {
"content": [
Expand All @@ -75,15 +49,15 @@
"id": "test",
"single_end": false
},
"test.bai:md5,26001bcdbce12e9f07557d8f7b8d360e"
"test.md.bai:md5,7a2b177be9e77a3a8ef1baa1afb5aef4"
]
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.0"
"nf-test": "0.9.2",
"nextflow": "24.10.5"
},
"timestamp": "2023-12-12T17:42:39.651888758"
"timestamp": "2025-03-28T14:12:15.788812713"
},
"multi cram versions": {
"content": [
Expand All @@ -92,10 +66,10 @@
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.0"
"nf-test": "0.9.2",
"nextflow": "24.10.5"
},
"timestamp": "2024-02-13T15:21:56.966376"
"timestamp": "2025-03-28T14:12:43.351899261"
},
"multi versions": {
"content": [
Expand All @@ -104,57 +78,39 @@
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.0"
"nf-test": "0.9.2",
"nextflow": "24.10.5"
},
"timestamp": "2024-02-13T15:21:36.138095"
"timestamp": "2025-03-28T14:12:29.432440299"
},
"multi cram test.metrics": {
"content": [
"test.cram.metrics"
"test.md.metrics"
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.0"
"nf-test": "0.9.2",
"nextflow": "24.10.5"
},
"timestamp": "2023-12-12T17:43:37.798977444"
"timestamp": "2025-03-28T14:12:43.361537975"
},
"multi cram": {
"content": [
"test.cram"
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.0"
},
"timestamp": "2023-12-12T17:43:37.771137858"
},
"bam": {
"content": [
[
[
{
"id": "test",
"single_end": false
},
"test.bam:md5,75d914ba8804eaf2acf02ab432197ec9"
]
]
"test.md.cram"
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.0"
"nf-test": "0.9.2",
"nextflow": "24.10.5"
},
"timestamp": "2024-02-13T15:21:08.645892"
"timestamp": "2025-03-28T14:12:43.332947793"
},
"test.metrics": {
"content": [
"test.bam.metrics"
"test.md.metrics"
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.0"
"nf-test": "0.9.2",
"nextflow": "24.10.5"
},
"timestamp": "2023-12-12T17:42:39.672508385"
"timestamp": "2025-03-28T14:12:15.810908919"
}
}
Loading