Skip to content

add gcta/gsmr tool #8140

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
7 changes: 7 additions & 0 deletions modules/nf-core/gcta/gsmr/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the GCTA_GSMR process; main.nf loads it via
# `conda "${moduleDir}/environment.yml"`.
channels:
- conda-forge
- bioconda
dependencies:
# Pinned to the same GCTA version (1.94.1) as the container tags referenced in main.nf.
- "bioconda::gcta=1.94.1"
60 changes: 60 additions & 0 deletions modules/nf-core/gcta/gsmr/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// GCTA_GSMR: runs `gcta --gsmr-file` for one exposure/outcome pair of summary files
// against a reference directory.
//
// Inputs:
//   meta,  exposure  - metadata map (uses meta.id) and the exposure summary file
//   meta2, outcome   - metadata map (uses meta2.id) and the outcome summary file
//   reference        - directory; its first listed entry (extension stripped) is
//                      written to reference.txt and handed to `--mbfile`
//
// Outputs (all keyed by [meta, meta2] except versions):
//   log           - *.log
//   gsmr          - *.gsmr
//   eff_plot      - *.eff_plot.gz    (optional)
//   mono_badsnps  - *.mono.badsnps   (optional)
//   versions      - versions.yml
//
// The output prefix defaults to "<meta.id>_<meta2.id>" and can be overridden
// with task.ext.prefix; extra GCTA flags are taken from task.ext.args.
process GCTA_GSMR {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gcta:1.94.1--h9ee0642_0':
        'biocontainers/gcta:1.94.1--h9ee0642_0' }"

    input:
    tuple val(meta) , path(exposure)
    tuple val(meta2), path(outcome)
    path(reference)

    output:
    tuple val(meta), val(meta2), path("*.log")         , emit: log
    tuple val(meta), val(meta2), path("*.gsmr")        , emit: gsmr
    tuple val(meta), val(meta2), path("*.eff_plot.gz") , emit: eff_plot, optional: true
    // Glob must match the file name the stub (and the recorded snapshot) use: *.mono.badsnps
    tuple val(meta), val(meta2), path("*.mono.badsnps"), emit: mono_badsnps, optional: true
    path "versions.yml"                                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}_${meta2.id}"
    // The two `echo` lines build the "<label> <path>" list files consumed by --gsmr-file.
    // reference.txt holds a single "<dir>/<basename>" line derived from the first
    // entry returned by `ls` on the reference directory, with its extension removed.
    """
    echo "${meta.id} ${exposure}" > ${meta.id}.input.txt
    echo "${meta2.id} ${outcome}" > outcome.txt
    file=\$(ls $reference | sed 's/\\.[^.]*\$//')
    echo "${reference}/\$file" | head -n1 > reference.txt

    gcta \\
        $args \\
        --mbfile reference.txt \\
        --gsmr-file ${meta.id}.input.txt outcome.txt \\
        --out "${prefix}"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gcta: \$(gcta 2>&1 | awk '/no analysis has been launched/ {exit 0} {print}' | sed -n 's/.*version \\(v[0-9.]*\\).*/\\1/p')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}_${meta2.id}"
    // Creates empty placeholders for every declared output, including both optional ones.
    """
    touch ${prefix}.log
    touch ${prefix}.gsmr
    touch ${prefix}.mono.badsnps
    echo "" | gzip > ${prefix}.eff_plot.gz

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gcta: \$(gcta 2>&1 | awk '/no analysis has been launched/ {exit 0} {print}' | sed -n 's/.*version \\(v[0-9.]*\\).*/\\1/p')
    END_VERSIONS
    """
}
109 changes: 109 additions & 0 deletions modules/nf-core/gcta/gsmr/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
name: gcta_gsmr
description: Module that performs a Mendelian Randomization analysis with summary
data and plink bfile.
keywords:
- Mendelian Randomization
- GWAS
- gsmr
tools:
- gcta:
description: |
GCTA (Genome-wide Complex Trait Analysis) estimates the proportion of phenotypic variance explained
by all genome-wide SNPs for complex traits.
homepage: https://yanglab.westlake.edu.cn/software/gcta
documentation: https://yanglab.westlake.edu.cn/software/gcta/
tool_dev_url: https://github.com/jianyangqt/gcta
doi: 10.1038/s41467-017-02317-2
licence: ["GPL v3"]
identifier: biotools:gcta
input:
- - meta:
type: map
description: |
Groovy Map containing sample information for exposure
e.g. [ id:'sample1' ]
- exposure:
type: file
description: Exposure data file for GSMR analysis
pattern: "*.txt"
ontologies: []
- - meta2:
type: map
description: |
Groovy Map containing sample information for outcome
e.g. [ id:'sample2' ]
- outcome:
type: file
description: Outcome data file for GSMR analysis
pattern: "*.txt"
ontologies: []
- - reference:
      # main.nf runs `ls` on this input and builds "<reference>/<basename>",
      # so it is consumed as a directory rather than as individual files.
      type: directory
      description: |
        Directory containing the reference files (BED, BIM, FAM) for GCTA analysis.
        The first entry listed in the directory, with its extension removed,
        is used as the bfile prefix passed to `--mbfile`.
      # Pattern of the files expected inside the directory.
      pattern: "*.{bed,bim,fam}"
      multiple: true
      ontologies: []
output:
- log:
- meta:
type: map
description: |
Groovy Map containing sample information for exposure
- meta2:
type: map
description: |
Groovy Map containing sample information for outcome
- "*.log":
type: file
description: Log file from GSMR analysis
pattern: "*.log"
- gsmr:
- meta:
type: map
description: |
Groovy Map containing sample information for exposure
- meta2:
type: map
description: |
Groovy Map containing sample information for outcome
- "*.gsmr":
type: file
description: Result file from GSMR analysis
pattern: "*.gsmr"
- eff_plot:
- meta:
type: map
description: |
Groovy Map containing sample information for exposure
- meta2:
type: map
description: |
Groovy Map containing sample information for outcome
- "*.eff_plot.gz":
type: file
description: Effect plot file from GSMR analysis (gzipped)
pattern: "*.eff_plot.gz"
optional: true
- mono_badsnps:
    - meta:
        type: map
        description: |
          Groovy Map containing sample information for exposure
    - meta2:
        type: map
        description: |
          Groovy Map containing sample information for outcome
    # Key must agree with the `pattern` below and with the file the stub creates
    # (`<prefix>.mono.badsnps`).
    - "*.mono.badsnps":
        type: file
        description: Bad SNPs file from GSMR analysis
        pattern: "*.mono.badsnps"
        optional: true
- versions:
- versions.yml:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@juliaapolonio"
maintainers:
- "@juliaapolonio"
104 changes: 104 additions & 0 deletions modules/nf-core/gcta/gsmr/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
nextflow_process {

name "Test Process GCTA_GSMR"
script "../main.nf"
process "GCTA_GSMR"

tag "modules"
tag "modules_nfcore"
tag "gcta"
tag "gcta/gsmr"

test("homo_sapiens - gsmr") {

when {
process {
"""
input[0] = [
[ id:'test_exposure' ],
file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/gsmr/sumstats.tsv")
]
input[1] = [
[ id:'test_outcome' ],
file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/gsmr/sumstats_copy.tsv")
]

// Stage individual files
def bedFile = file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bed")
def bimFile = file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bim")
def famFile = file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.fam")

// Create a folder and move files into it
def referenceDir = file("reference")
referenceDir.mkdirs()
bedFile.copyTo(referenceDir.resolve("reference.bed"))
bimFile.copyTo(referenceDir.resolve("reference.bim"))
famFile.copyTo(referenceDir.resolve("reference.fam"))

// Pass the folder as input
input[2] = referenceDir
"""
}
}

then {
assert process.success
assert snapshot(
process.out.gsmr,
process.out.eff_plot,
process.out.mono_badsnps,
process.out.versions
).match()
assert path(process.out.versions.get(0)).text.contains("GSMR")
Comment on lines +46 to +52
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
assert snapshot(
process.out.gsmr,
process.out.eff_plot,
process.out.mono_badsnps,
process.out.versions
).match()
assert path(process.out.versions.get(0)).text.contains("GSMR")
assert snapshot(process.out).match()

That should work, or is the md5sum of the log unstable?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the log is unstable.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, but then this line is still not necessary :)
assert path(process.out.versions.get(0)).text.contains("GSMR")

}

}

// Stub-mode test: runs GCTA_GSMR with the `-stub` option, using the same three
// inputs as the non-stub test (exposure tuple, outcome tuple, reference directory).
test("homo_sapiens - gsmr - stub") {

// Run the process with the -stub option.
options "-stub"

when {
process {
// input[0] / input[1] are [meta, file] tuples; input[2] is a local "reference"
// directory populated with copies of the remote bed/bim/fam files.
"""
input[0] = [
[ id:'test_exposure' ],
file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/gsmr/sumstats.tsv")
]
input[1] = [
[ id:'test_outcome' ],
file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/gsmr/sumstats_copy.tsv")
]

// Stage individual files
def bedFile = file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bed")
def bimFile = file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.bim")
def famFile = file("https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/modules/data/genomics/homo_sapiens/gsmr/bfile/bfile.fam")

// Create a folder and move files into it
def referenceDir = file("reference")
referenceDir.mkdirs()
bedFile.copyTo(referenceDir.resolve("reference.bed"))
bimFile.copyTo(referenceDir.resolve("reference.bim"))
famFile.copyTo(referenceDir.resolve("reference.fam"))

// Pass the folder as input
input[2] = referenceDir
"""
}
}

then {
assert process.success
// Snapshot covers the gsmr, eff_plot, mono_badsnps and versions channels;
// the log channel is not included.
assert snapshot(
process.out.gsmr,
process.out.eff_plot,
process.out.mono_badsnps,
process.out.versions
).match()

}

}

}
44 changes: 44 additions & 0 deletions modules/nf-core/gcta/gsmr/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"homo_sapiens - gsmr": {
"content": [
[
"test_exposure_test_outcome.gsmr:md5,f137c763773522ea37849d919f6de9aa"
],
[

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it expected that there is no eff_plot?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, gsmr only emits the eff_plot when there are significant IVs which is not the case for this test data.

],
[
"test_exposure_test_outcome.mono.badsnps:md5,0fe7dcfd9384c40361e7009ed28098cc"
],
[
"versions.yml:md5,9d63b9cb692159dbd3c08b55eca5212b"
]
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.5"
},
"timestamp": "2025-03-26T16:57:12.009292265"
},
"homo_sapiens - gsmr - stub": {
"content": [
[
"test_exposure_test_outcome.gsmr:md5,d41d8cd98f00b204e9800998ecf8427e"
],
[
"test_exposure_test_outcome.eff_plot.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
],
[
"test_exposure_test_outcome.mono.badsnps:md5,d41d8cd98f00b204e9800998ecf8427e"
],
[
"versions.yml:md5,9d63b9cb692159dbd3c08b55eca5212b"
]
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.5"
},
"timestamp": "2025-03-26T16:57:20.670192583"
}
}
Loading