Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/rmats/prep/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the rmats/prep module: a single pinned rMATS release.
channels:
  - conda-forge
  - bioconda
dependencies:
  - bioconda::rmats=4.3.0
73 changes: 73 additions & 0 deletions modules/nf-core/rmats/prep/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Runs `rmats.py --task prep` on one BAM and publishes copies of everything
// rMATS writes to its tmp directory into the task work directory, so that the
// files can be captured by the `*.rmats` / `*outcomes_by_bam.txt` output globs.
process RMATS_PREP {
    tag "${meta.id}"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
        ? 'https://depot.galaxyproject.org/singularity/rmats:4.3.0--py311hf2f0b74_5'
        : 'biocontainers/rmats:4.3.0--py311hf2f0b74_5'}"

    input:
    // meta map plus the BAM whose name is written into the --b1 list file
    tuple val(meta), path(genome_bam)
    // TODO - post seems to need only the BAM *names*, not the actual files. Could we just get the first line of each file to get the names?
    // for file in `ls multi_bam_rmats_prep_tmp/*.rmats`; do head -1 $file; done | tr '\n' ','
    // TODO - for stats, it should be possible to parse the formula using patsy, but if we include PAIRADISE we might have R - just do this in R, first pass
    // annotation passed to --gtf
    path reference_gtf
    // value passed to --readLength
    val rmats_read_len

    output:
    tuple val(meta), path("*.rmats"), emit: prep_rmats_file
    tuple val(meta), path("*outcomes_by_bam.txt"), emit: prep_read_outcomes_file
    tuple val("${task.process}"), val('rmats'), eval('rmats.py --version | sed -e "s/v//g"'), emit: versions_rmats, topic: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // --readLength READLENGTH
    // The length of each read. Required parameter, with the
    // value set according to the RNA-seq read length
    // TODO - question. Does this definition mean I should change it by read length? If so, look at a samtools command to figure it out. Samtools stats!
    // TODO - should I modify the prefix to include rmats_prep only in a subworkflow via modules.config? It seems so, see example at https://github.com/nf-core/rnaseq/blob/e049f51f0214b2aef7624b9dd496a404a7c34d14/conf/modules.config#L576
    //
    // The copy loop globs the tmp directory directly instead of parsing `ls`
    // output, and quotes every expansion, so file names are never word-split.
    """
    echo ${genome_bam} > ${prefix}.prep.b1.txt

    rmats.py \\
        --task prep \\
        ${args} \\
        --nthread ${task.cpus} \\
        --b1 ${prefix}.prep.b1.txt \\
        --gtf ${reference_gtf} \\
        --readLength ${rmats_read_len} \\
        --tmp ${prefix}_rmats_tmp \\
        --od ${prefix}_rmats_prep

    for file in ${prefix}_rmats_tmp/*
    do
        cp "\${file}" "${prefix}_prep_\$(basename "\${file}")"
    done
    """

    // NOTES for post - post requires the rmats files to be in the tmp directory, otherwise it fails

    stub:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Echoing args keeps the variable referenced; the touched files satisfy both output globs.
    """
    echo ${args}

    touch ${prefix}.rmats
    touch ${prefix}_outcomes_by_bam.txt
    """
}
88 changes: 88 additions & 0 deletions modules/nf-core/rmats/prep/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: "rmats_prep"
description: Run the rMATS `prep` task on a genome-aligned BAM file, producing the intermediate .rmats files and per-BAM read outcome counts.
keywords:
  - splicing
  - RNA-Seq
  - alternative splicing
  - exon
  - intron
  - rMATS
tools:
  - "rmats":
      description: "MATS is a computational tool to detect differential alternative
        splicing events from RNA-Seq data."
      homepage: "https://github.com/Xinglab/rmats-turbo"
      documentation: "https://github.com/Xinglab/rmats-turbo/blob/v4.3.0/README.md"
      doi: "10.1038/s41596-023-00944-2"
      licence: ["FreeBSD for non-commercial use, see LICENSE file"]
      identifier: biotools:rmats

# Three input channels, in the order declared in main.nf:
# the (meta, BAM) tuple, the GTF annotation, and the read length value.
input:
  - - meta:
        type: map
        description: Groovy Map containing sample information. e.g. `[ id:'sample1', single_end:false, strandness:'auto']`
    - genome_bam:
        type: file
        description: BAM file aligned to the genome
        pattern: "*.{bam}"
        ontologies:
          - edam: http://edamontology.org/format_2572 # BAM
  - reference_gtf:
      type: file
      description: Annotation GTF file
      pattern: "*.{gtf}"
      ontologies:
        - edam: http://edamontology.org/format_2306 # GTF
  - rmats_read_len:
      type: integer
      description: Read length in bases
output:
  prep_rmats_file:
    - - meta:
          type: map
          description: Groovy Map containing sample information. e.g. `[ id:'sample1', single_end:false, strandness:'auto']`
      - "*.rmats":
          type: file
          description: text file containing rmats processed splice junctions
          pattern: "*.rmats"
          ontologies: []
  prep_read_outcomes_file:
    - - meta:
          type: map
          description: Groovy Map containing sample information. e.g. `[ id:'sample1', single_end:false, strandness:'auto']`
      - "*outcomes_by_bam.txt":
          type: file
          description: text file containing the numbers of reads for each outcome (USED, NOT_PAIRED, etc.)
          pattern: "*outcomes_by_bam.txt"
          ontologies:
            - edam: http://edamontology.org/format_2330
  versions_rmats:
    - - ${task.process}:
          type: string
          description: The name of the process
      - rmats:
          type: string
          description: The name of the tool
      - rmats.py --version | sed -e "s/v//g":
          type: eval
          description: The expression to obtain the version of the tool
topics:
  versions:
    - - ${task.process}:
          type: string
          description: The name of the process
      - rmats:
          type: string
          description: The name of the tool
      - rmats.py --version | sed -e "s/v//g":
          type: eval
          description: The expression to obtain the version of the tool
authors:
  - "@akaviaLab"
maintainers:
  - "@akaviaLab"
2 changes: 2 additions & 0 deletions modules/nf-core/rmats/prep/optional_parameters
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--variable-read-length
--allow-clipping
74 changes: 74 additions & 0 deletions modules/nf-core/rmats/prep/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// nf-test suite for the RMATS_PREP process.
// Regenerate the snapshot for this file with:
// nf-core modules test rmats/prep
nextflow_process {

    name "Test Process RMATS_PREP"
    script "../main.nf"
    process "RMATS_PREP"
    // Load the sibling config so the ext.args defined for RMATS_PREP are actually applied.
    config "./nextflow.config"

    tag "modules"
    tag "modules_nfcore"
    tag "rmats"
    tag "rmats/prep"

    test("sarscov2 - bam") {

        when {
            params {
                // nextflow.config reads params.novel_splice_site; define it so the
                // ext.args closure never touches an undefined parameter. The other
                // params it mentions are only evaluated when this one is true.
                novel_splice_site = false
            }
            process {
                """
                // RMATS_PREP declares three input channels: (meta, bam), gtf, read length.
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
                ]
                input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true)
                // NOTE(review): 150 is assumed for this test BAM - confirm (e.g. with samtools stats).
                input[2] = 150
                """
            }
        }

        then {
            // NOTE(review): rMATS appears to put a timestamp in its tmp file names; if so the
            // snapshot below will not be reproducible and should assert on contents instead - confirm.
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

    test("sarscov2 - bam - stub") {

        options "-stub"

        when {
            params {
                // See the non-stub test: required by the ext.args closure in nextflow.config.
                novel_splice_site = false
            }
            process {
                """
                input[0] = [
                    [ id:'test', single_end:false ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
                ]
                input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gtf', checkIfExists: true)
                input[2] = 150
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

}
15 changes: 15 additions & 0 deletions modules/nf-core/rmats/prep/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Test configuration: builds the extra rmats.py arguments for RMATS_PREP
// from the sample meta map and pipeline params.
process {

    withName: RMATS_PREP {
        ext.args = {
            // nf-core pipelines name this key `strandedness`; the `strandness`
            // spelling used originally in this module is still honoured.
            def strandedness = meta.strandedness ?: meta.strandness
            [
                "--variable-read-length --allow-clipping",
                meta.single_end ? '-t single' : '',
                // rMATS uses TopHat-style library names: fr-secondstrand is the
                // forward-stranded protocol, fr-firststrand the reverse (dUTP) one.
                strandedness == "forward" ? "--libType fr-secondstrand" : '',
                strandedness == "reverse" ? "--libType fr-firststrand" : '',
                params.novel_splice_site ? "--novelSS" : "",
                // --mil / --mel are only passed together with --novelSS
                (params.novel_splice_site && params.minimum_intron_length) ? "--mil ${params.minimum_intron_length}" : "",
                (params.novel_splice_site && params.max_exon_length) ? "--mel ${params.max_exon_length}" : "",
            ].join(' ').trim()
        }
    }

}
Loading