Skip to content

new module starfusion build #8107

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Apr 24, 2025
108 changes: 57 additions & 51 deletions modules/nf-core/abacas/meta.yml
Original file line number Diff line number Diff line change
@@ -1,59 +1,65 @@
# nf-core module metadata for abacas.
# NOTE(review): this span looks like a rendered diff (57 additions, 51 deletions
# per the header above it), so each list/mapping below appears twice: first the
# removed lines, then their re-added replacement. Confirm against the real
# meta.yml before treating the repeats as duplicate YAML entries.
name: abacas
description: contiguate draft genome assembly
keywords:
- genome
- assembly
- contiguate
- genome
- assembly
- contiguate
# Tool wrapped by this module (both copies carry the same fields).
tools:
- abacas:
description: |
ABACAS is intended to rapidly contiguate (align, order, orientate),
visualize and design primers to close gaps on shotgun assembled
contigs based on a reference sequence.
homepage: http://abacas.sourceforge.net/documentation.html
documentation: http://abacas.sourceforge.net/documentation.html
doi: "10.1093/bioinformatics/btp347"
licence: ["GPL v2-or-later"]
identifier: biotools:abacas
- abacas:
description: |
ABACAS is intended to rapidly contiguate (align, order, orientate),
visualize and design primers to close gaps on shotgun assembled
contigs based on a reference sequence.
homepage: http://abacas.sourceforge.net/documentation.html
documentation: http://abacas.sourceforge.net/documentation.html
doi: "10.1093/bioinformatics/btp347"
licence: ["GPL v2-or-later"]
identifier: biotools:abacas
# Inputs: a (meta, scaffold) tuple and a reference FASTA.
# The second copy additionally attaches an EDAM FASTA ontology term to each file.
input:
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- scaffold:
type: file
description: Fasta file containing scaffold
pattern: "*.{fasta,fa}"
- - fasta:
type: file
description: FASTA reference file
pattern: "*.{fasta,fa}"
- - meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- scaffold:
type: file
description: Fasta file containing scaffold
pattern: "*.{fasta,fa}"
ontologies:
- edam: http://edamontology.org/format_1929 # FASTA
- - fasta:
type: file
description: FASTA reference file
pattern: "*.{fasta,fa}"
ontologies:
- edam: http://edamontology.org/format_1929 # FASTA
# Outputs: `results` (meta plus files matching "*.abacas*") and `versions`.
# The second copy additionally declares empty `ontologies` lists.
output:
- results:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- "*.abacas*":
type: file
description: |
List containing abacas output files
[ 'test.abacas.bin', 'test.abacas.fasta', 'test.abacas.gaps',
'test.abacas.gaps.tab', 'test.abacas.nucmer.delta',
'test.abacas.nucmer.filtered.delta', 'test.abacas.nucmer.tiling',
'test.abacas.tab', 'test.abacas.unused.contigs.out',
'test.abacas.MULTIFASTA.fa' ]
pattern: "*.{abacas}*"
- versions:
- versions.yml:
type: file
description: File containing software versions
pattern: "versions.yml"
- results:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- "*.abacas*":
type: file
description: |
List containing abacas output files
[ 'test.abacas.bin', 'test.abacas.fasta', 'test.abacas.gaps',
'test.abacas.gaps.tab', 'test.abacas.nucmer.delta',
'test.abacas.nucmer.filtered.delta', 'test.abacas.nucmer.tiling',
'test.abacas.tab', 'test.abacas.unused.contigs.out',
'test.abacas.MULTIFASTA.fa' ]
pattern: "*.{abacas}*"
ontologies: []
- versions:
- versions.yml:
type: file
description: File containing software versions
pattern: "versions.yml"
ontologies: []
authors:
- "@joseespinosa"
- "@drpatelh"
- "@joseespinosa"
- "@drpatelh"
maintainers:
- "@joseespinosa"
- "@drpatelh"
- "@joseespinosa"
- "@drpatelh"
8 changes: 8 additions & 0 deletions modules/nf-core/starfusion/build/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Conda environment referenced by the STARFUSION_BUILD process
# (`conda "${moduleDir}/environment.yml"`).
channels:
  - conda-forge
  - bioconda
dependencies:
  # Every package is pinned to an exact version.
  - bioconda::dfam=3.7
  - bioconda::hmmer=3.4
  - bioconda::minimap2=2.28
  - bioconda::star-fusion=1.14.0
135 changes: 135 additions & 0 deletions modules/nf-core/starfusion/build/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/*
 * STARFUSION_BUILD
 *
 * Runs `prep_genome_lib.pl` on a genome FASTA and a GTF annotation and emits the
 * resulting `<prefix>_genome_lib_build_dir` directory.
 *
 * Inputs:
 *   tuple(meta, fasta)   - metadata map and genome FASTA; `meta.id` is the default prefix
 *   tuple(meta2, gtf)    - metadata map and GTF annotation (meta2 is not read by the script)
 *   fusion_annot_lib     - file passed to `--fusion_annot_lib`
 *   dfam_species         - value passed to `--dfam_db`
 *
 * Outputs:
 *   reference - tuple(meta, "<prefix>_genome_lib_build_dir")
 *   versions  - versions.yml holding the reported STAR-Fusion version
 */
process STARFUSION_BUILD {
    tag "${meta.id}"
    label 'process_high'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/be/bed86145102fdf7e381e1a506a4723676f98b4bbe1db5085d02213cef18525c9/data' :
        'community.wave.seqera.io/library/dfam_hmmer_minimap2_star-fusion:aa3a8e3951498552'}"

    input:
    tuple val(meta), path(fasta)
    tuple val(meta2), path(gtf)
    path fusion_annot_lib
    val dfam_species

    output:
    tuple val(meta), path("${prefix}_genome_lib_build_dir"), emit: reference
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // `prefix` is deliberately not declared with `def`: the output block above reads it.
    prefix = task.ext.prefix ?: "${meta.id}"
    // Single source of truth for the build directory name used by the command below.
    def genome_lib_dir = "${prefix}_genome_lib_build_dir"
    """
    prep_genome_lib.pl \\
        --genome_fa ${fasta} \\
        --gtf ${gtf} \\
        --dfam_db ${dfam_species} \\
        --pfam_db current \\
        --fusion_annot_lib ${fusion_annot_lib} \\
        --CPU ${task.cpus} \\
        --output_dir ${genome_lib_dir} \\
        ${args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //')
    END_VERSIONS
    """

    stub:
    // `prefix` is deliberately not declared with `def`: the output block above reads it.
    prefix = task.ext.prefix ?: "${meta.id}"
    // Directory names for the placeholder tree created below: the build directory
    // itself, its `__chkpts` sub-directory and its `ref_genome.fa.star.idx` sub-directory.
    def genome_lib_dir = "${prefix}_genome_lib_build_dir"
    def chkpts_dir     = "${genome_lib_dir}/__chkpts"
    def star_idx_dir   = "${genome_lib_dir}/ref_genome.fa.star.idx"
    // Plain files are created empty with `touch`; every `*.gz` file is written as a
    // gzip stream (`echo | gzip`) rather than a zero-byte file.
    """
    mkdir -p ${genome_lib_dir}

    touch ${genome_lib_dir}/AnnotFilterRule.pm
    echo | gzip > ${genome_lib_dir}/blast_pairs.dat.gz
    touch ${genome_lib_dir}/blast_pairs.idx

    mkdir -p ${chkpts_dir}
    touch ${chkpts_dir}/annotfiltrule_cp.ok
    touch ${chkpts_dir}/blast_pairs.idx.ok
    touch ${chkpts_dir}/cp_gene_blast_pairs.ok
    touch ${chkpts_dir}/cp_pfam_dat.ok
    touch ${chkpts_dir}/cp_ref_annot_cdna.ok
    touch ${chkpts_dir}/fusion_annot_lib.cp.ok
    touch ${chkpts_dir}/_fusion_annot_lib.idx.ok
    touch ${chkpts_dir}/index_pfam_hits.ok
    touch ${chkpts_dir}/index_ref_annot_cdna.ok
    touch ${chkpts_dir}/makeblastdb.ok
    touch ${chkpts_dir}/mm2_genome_idx.ok
    touch ${chkpts_dir}/mm2.splice_bed.ok
    touch ${chkpts_dir}/_prot_info_db.ok
    touch ${chkpts_dir}/ref_annot.cdsplus.dfam_masked.fa.cp.ok
    touch ${chkpts_dir}/ref_annot.cdsplus.dfam_masked.fa.idx.ok
    touch ${chkpts_dir}/ref_annot.gtf.gene_spans.ok
    touch ${chkpts_dir}/ref_annot.gtf.mini.sortu.ok
    touch ${chkpts_dir}/ref_annot.gtf.ok
    touch ${chkpts_dir}/ref_genome_fai.ok
    touch ${chkpts_dir}/ref_genome.fa.ok
    touch ${chkpts_dir}/trans.blast.dat.cp.ok
    touch ${chkpts_dir}/trans.blast.dat.index.ok
    touch ${chkpts_dir}/validate_ctat_genome_lib.ok

    echo | gzip > ${genome_lib_dir}/fusion_annot_lib.gz
    touch ${genome_lib_dir}/fusion_annot_lib.idx
    touch ${genome_lib_dir}/pfam_domains.dbm
    echo | gzip > ${genome_lib_dir}/PFAM.domtblout.dat.gz

    touch ${genome_lib_dir}/ref_annot.cdna.fa
    touch ${genome_lib_dir}/ref_annot.cdna.fa.idx
    touch ${genome_lib_dir}/ref_annot.cds
    touch ${genome_lib_dir}/ref_annot.cdsplus.fa
    touch ${genome_lib_dir}/ref_annot.cdsplus.fa.idx
    touch ${genome_lib_dir}/ref_annot.gtf
    touch ${genome_lib_dir}/ref_annot.gtf.gene_spans
    touch ${genome_lib_dir}/ref_annot.gtf.mini.sortu
    touch ${genome_lib_dir}/ref_annot.gtf.mm2.splice.bed
    touch ${genome_lib_dir}/ref_annot.pep
    touch ${genome_lib_dir}/ref_annot.prot_info.dbm

    touch ${genome_lib_dir}/ref_genome.fa
    touch ${genome_lib_dir}/ref_genome.fa.fai
    touch ${genome_lib_dir}/ref_genome.fa.mm2
    touch ${genome_lib_dir}/ref_genome.fa.ndb
    touch ${genome_lib_dir}/ref_genome.fa.nhr
    touch ${genome_lib_dir}/ref_genome.fa.nin
    touch ${genome_lib_dir}/ref_genome.fa.njs
    touch ${genome_lib_dir}/ref_genome.fa.not
    touch ${genome_lib_dir}/ref_genome.fa.nsq
    touch ${genome_lib_dir}/ref_genome.fa.ntf
    touch ${genome_lib_dir}/ref_genome.fa.nto

    mkdir -p ${star_idx_dir}
    touch ${star_idx_dir}/build.ok
    touch ${star_idx_dir}/chrLength.txt
    touch ${star_idx_dir}/chrNameLength.txt
    touch ${star_idx_dir}/chrName.txt
    touch ${star_idx_dir}/chrStart.txt
    touch ${star_idx_dir}/exonGeTrInfo.tab
    touch ${star_idx_dir}/exonInfo.tab
    touch ${star_idx_dir}/geneInfo.tab
    touch ${star_idx_dir}/Genome
    touch ${star_idx_dir}/genomeParameters.txt
    touch ${star_idx_dir}/Log.out
    touch ${star_idx_dir}/SA
    touch ${star_idx_dir}/SAindex
    touch ${star_idx_dir}/sjdbInfo.txt
    touch ${star_idx_dir}/sjdbList.fromGTF.out.tab
    touch ${star_idx_dir}/sjdbList.out.tab
    touch ${star_idx_dir}/transcriptInfo.tab

    touch ${genome_lib_dir}/trans.blast.align_coords.align_coords.dat
    touch ${genome_lib_dir}/trans.blast.align_coords.align_coords.dbm
    echo | gzip > ${genome_lib_dir}/trans.blast.dat.gz

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        STAR-Fusion: \$(STAR-Fusion --version 2>&1 | grep -i 'version' | sed 's/STAR-Fusion version: //')
    END_VERSIONS
    """

}
74 changes: 74 additions & 0 deletions modules/nf-core/starfusion/build/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# nf-core module metadata for STARFUSION_BUILD.
# The process runs `prep_genome_lib.pl` on a caller-supplied FASTA and GTF and
# emits `<prefix>_genome_lib_build_dir`; the description states that rather than
# "download", because the genome and annotation are inputs, not fetched resources.
name: starfusion_build
description: Build the STAR-Fusion genome library (from a genome FASTA and GTF) required to run the STAR-Fusion caller
keywords:
  - download
  - starfusion
  - build

tools:
  - star-fusion:
      description: Fusion calling algorithm for RNAseq data
      homepage: https://github.com/STAR-Fusion/
      documentation: https://github.com/STAR-Fusion/STAR-Fusion/wiki/installing-star-fusion
      tool_dev_url: https://github.com/STAR-Fusion/STAR-Fusion
      doi: "10.1186/s13059-019-1842-9"
      licence: ["GPL v3"]
      identifier: ""

input:
  - - meta:
        type: map
        description: Metadata map
        required: true
    - fasta:
        type: file
        description: Input FASTA file
        pattern: "*.{fa,fasta}"
        required: true
        ontologies:
          - edam: http://edamontology.org/format_1929 # FASTA
  - - meta2:
        type: map
        description: Second metadata map
        required: true
    - gtf:
        type: file
        description: Input GTF (Gene Transfer Format) file
        pattern: "*.gtf"
        required: true
        ontologies:
          - edam: "http://edamontology.org/format_2306" # GTF
  - - fusion_annot_lib:
        type: file
        description: Fusion annotation library file containing known fusion genes
        required: true
        ontologies:
          # Labelled "Gene fusion" by the author.
          # NOTE(review): this is an EDAM *topic* term, while the other file
          # inputs use *format* terms - confirm it is the intended ontology.
          - edam: "http://edamontology.org/topic_0203"
  - - dfam_species:
        type: string
        # Used unconditionally by the process, hence required like the other inputs.
        description: Dfam species name, passed to `prep_genome_lib.pl --dfam_db`
        required: true
output:
  - reference:
      - meta:
          type: map
          description: Metadata map
      # Quoted because the key contains `{`/`}`; the parsed key is unchanged and
      # still matches the `path("${prefix}_genome_lib_build_dir")` output.
      - "${prefix}_genome_lib_build_dir":
          type: directory
          description: Genome library build directory
          pattern: "*_genome_lib_build_dir"
  - versions:
      - versions.yml:
          type: file
          description: File containing software versions
          pattern: "versions.yml"
          ontologies:
            - edam: http://edamontology.org/format_3750 # YAML
authors:
  - "@praveenraj2018"
  - "@martings"
  - "@alanmmobbs93"
  - "@delfiterradas"
  - "@sofiromano"

maintainers:
  - "@praveenraj2018"
Loading
Loading