Skip to content

Commit 4f31bd8

Browse files
committed
Updating output directives and adding software information
1 parent fac2944 commit 4f31bd8

File tree

29 files changed

+121
-87
lines changed

29 files changed

+121
-87
lines changed

.groovylintrc.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
"design.ImplementationAsType": "off",
1919
"unnecessary.UnnecessaryPublicModifier": "off",
2020
"unnecessary.DuplicateStringLiteral": "off",
21-
"formatting.LineLength": "off",
22-
"convention.ImplicitClosureParameter": "off"
21+
"basic.DeadCode": "off",
22+
"formatting.LineLength": "off"
2323
}
2424
}

conf/modules.config

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
process {
2+
3+
publishDir = [
4+
path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
5+
mode: params.publish_dir_mode,
6+
enabled: true,
7+
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
8+
]
9+
10+
withName: BWAMEM2_MEM {
11+
publishDir = [
12+
path: { "${params.outdir}/bwamem2" },
13+
mode: params.publish_dir_mode,
14+
enabled: false
15+
]
16+
}
17+
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
18+
publishDir = [
19+
path: { "${params.outdir}/custom" },
20+
mode: params.publish_dir_mode,
21+
enabled: false
22+
]
23+
}
24+
withName: FASTP {
25+
publishDir = [
26+
path: { "${params.outdir}/FASTP" },
27+
mode: params.publish_dir_mode,
28+
enabled: false
29+
]
30+
}
31+
withName: PTRIMMER {
32+
publishDir = [
33+
path: { "${params.outdir}/ptrimmer" },
34+
mode: params.publish_dir_mode,
35+
enabled: false
36+
]
37+
}
38+
withName: BIOBLOOMTOOLS_CATEGORIZER {
39+
publishDir = [
40+
path: { "${params.outdir}/biobloom" },
41+
mode: params.publish_dir_mode,
42+
enabled: false
43+
]
44+
}
45+
}

conf/resources.config

+3-3
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ params {
44

55
genomes {
66
tomato {
7-
fasta = "${params.reference_base}/tomato/Solanum_lycopersicum.SL3.0.dna.toplevel.fa"
8-
fai = "${params.reference_base}/tomato/Solanum_lycopersicum.SL3.0.dna.toplevel.fa.fai"
9-
dict = "${params.reference_base}/tomato/sSolanum_lycopersicum.SL3.0.dna.toplevel.dict"
7+
fasta = "${params.reference_base}/gmo-check/tomato/Solanum_lycopersicum.SL3.0.dna.toplevel.fa"
8+
fai = "${params.reference_base}/gmo-check/tomato/Solanum_lycopersicum.SL3.0.dna.toplevel.fa.fai"
9+
dict = "${params.reference_base}/gmo-check/tomato/sSolanum_lycopersicum.SL3.0.dna.toplevel.dict"
1010
amplicon_txt = "${baseDir}/assets/genomes/tomato/amplicon.txt"
1111
bed = "${baseDir}/assets/genomes/tomato/primers.bed"
1212
target_bed = "${baseDir}/assets/genomes/tomato/targets.bed"

docs/installation.md

+5-31
Original file line numberDiff line numberDiff line change
@@ -16,40 +16,14 @@ This pipeline requires locally stored genomes in fasta format. To build these, d
1616
nextflow run marchoeppner/gmo-check -profile standard,singularity --build_references --run_name build_refs --outdir /path/to/references
1717
```
1818

19-
If you do not have singularity on your system, you can also specify docker, podman or conda for software provisioning - see the [usage information](usage.md).
19+
where `/path/to/references` could be something like `/data/pipelines/references` or whatever is most appropriate on your system.
2020

21-
The path specified with `--outdir` can then be given to the pipeline during normal execution as `--reference_base`.
21+
If you do not have singularity on your system, you can also specify docker, podman or conda for software provisioning - see the [usage information](usage.md).
2222

23+
The path specified with `--outdir` can then be given to the pipeline during normal execution as `--reference_base`. Please note that the build process will create a pipeline-specific subfolder (`gmo-check`) that must not be given as part of the `--outdir` argument. Gmo-check is part of a collection of pipelines that use a shared reference directory and it will choose the appropriate subfolder by itself.
2324

2425
## Site-specific config file
2526

26-
This pipeline requires a site-specific configuration file to be able to talk to your local cluster or compute infrastructure. Nextflow supports a wide
27-
range of such infrastructures, including Slurm, LSF and SGE - but also Kubernetes and AWS. For more information, see [here](https://www.nextflow.io/docs/latest/executor.html).
28-
29-
Please see conf/lsh.config for an example of how to configure this pipeline for a Slurm queue.
30-
31-
All software is provided through either Conda environments or Docker containers. Consider a Docker-compatible container engine if at all possible (Docker, Singularity, Podman). Conda environments are built on the fly during pipeline execution and only for a given pipeline run, which tends to slow things down quite a bit. Details on how to specify singularity as your container engine are provided in the config file for our lsh system (lsh.config).
32-
33-
With this information in place, you will next have to create an new site-specific profile for your local environment in `nextflow.config` using the following format:
34-
35-
```
36-
37-
profiles {
38-
39-
your_profile {
40-
includeConfig 'conf/base.config'
41-
includeConfig 'conf/your_cluster.config'
42-
includeConfig 'conf/resources.config'
43-
}
44-
}
45-
46-
```
47-
48-
This would add a new profile, called `your_profile` which uses (and expects) conda to provide all software.
49-
50-
`base.config` Basic settings about resource usage for the individual pipeline stages.
51-
52-
`resources.config` Gives information about the files that are to be used during analysis for the individual human genome assemblies.
53-
54-
`your_cluster.config` Specifies which sort of resource manager to use and where to find e.g. local resources cluster file system (see below).
27+
If you run on anything other than a local system, this pipeline requires a site-specific configuration file to be able to talk to your cluster or compute infrastructure. Nextflow supports a wide range of such infrastructures, including Slurm, LSF and SGE - but also Kubernetes and AWS. For more information, see [here](https://www.nextflow.io/docs/latest/executor.html).
5528

29+
Site-specific config files for our pipeline ecosystem are stored centrally on [GitHub](https://github.com/marchoeppner/configs). Please talk to us if you want to add your system.

docs/pipeline.md

+2
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
# Pipeline structure
2+
3+
![](images/pipeline_dag.png)

docs/software.md

+19-1
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,22 @@ Version 0.24, doi: 10.1093/bioinformatics/bty560, [PubMed](https://pubmed.ncbi.n
77
Version 1.19, doi: 10.1093/bioinformatics/btw354, [PubMed](https://pubmed.ncbi.nlm.nih.gov/27312411/) [github](https://github.com/MultiQC/MultiQC)
88

99
**Samtools**
10-
Version 1.19, doi: 10.1093/bioinformatics/btp352, [PubMed](https://pubmed.ncbi.nlm.nih.gov/19505943/) [github](https://github.com/samtools/samtools)
10+
Version 1.19, doi: 10.1093/bioinformatics/btp352, [PubMed](https://pubmed.ncbi.nlm.nih.gov/19505943/) [github](https://github.com/samtools/samtools)
11+
12+
**Vsearch**
13+
Version 2.27.0, doi: 10.7717/peerj.2584, [PubMed](https://pubmed.ncbi.nlm.nih.gov/27781170/) [github](https://github.com/torognes/vsearch)
14+
15+
**Ptrimmer**
16+
Version 1.3.3, doi: 10.1186/s12859-019-2854-x, [PubMed](https://pubmed.ncbi.nlm.nih.gov/31077131/) [github](https://github.com/DMU-lilab/pTrimmer)
17+
18+
**Bwa-mem2**
19+
Version 2.2.1, doi: 10.1109/IPDPS.2019.00041, [IEEE Xplore](https://ieeexplore.ieee.org/document/8820962) [github](https://github.com/bwa-mem2/bwa-mem2)
20+
21+
**Freebayes**
22+
Version 1.3.6, [ArXiv](http://arxiv.org/abs/1207.3907) [github](https://github.com/freebayes/freebayes)
23+
24+
**Blast**
25+
Version 2.15, doi: 10.1016/S0022-2836(05)80360-2, [PubMed](https://pubmed.ncbi.nlm.nih.gov/2231712/) [NCBI](https://blast.ncbi.nlm.nih.gov/doc/blast-help/downloadblastdata.html)
26+
27+
**Bedtools**
28+
Version 2.31.1, doi: 10.1093/bioinformatics/btq033, [PubMed](https://pubmed.ncbi.nlm.nih.gov/20110278/) [github](https://github.com/arq5x/bedtools2)

images/pipeline_dag.png

Loading

lib/WorkflowPipeline.groovy

+4-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ class WorkflowPipeline {
1616
log.info 'Cannot run the alignment workflow without genome references (--reference_base). Please check the documentation!'
1717
System.exit(1)
1818
}
19+
if ( !params.input && !params.build_references) {
20+
log.info "This pipeline requires a sample sheet as input (--input)"
21+
System.exit(1)
22+
}
1923
}
20-
2124
}

main.nf

+6-8
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,15 @@
22

33
nextflow.enable.dsl = 2
44

5-
// TODO: Update this block with a description and the name of the pipeline
65
/**
76
===============================
8-
Pipeline
7+
GMO-check Pipeline
98
===============================
109
11-
This Pipeline performs ....
10+
This Pipeline performs detection of genetic events in food and seed material(s) (GMO analysis).
1211
1312
### Homepage / git
14-
git@github.com:marchoeppner/pipeline.git
13+
git@github.com:marchoeppner/gmo-check.git
1514
1615
**/
1716

@@ -24,7 +23,6 @@ run_name = (params.run_name == false) ? "${workflow.sessionId}" : "${params.run_
2423

2524
WorkflowMain.initialise(workflow, params, log)
2625

27-
// TODO: Rename this and the file under lib/ to something matching this pipeline (e.g. WorkflowAmplicons)
2826
WorkflowPipeline.initialise(params, log)
2927

3028
include { GMO } from './workflows/gmo'
@@ -48,9 +46,9 @@ workflow.onComplete {
4846
log.info "Duration: $workflow.duration"
4947
log.info hline
5048

51-
summary["BlastDB"] = params.blastdb
52-
summary["Freebayes_min_alt_frac"] = params.freebayes_min_alternate_frac
53-
summary["Freebayes_min_alt_count"] = params.freebayes_min_alternate_count
49+
summary['BlastDB'] = params.blastdb
50+
summary['Freebayes_min_alt_frac'] = params.freebayes_min_alternate_frac
51+
summary['Freebayes_min_alt_count'] = params.freebayes_min_alternate_count
5452

5553
emailFields = [:]
5654
emailFields['version'] = workflow.manifest.version

modules/bedtools/coverage/main.nf

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
process BEDTOOLS_COVERAGE {
2-
publishDir "${params.outdir}/${meta.sample_id}/BEDTOOLS", mode: 'copy'
32

43
label 'short_parallel'
54

@@ -11,16 +10,16 @@ process BEDTOOLS_COVERAGE {
1110
'quay.io/biocontainers/bedtools:2.31.1--hf5e1c6e_0' }"
1211

1312
input:
14-
tuple val(meta),path(bam),path(bai)
13+
tuple val(meta), path(bam), path(bai)
1514
path(bed)
1615

1716
output:
1817
tuple val(meta), path(coverage), emit: report
1918
path('versions.yml'), emit: versions
2019

2120
script:
22-
coverage = meta.sample_id + ".bedcov.txt"
23-
21+
coverage = meta.sample_id + '.bedcov.txt'
22+
2423
"""
2524
coverageBed -a $bed -b $bam > $coverage
2625

modules/biobloomtools/categorizer/main.nf

-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
process BIOBLOOMTOOLS_CATEGORIZER {
2-
publishDir "${params.outdir}/Processing/Bloomfilter", mode: 'copy'
32

43
label 'short_parallel'
54

modules/blast/blastn/main.nf

-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
process BLAST_BLASTN {
2-
publishDir "${params.outdir}/Processing/BlastN", mode: 'copy'
32

43
label 'short_parallel'
54

modules/blast/makeblastdb/main.nf

-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
process BLAST_MAKEBLASTDB {
22
tag "$fasta"
33

4-
publishDir "${params.outdir}/Processing/BlastDB", mode: 'copy'
5-
64
label 'short_parallel'
75

86
conda 'bioconda::blast=2.15'

modules/bwamem2/index/main.nf

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ process BWAMEM2_INDEX {
66
conda 'bioconda::samtools=1.19.2 bioconda::bwa-mem2=2.2.1'
77
container 'quay.io/biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2cdf6bf1e92acbeb9b2834b1c58754167173a410-0'
88

9-
publishDir "${params.outdir}/${meta.id}", mode: 'copy'
9+
publishDir "${params.outdir}/gmo-check/${meta.id}", mode: 'copy'
1010

1111
input:
1212
tuple val(meta), path(fasta)

modules/freebayes/main.nf

-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
process FREEBAYES {
22
tag "${meta.sample_id}"
33

4-
publishDir "${params.outdir}/Processing/Freebayes", mode: 'copy'
5-
64
label 'medium_serial'
75

86
conda 'bioconda::freebayes=1.3.6'

modules/gunzip/main.nf

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ process GUNZIP {
33

44
label 'medium_serial'
55

6-
publishDir "${params.outdir}/${meta.id}", mode: 'copy'
6+
publishDir "${params.outdir}/gmo-check/${meta.id}", mode: 'copy'
77

88
conda 'sed=4.7'
99
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?

modules/helper/rules_to_bed.nf

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,14 @@
11
process RULES_TO_BED {
2-
32
input:
43
path(json)
54

65
output:
76
path(bed), emit: bed
87

98
script:
10-
bed = "rules.txt"
9+
bed = 'rules.txt'
1110

1211
"""
1312
rules_to_bed.rb --json $json > $bed
1413
"""
15-
16-
}
14+
}

modules/helper/vcf_to_report.nf

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ process VCF_TO_REPORT {
44
publishDir "${params.outdir}/Reports/JSON", mode: 'copy'
55

66
input:
7-
tuple val(meta),path(vcf),path(coverage)
7+
tuple val(meta), path(vcf), path(coverage)
88
path(rules)
99

1010
output:

modules/samtools/ampliconclip/main.nf

-2
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@ process SAMTOOLS_AMPLICONCLIP {
66

77
tag "${meta.sample_id}"
88

9-
publishDir "${params.outdir}/${meta.sample_id}/BWA2", mode: 'copy'
10-
119
input:
1210
tuple val(meta), path(bam), path(bai)
1311
path(bed)

modules/samtools/dict/main.nf

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ process SAMTOOLS_DICT {
66
'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' :
77
'quay.io/biocontainers/samtools:1.19.2--h50ea8bc_0' }"
88

9-
publishDir "${params.outdir}/${meta.id}", mode: 'copy'
9+
publishDir "${params.outdir}/gmo-check/${meta.id}", mode: 'copy'
1010

1111
input:
1212
tuple val(meta), path(fasta)

modules/samtools/faidx/main.nf

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ process SAMTOOLS_FAIDX {
88
'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' :
99
'quay.io/biocontainers/samtools:1.19.2--h50ea8bc_0' }"
1010

11-
publishDir "${params.outdir}/${meta.id}", mode: 'copy'
11+
publishDir "${params.outdir}/gmo-check/${meta.id}", mode: 'copy'
1212

1313
input:
1414
tuple val(meta), path(fasta)

modules/samtools/index/main.nf

-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
process SAMTOOLS_INDEX {
2-
publishDir "${params.outdir}/${meta.sample_id}/BWA2", mode: 'copy'
32

43
conda 'bioconda::samtools=1.19.2'
54
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?

modules/samtools/markdup/main.nf

-2
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@ process SAMTOOLS_MARKDUP {
88

99
tag "${meta.sample_id}"
1010

11-
publishDir "${params.outdir}/${meta.sample_id}/", mode: 'copy'
12-
1311
input:
1412
tuple val(meta), path(merged_bam), path(merged_bam_index)
1513

modules/vsearch/fastqfilter/main.nf

-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
process VSEARCH_FASTQFILTER {
22
tag "${meta.sample_id}"
33

4-
publishDir "${params.outdir}/${meta.sample_id}/VSEARCH", mode: 'copy'
5-
64
label 'short_serial'
75

86
conda 'bioconda::vsearch=2.27.0'

modules/vsearch/fastqmerge/main.nf

-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
process VSEARCH_FASTQMERGE {
22
tag "${meta.sample_id}"
33

4-
publishDir "${params.outdir}/${meta.sample_id}/VSEARCH", mode: 'copy'
5-
64
label 'short_serial'
75

86
conda 'bioconda::vsearch=2.27.0'

modules/vsearch/fastxuniques/main.nf

-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
process VSEARCH_FASTXUNIQUES {
22
tag "${meta.sample_id}"
33

4-
publishDir "${params.outdir}/${meta.sample_id}/VSEARCH", mode: 'copy'
5-
64
label 'short_serial'
75

86
conda 'bioconda::vsearch=2.27.0'

0 commit comments

Comments
 (0)