diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 00000000..ea27a584
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,27 @@
+{
+ "name": "nfcore",
+ "image": "nfcore/gitpod:latest",
+ "remoteUser": "gitpod",
+
+ // Configure tool-specific properties.
+ "customizations": {
+ // Configure properties specific to VS Code.
+ "vscode": {
+ // Set *default* container specific settings.json values on container create.
+ "settings": {
+ "python.defaultInterpreterPath": "/opt/conda/bin/python",
+ "python.linting.enabled": true,
+ "python.linting.pylintEnabled": true,
+ "python.formatting.autopep8Path": "/opt/conda/bin/autopep8",
+ "python.formatting.yapfPath": "/opt/conda/bin/yapf",
+ "python.linting.flake8Path": "/opt/conda/bin/flake8",
+ "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle",
+ "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle",
+ "python.linting.pylintPath": "/opt/conda/bin/pylint"
+ },
+
+ // Add the IDs of extensions you want installed when the container is created.
+ "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"]
+ }
+ }
+}
diff --git a/.gitattributes b/.gitattributes
index 050bb120..7a2dabc2 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1,4 @@
*.config linguist-language=nextflow
+*.nf.test linguist-language=nextflow
modules/nf-core/** linguist-generated
subworkflows/nf-core/** linguist-generated
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 81e8f600..c58b4779 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -101,3 +101,19 @@ If you are using a new feature from core Nextflow, you may bump the minimum requ
### Images and figures
For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines).
+
+## GitHub Codespaces
+
+This repo includes a devcontainer configuration which will create a GitHub Codespaces environment for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal.
+
+To get started:
+
+- Open the repo in [Codespaces](https://github.com/nf-core/ampliseq/codespaces)
+- Tools installed
+ - nf-core
+ - Nextflow
+
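+If you prefer the command line, the same environment can be created with the GitHub CLI (a sketch assuming `gh` is installed and authenticated):
+
+```bash
+gh codespace create -R nf-core/ampliseq
+```
+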
+Devcontainer specs:
+
+- [DevContainer config](.devcontainer/devcontainer.json)
+- [Dockerfile](.devcontainer/Dockerfile)
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index b959c4ae..b783e190 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -42,7 +42,7 @@ body:
attributes:
label: System information
description: |
- * Nextflow version _(eg. 21.10.3)_
+ * Nextflow version _(eg. 22.10.1)_
* Hardware _(eg. HPC, Desktop, Cloud)_
* Executor _(eg. slurm, local, awsbatch)_
* Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2d25ab33..b00c99b6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -11,6 +11,10 @@ on:
env:
NXF_ANSI_LOG: false
+concurrency:
+ group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
+ cancel-in-progress: true
+
jobs:
test:
name: Run pipeline with test data
@@ -20,11 +24,11 @@ jobs:
strategy:
matrix:
NXF_VER:
- - "21.10.3"
+ - "22.10.1"
- "latest-everything"
steps:
- name: Check out pipeline code
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml
index d3136905..ec23bef2 100644
--- a/.github/workflows/fix-linting.yml
+++ b/.github/workflows/fix-linting.yml
@@ -24,7 +24,7 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }}
- - uses: actions/setup-node@v2
+ - uses: actions/setup-node@v3
- name: Install Prettier
run: npm install -g prettier @prettier/plugin-php
@@ -34,9 +34,9 @@ jobs:
id: prettier_status
run: |
if prettier --check ${GITHUB_WORKSPACE}; then
- echo "::set-output name=result::pass"
+ echo "result=pass" >> $GITHUB_OUTPUT
else
- echo "::set-output name=result::fail"
+ echo "result=fail" >> $GITHUB_OUTPUT
fi
- name: Run 'prettier --write'
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index 8a5ce69b..858d622e 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -4,6 +4,8 @@ name: nf-core linting
# that the code meets the nf-core guidelines.
on:
push:
+ branches:
+ - dev
pull_request:
release:
types: [published]
@@ -12,9 +14,9 @@ jobs:
EditorConfig:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- - uses: actions/setup-node@v2
+ - uses: actions/setup-node@v3
- name: Install editorconfig-checker
run: npm install -g editorconfig-checker
@@ -25,9 +27,9 @@ jobs:
Prettier:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- - uses: actions/setup-node@v2
+ - uses: actions/setup-node@v3
- name: Install Prettier
run: npm install -g prettier
@@ -38,7 +40,7 @@ jobs:
PythonBlack:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
- name: Check code lints with Black
uses: psf/black@stable
@@ -69,12 +71,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check out pipeline code
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
- name: Install Nextflow
uses: nf-core/setup-nextflow@v1
- - uses: actions/setup-python@v3
+ - uses: actions/setup-python@v4
with:
python-version: "3.7"
architecture: "x64"
@@ -97,7 +99,7 @@ jobs:
- name: Upload linting log file artifact
if: ${{ always() }}
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v3
with:
name: linting-logs
path: |
diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml
index 04758f61..0bbcd30f 100644
--- a/.github/workflows/linting_comment.yml
+++ b/.github/workflows/linting_comment.yml
@@ -18,7 +18,7 @@ jobs:
- name: Get PR number
id: pr_number
- run: echo "::set-output name=pr_number::$(cat linting-logs/PR_number.txt)"
+ run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT
- name: Post PR comment
uses: marocchino/sticky-pull-request-comment@v2
diff --git a/.prettierignore b/.prettierignore
index eb74a574..437d763d 100644
--- a/.prettierignore
+++ b/.prettierignore
@@ -1,5 +1,6 @@
email_template.html
adaptivecard.json
+slackreport.json
.nextflow*
work/
data/
@@ -8,3 +9,4 @@ results/
testing/
testing*
*.pyc
+bin/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 20cdaa16..0df376d7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,41 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## nf-core/ampliseq version 2.5.0 - 2023-03-02
+
+### `Added`
+
+- [#518](https://github.com/nf-core/ampliseq/pull/518),[#534](https://github.com/nf-core/ampliseq/pull/534) - Add COIDB DADA2 reference taxonomy database
+- [#521](https://github.com/nf-core/ampliseq/pull/521) - Export svg in addition to pdf files for quality plots from DADA2
+- [#538](https://github.com/nf-core/ampliseq/pull/538) - Parameter `--diversity_rarefaction_depth` controls the minimum rarefaction depth for diversity analysis; this allows increasing the rarefaction depth at the cost of excluding low-count samples. Parameter `--ancom_sample_min_count` sets the minimum sample count required to retain a sample for ANCOM analysis; see the sketch below.
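+
+  A minimal invocation sketch using the new options (the sample sheet, primers, and values are illustrative placeholders, not defaults):
+
+  ```bash
+  nextflow run nf-core/ampliseq -profile docker \
+      --input samplesheet.tsv \
+      --FW_primer GTGYCAGCMGCCGCGGTAA \
+      --RV_primer GGACTACNVGGGTWTCTAAT \
+      --diversity_rarefaction_depth 1000 \
+      --ancom_sample_min_count 1
+  ```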
+
+### `Changed`
+
+- [#537](https://github.com/nf-core/ampliseq/pull/537) - Update output generated with option sbdi-export
+- [#541](https://github.com/nf-core/ampliseq/pull/541) - Remove adjustments of taxonomic levels for the RDP, SILVA, GTDB and UNITE databases for DADA2 taxonomic classification; reduce the default of `--dada_tax_agglom_max` from 7 to 6
+- [#548](https://github.com/nf-core/ampliseq/pull/548) - `--filter_ssu` previously accepted any barrnap hit to a kingdom (domain) (any occurrence in the resulting GFF) to choose an ASV; now only ASVs with the kingdom (domain) that has the lowest evalue are accepted.
+
+### `Fixed`
+
+- [#513](https://github.com/nf-core/ampliseq/pull/513) - Template update for nf-core/tools version 2.7.2
+- [#519](https://github.com/nf-core/ampliseq/pull/519) - Adding the pipeline reference to the MultiQC report
+- [#520](https://github.com/nf-core/ampliseq/pull/520),[#530](https://github.com/nf-core/ampliseq/pull/530) - Fix conda packages
+- [#531](https://github.com/nf-core/ampliseq/pull/531),[#546](https://github.com/nf-core/ampliseq/pull/546) - Update documentation
+- [#535](https://github.com/nf-core/ampliseq/pull/535) - Make sure barrnap runs with fasta input
+- [#544](https://github.com/nf-core/ampliseq/pull/544) - Adding a module to fix the header in fasta input if needed
+
+### `Dependencies`
+
+- [#528](https://github.com/nf-core/ampliseq/pull/528) - Updated QIIME2
+
+| Tool | Previous version | New version |
+| ------ | ---------------- | ----------- |
+| QIIME2 | 2022.8 | 2022.11 |
+
+### `Removed`
+
+- [#513](https://github.com/nf-core/ampliseq/pull/513) - Removed parameter `--enable_conda`.
+
## nf-core/ampliseq version 2.4.1 - 2022-12-07
### `Added`
diff --git a/CITATION.cff b/CITATION.cff
deleted file mode 100644
index 017666c0..00000000
--- a/CITATION.cff
+++ /dev/null
@@ -1,56 +0,0 @@
-cff-version: 1.2.0
-message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication"
-authors:
- - family-names: Ewels
- given-names: Philip
- - family-names: Peltzer
- given-names: Alexander
- - family-names: Fillinger
- given-names: Sven
- - family-names: Patel
- given-names: Harshil
- - family-names: Alneberg
- given-names: Johannes
- - family-names: Wilm
- given-names: Andreas
- - family-names: Garcia
- given-names: Maxime Ulysse
- - family-names: Di Tommaso
- given-names: Paolo
- - family-names: Nahnsen
- given-names: Sven
-title: "The nf-core framework for community-curated bioinformatics pipelines."
-version: 2.4.1
-doi: 10.1038/s41587-020-0439-x
-date-released: 2022-05-16
-url: https://github.com/nf-core/tools
-prefered-citation:
- type: article
- authors:
- - family-names: Ewels
- given-names: Philip
- - family-names: Peltzer
- given-names: Alexander
- - family-names: Fillinger
- given-names: Sven
- - family-names: Patel
- given-names: Harshil
- - family-names: Alneberg
- given-names: Johannes
- - family-names: Wilm
- given-names: Andreas
- - family-names: Garcia
- given-names: Maxime Ulysse
- - family-names: Di Tommaso
- given-names: Paolo
- - family-names: Nahnsen
- given-names: Sven
- doi: 10.1038/s41587-020-0439-x
- journal: nature biotechnology
- start: 276
- end: 278
- title: "The nf-core framework for community-curated bioinformatics pipelines."
- issue: 3
- volume: 38
- year: 2020
- url: https://dx.doi.org/10.1038/s41587-020-0439-x
diff --git a/CITATIONS.md b/CITATIONS.md
index 100ba125..97604283 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -60,8 +60,13 @@
> Kõljalg U, Larsson KH, Abarenkov K, Nilsson RH, Alexander IJ, Eberhardt U, Erland S, Høiland K, Kjøller R, Larsson E, Pennanen T, Sen R, Taylor AF, Tedersoo L, Vrålstad T, Ursing BM. UNITE: a database providing web-based methods for the molecular identification of ectomycorrhizal fungi. New Phytol. 2005 Jun;166(3):1063-8. doi: 10.1111/j.1469-8137.2005.01376.x. PMID: 15869663.
- [MIDORI2 - a collection of reference databases](https://doi.org/10.1002/edn3.303/)
+
> Leray, M., Knowlton, N., & Machida, R. J. (2022). MIDORI2: A collection of quality controlled, preformatted, and regularly updated reference databases for taxonomic assignment of eukaryotic mitochondrial sequences. Environmental DNA, 4, 894– 907. https://doi.org/10.1002/edn3.303.
+- [COIDB - CO1 Taxonomy Database](https://doi.org/10.17044/scilifelab.20514192.v2)
+
+> Sundh J, Manoharan L, Iwaszkiewicz-Eggebrecht E, Miraldo A, Andersson A, Ronquist F. COI reference sequences from BOLD DB. doi: https://doi.org/10.17044/scilifelab.20514192.v2.
+
### Downstream analysis
- [QIIME2](https://pubmed.ncbi.nlm.nih.gov/31341288/)
diff --git a/README.md b/README.md
index a80c97fc..60f56f01 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@
## Introduction
-**nfcore/ampliseq** is a bioinformatics analysis pipeline used for amplicon sequencing, supporting denoising of any amplicon and, currently, taxonomic assignment of 16S, ITS and 18S amplicons. Supported is paired-end Illumina or single-end Illumina, PacBio and IonTorrent data. Default is the analysis of 16S rRNA gene amplicons sequenced paired-end with Illumina.
+**nf-core/ampliseq** is a bioinformatics analysis pipeline used for amplicon sequencing, supporting denoising of any amplicon and, currently, taxonomic assignment of 16S, ITS, CO1 and 18S amplicons. Supported are paired-end Illumina as well as single-end Illumina, PacBio and IonTorrent data. The default is the analysis of 16S rRNA gene amplicons sequenced paired-end with Illumina.
@@ -44,7 +44,7 @@ By default, the pipeline currently performs the following:
## Quick Start
-1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`)
+1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.1`)
2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_.
diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml
index 5a3192a3..cef829de 100644
--- a/assets/methods_description_template.yml
+++ b/assets/methods_description_template.yml
@@ -11,6 +11,7 @@ data: |
${workflow.commandLine}
References
+
Straub D, Blackwell N, Langarica-Fuentes A, Peltzer A, Nahnsen S, Kleindienst S. Interpretations of Environmental Microbial Community Studies Are Biased by the Selected 16S rRNA (Gene) Amplicon Sequencing Pipeline. Front Microbiol. 2020 Oct 23;11:550420. https://doi.org/10.3389/fmicb.2020.550420
Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820
Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x
diff --git a/assets/slackreport.json b/assets/slackreport.json
new file mode 100644
index 00000000..043d02f2
--- /dev/null
+++ b/assets/slackreport.json
@@ -0,0 +1,34 @@
+{
+ "attachments": [
+ {
+ "fallback": "Plain-text summary of the attachment.",
+ "color": "<% if (success) { %>good<% } else { %>danger<%} %>",
+ "author_name": "sanger-tol/readmapping v${version} - ${runName}",
+ "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico",
+ "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>",
+ "fields": [
+ {
+ "title": "Command used to launch the workflow",
+ "value": "```${commandLine}```",
+ "short": false
+ }
+ <%
+ if (!success) { %>
+ ,
+ {
+ "title": "Full error message",
+ "value": "```${errorReport}```",
+ "short": false
+ },
+ {
+ "title": "Pipeline configuration",
+ "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? ("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>",
+ "short": false
+ }
+ <% }
+ %>
+ ],
+ "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})"
+ }
+ ]
+}
diff --git a/bin/add_full_sequence_to_taxfile.py b/bin/add_full_sequence_to_taxfile.py
index 48bda942..ca5e666e 100755
--- a/bin/add_full_sequence_to_taxfile.py
+++ b/bin/add_full_sequence_to_taxfile.py
@@ -25,7 +25,7 @@
if seq != "" and name != "":
seqs = seqs.append({"id": name, "sequence": seq}, ignore_index=True)
seq = ""
- name = line.lstrip(">").rstrip("\s+*\n")
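+ # rstrip() without arguments strips all trailing whitespace; the old
+ # rstrip("\s+*\n") stripped a character set (backslash, s, +, *, newline)
+ # and could eat trailing "s" characters from sequence names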
+ name = line.lstrip(">").rstrip()
else:
seq = seq + line.rstrip("\n")
if seq != "" and name != "":
diff --git a/bin/add_sh_to_taxonomy.py b/bin/add_sh_to_taxonomy.py
index 6bca2ab5..629329a3 100755
--- a/bin/add_sh_to_taxonomy.py
+++ b/bin/add_sh_to_taxonomy.py
@@ -30,10 +30,10 @@
shtax = pd.read_csv(sys.argv[2], sep="\t", header=None, index_col=0, skiprows=None, compression="bz2")
# Change spaces to '_', to match UNITE databases
shtax.replace(" ", "_", regex=True, inplace=True)
-# Replace taxonid with Domain = "Eukaryota"
-shtax.loc[:, 1] = "Eukaryota"
# Remove genus from species name
shtax.loc[:, 8] = shtax.loc[:, 8].str.split("_", 1).str[1]
+# Add empty species_exact column to match format for ASV_tax_species.tsv
+shtax.loc[:, 9] = ""
# Read taxonomy table
# Determine number of taxonomy levels from header
@@ -84,7 +84,7 @@
tax = [""] * num_ranks
conf = m[1] / 100.0
if SH != "":
- tax_list = tax[0:num_ranks] + [SH] + [conf]
+ tax_list = tax[1 : num_ranks + 1] + [SH] + [conf]
taxtable.loc[taxtable["ASV_ID"] == prev_ASV, tax_entries] = tax_list
prev_ASV = ASV
maxid = -1
@@ -130,7 +130,7 @@
tax = [""] * num_ranks
conf = m[1] / 100.0
if SH != "":
- tax_list = tax[0:num_ranks] + [SH] + [conf]
+ tax_list = tax[1 : num_ranks + 1] + [SH] + [conf]
taxtable.loc[taxtable["ASV_ID"] == prev_ASV, tax_entries] = tax_list
diff --git a/bin/novaseq_err_pe.r b/bin/novaseq_err_pe.r
index 578c3ee5..030e8d5e 100755
--- a/bin/novaseq_err_pe.r
+++ b/bin/novaseq_err_pe.r
@@ -32,10 +32,16 @@ saveRDS(errR.md.full, paste0(run_id, "_2.md.err.rds"))
pdf(paste0(run_id, "_1.md.err.pdf"))
plotErrors(errF.md.full, nominalQ = TRUE)
dev.off()
+svg(paste0(run_id, "_1.md.err.svg"))
+plotErrors(errF.md.full, nominalQ = TRUE)
+dev.off()
pdf(paste0(run_id, "_2.md.err.pdf"))
plotErrors(errR.md.full, nominalQ = TRUE)
dev.off()
+svg(paste0(run_id, "_2.md.err.svg"))
+plotErrors(errR.md.full, nominalQ = TRUE)
+dev.off()
sink(file = paste0(run_id, "_1.md.err.convergence.txt"))
dada2:::checkConvergence(errF.md.full)
diff --git a/bin/novaseq_err_se.r b/bin/novaseq_err_se.r
index bbfcc6f9..0245ca55 100755
--- a/bin/novaseq_err_se.r
+++ b/bin/novaseq_err_se.r
@@ -24,6 +24,9 @@ saveRDS(errF.md.full, paste0(run_id, ".md.err.rds"))
pdf(paste0(run_id, ".md.err.pdf"))
plotErrors(errF.md.full, nominalQ = TRUE)
dev.off()
+svg(paste0(run_id, ".md.err.svg"))
+plotErrors(errF.md.full, nominalQ = TRUE)
+dev.off()
sink(file = paste0(run_id, ".md.err.convergence.txt"))
dada2:::checkConvergence(errF.md.full)
diff --git a/bin/sbdiexport.R b/bin/sbdiexport.R
index eb477d66..715871ac 100755
--- a/bin/sbdiexport.R
+++ b/bin/sbdiexport.R
@@ -3,7 +3,7 @@
# sbdiexport.R
#
# A script that collates data from Ampliseq to produce four tsv files as close
-# as possible to ready for submission to the Swedish Biodiversity Data
+# to ready for submission to the Swedish Biodiversity Data
# Infrastructure (SBDI) as possible.
#
# The script expects the following arguments: paired|single fwdprimer revprimer asvtable taxonomytable [metadata]
@@ -13,12 +13,13 @@
suppressPackageStartupMessages(library(tidyverse))
EVENT_COLS <- c(
- 'materialSampleID', 'eventDate', 'samplingProtocol', 'locationID', 'decimalLatitude',
- 'decimalLongitude', 'geodeticDatum', 'coordinateUncertaintyInMeters', 'recordedBy', 'country',
- 'municipality', 'verbatimLocality', 'minimumElevationInMeters', 'maximumElevationInMeters',
- 'minimumDepthInMeters', 'maximumDepthInMeters'
+ 'datasetID', 'institutionCode', 'institutionID', 'collectionCode', 'materialSampleID',
+ 'associatedSequences', 'fieldNumber', 'catalogNumber', 'references', 'eventDate', 'samplingProtocol',
+ 'locationID', 'decimalLatitude', 'decimalLongitude', 'geodeticDatum', 'coordinateUncertaintyInMeters',
+ 'recordedBy', 'country', 'municipality', 'verbatimLocality', 'minimumElevationInMeters',
+ 'maximumElevationInMeters', 'minimumDepthInMeters', 'maximumDepthInMeters'
)
-MIXS_COLS <- c(
+DNA_COLS <- c(
'sop', 'target_gene', 'target_subfragment', 'pcr_primer_name_forward',
'pcr_primer_name_reverse', 'env_broad_scale', 'env_local_scale', 'env_medium'
)
@@ -40,7 +41,29 @@ metadata <- args[6]
asvs <- read.delim(asvtable, sep = '\t', stringsAsFactors = FALSE)
n_samples <- length(colnames(asvs)) - 1
-taxonomy <- read.delim(taxtable, sep = '\t', stringsAsFactors = FALSE)
+# Read taxonomy table and make sure all expected columns are there
+taxonomy <- read.delim(taxtable, sep = '\t', stringsAsFactors = FALSE) %>%
+ mutate(Domain = if("Domain" %in% colnames(.)) Domain else '') %>%
+ mutate(Kingdom = if("Kingdom" %in% colnames(.)) Kingdom else '') %>%
+ mutate(Phylum = if("Phylum" %in% colnames(.)) Phylum else '') %>%
+ mutate(Class = if("Class" %in% colnames(.)) Class else '') %>%
+ mutate(Order = if("Order" %in% colnames(.)) Order else '') %>%
+ mutate(Family = if("Family" %in% colnames(.)) Family else '') %>%
+ mutate(Genus = if("Genus" %in% colnames(.)) Genus else '') %>%
+ mutate(Species = if("Species" %in% colnames(.)) Species else '') %>%
+ mutate(Species_exact = if("Species_exact" %in% colnames(.)) Species_exact else '') %>%
+ mutate(SH = if("SH" %in% colnames(.)) SH else '') %>%
+ relocate(Domain, .after = sequence) %>%
+ relocate(Kingdom, .after = Domain) %>%
+ relocate(Phylum, .after = Kingdom) %>%
+ relocate(Class, .after = Phylum) %>%
+ relocate(Order, .after = Class) %>%
+ relocate(Family, .after = Order) %>%
+ relocate(Genus, .after = Family) %>%
+ relocate(Species, .after = Genus) %>%
+ relocate(Species_exact, .after = Species) %>%
+ relocate(SH, .after = Species_exact)
+
# Read the metadata table if provided, otherwise create one
if ( ! is.na(metadata) ) {
@@ -51,17 +74,17 @@ if ( ! is.na(metadata) ) {
# Make sure it's congruent with the asv table
meta <- data.frame(
- 'event_id_alias' = colnames(asvs)[2:(n_samples+1)]
+ 'eventID' = colnames(asvs)[2:(n_samples+1)]
) %>%
- left_join(meta, by = c('event_id_alias' = 'ID')) %>%
- distinct(event_id_alias, .keep_all = TRUE) %>%
- arrange(event_id_alias)
+ left_join(meta, by = c('eventID' = 'ID')) %>%
+ distinct(eventID, .keep_all = TRUE) %>%
+ arrange(eventID)
# Write the event tab
event <- data.frame(
- 'event_id_alias' = colnames(asvs)[2:(n_samples+1)]
+ 'eventID' = colnames(asvs)[2:(n_samples+1)]
) %>%
- arrange(event_id_alias)
+ arrange(eventID)
for ( c in EVENT_COLS ) {
if ( c %in% colnames(meta) ) {
event[[c]] <- meta[[c]]
@@ -69,40 +92,42 @@ for ( c in EVENT_COLS ) {
event[[c]] <- character(n_samples)
}
}
+
+# Add links to ENA
+event$'materialSampleID' <- sub("^ERS", "https://www.ebi.ac.uk/ena/browser/view/ERS", event$'materialSampleID')
+event$'materialSampleID' <- sub("^SAMEA", "https://www.ebi.ac.uk/ena/browser/view/SAMEA", event$'materialSampleID')
+event$'associatedSequences' <- sub("^ERR", "https://www.ebi.ac.uk/ena/browser/view/ERR", event$'associatedSequences')
+
event %>%
- inner_join(
- asvs %>% pivot_longer(2:ncol(.), names_to = 'event_id_alias', values_to = 'count') %>%
- group_by(event_id_alias) %>% summarise(sampleSizeValue = sum(count), .groups = 'drop'),
- by = 'event_id_alias'
- ) %>%
- select(1:4, sampleSizeValue, 5:ncol(.)) %>%
write_tsv("event.tsv", na = '')
-# mixs
-mixs <- data.frame(
- 'event_id_alias' = colnames(asvs)[2:(n_samples+1)],
+# dna (previously mixs)
+dna <- data.frame(
+ 'eventID' = colnames(asvs)[2:(n_samples+1)],
'lib_layout' = rep(lib_layout, n_samples),
'pcr_primer_forward' = rep(fwd_primer_seq, n_samples),
'pcr_primer_reverse' = rep(rev_primer_seq, n_samples)
) %>%
- arrange(event_id_alias)
-for ( c in MIXS_COLS ) {
+ arrange(eventID)
+for ( c in DNA_COLS ) {
if ( c %in% colnames(meta) ) {
- mixs[[c]] <- meta[[c]]
+ dna[[c]] <- meta[[c]]
+ } else if ( c %in% c("sop") ) {
+ dna[[c]] <- rep('https://nf-co.re/ampliseq',n_samples)
} else {
- mixs[[c]] <- character(n_samples)
+ dna[[c]] <- character(n_samples)
}
}
-mixs %>%
+dna %>%
relocate(lib_layout, .after = target_subfragment) %>%
relocate(pcr_primer_forward, pcr_primer_reverse, .after = pcr_primer_name_reverse) %>%
- write_tsv("mixs.tsv", na = '')
+ write_tsv("dna.tsv", na = '')
# emof
emof <- data.frame(
- 'event_id_alias' = colnames(asvs)[2:(n_samples+1)]
+ 'eventID' = colnames(asvs)[2:(n_samples+1)]
) %>%
- arrange(event_id_alias)
+ arrange(eventID)
for ( c in EMOF_COLS ) {
if ( c %in% colnames(meta) ) {
emof[[c]] <- meta[[c]]
@@ -115,9 +140,8 @@ emof %>% write_tsv("emof.tsv", na = '')
# asv-table
asvtax <- asvs %>%
inner_join(taxonomy, by = 'ASV_ID') %>%
- mutate(SH = if("SH" %in% colnames(.)) SH else '') %>%
- relocate(SH, .after = Species) %>%
rename_with(tolower, Domain:Species) %>%
+ mutate(across(domain:species, ~str_replace_all(.,' ','_'))) %>%
rename(
specificEpithet = species,
otu = SH,
@@ -127,10 +151,15 @@ asvtax <- asvs %>%
mutate(
domain = str_remove(domain, 'Reversed:_'),
associatedSequences = '',
- infraspecificEpithet = '',
- kingdom = ifelse(is.na(kingdom), 'Unassigned', kingdom)
+ kingdom = ifelse(is.na(kingdom), 'Unassigned', kingdom),
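+ # Prefer the exact species annotation (Species_exact) when present, strip a
+ # leading genus name, blank placeholder "sp."/"spp." epithets, and split any
+ # remaining multi-word epithet into specific and infraspecific parts: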
+ specificEpithet = ifelse(!(is.na(Species_exact) | Species_exact == ''), Species_exact, specificEpithet),
+ specificEpithet = ifelse( (!(is.na(genus) | genus == '')), str_replace(specificEpithet, paste('^',genus, '[_[:space:]]' ,sep=''), ''), specificEpithet),
+ specificEpithet = ifelse( str_detect(specificEpithet, '^[sS]p{1,2}.?$'), '', specificEpithet),
+ infraspecificEpithet = ifelse( str_detect(specificEpithet, '[_[:space:]]'), specificEpithet, ''),
+ infraspecificEpithet = str_replace(infraspecificEpithet, '^[^_[:space:]]*[_[:space:]]', ''),
+ specificEpithet = str_replace(specificEpithet, paste('[_[:space:]]', infraspecificEpithet ,sep=''), ''),
) %>%
relocate(otu, .after = infraspecificEpithet) %>%
- relocate(DNA_sequence:associatedSequences, .before = domain) %>%
- select(-confidence, -domain) %>%
+ relocate(associatedSequences, .before = domain) %>%
+ select(-confidence, -domain, -Species_exact) %>%
write_tsv("asv-table.tsv", na = '')
diff --git a/bin/sbdiexportreannotate.R b/bin/sbdiexportreannotate.R
index c2c4a382..0413e30b 100755
--- a/bin/sbdiexportreannotate.R
+++ b/bin/sbdiexportreannotate.R
@@ -6,7 +6,7 @@
# annotation tsv file as close to ready for submission to the Swedish
# Biodiversity Data Infrastructure (SBDI) as possible.
#
-# The script expects the following arguments: dbversion, ASV_tax_species.tsv
+# The script expects the following arguments: dbversion, ASV_tax_species.tsv, wfversion, predfile
#
# Author: daniel.lundin@lnu.se
@@ -16,13 +16,51 @@ suppressPackageStartupMessages(library(tidyverse))
args <- commandArgs(trailingOnly=TRUE)
dbversion <- args[1]
taxfile <- args[2]
+wfversion <- args[3]
+predfile <- args[4]
+# Read taxonomy table
taxonomy <- read.delim(taxfile, sep = '\t', stringsAsFactors = FALSE)
-taxonomy %>%
+# Read the predictions table if provided, otherwise create one
+if ( ! is.na(predfile) ) {
+ predictions <- read.delim(predfile, sep = '\t', stringsAsFactors = FALSE)
+} else {
+ predictions <- data.frame(ASV_ID = taxonomy$ASV_ID)
+}
+
+# Make sure it's congruent with the taxonomy table
+predictions <- data.frame(
+ 'ASV_ID' = taxonomy$ASV_ID
+) %>%
+ left_join(predictions, by = 'ASV_ID' ) %>%
+ distinct(ASV_ID, .keep_all = TRUE) %>%
+ arrange(ASV_ID)
+
+# Join tables and create missing columns
+taxtable <- taxonomy %>%
+ inner_join(predictions, by = 'ASV_ID') %>%
+ mutate(Domain = if("Domain" %in% colnames(.)) Domain else '') %>%
+ mutate(Kingdom = if("Kingdom" %in% colnames(.)) Kingdom else '') %>%
+ mutate(Phylum = if("Phylum" %in% colnames(.)) Phylum else '') %>%
+ mutate(Class = if("Class" %in% colnames(.)) Class else '') %>%
+ mutate(Order = if("Order" %in% colnames(.)) Order else '') %>%
+ mutate(Family = if("Family" %in% colnames(.)) Family else '') %>%
+ mutate(Genus = if("Genus" %in% colnames(.)) Genus else '') %>%
+ mutate(Species = if("Species" %in% colnames(.)) Species else '') %>%
+ mutate(Species_exact = if("Species_exact" %in% colnames(.)) Species_exact else '') %>%
mutate(SH = if("SH" %in% colnames(.)) SH else '') %>%
- relocate(SH, .after = Species) %>%
- rename_with(tolower, Domain:Species) %>%
+ relocate(Domain, .after = sequence) %>%
+ relocate(Kingdom, .after = Domain) %>%
+ relocate(Phylum, .after = Kingdom) %>%
+ relocate(Class, .after = Phylum) %>%
+ relocate(Order, .after = Class) %>%
+ relocate(Family, .after = Order) %>%
+ relocate(Genus, .after = Family) %>%
+ relocate(Species, .after = Genus) %>%
+ relocate(Species_exact, .after = Species) %>%
+ relocate(SH, .after = Species_exact) %>%
+ rename_with(tolower, Domain:Species_exact) %>%
rename(
asv_id_alias = ASV_ID,
asv_sequence = sequence,
@@ -30,10 +68,14 @@ taxonomy %>%
otu = SH,
annotation_confidence = confidence
) %>%
+ mutate(across(.fns = ~str_replace_all(.,' ','_'))) %>%
mutate(
- infraspecificEpithet = '',
- domain = str_remove(domain, 'Reversed:_'),
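+ # As in sbdiexport.R: prefer species_exact when present, strip a leading genus
+ # name, blank placeholder "sp."/"spp." epithets, and default missing
+ # confidences to 0: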
+ specificEpithet = ifelse(!(is.na(species_exact) | species_exact == ''), species_exact, specificEpithet),
+ specificEpithet = ifelse( (!(is.na(genus) | genus == '')), str_replace(specificEpithet, paste('^',genus, '[_[:space:]]' ,sep=''), ''), specificEpithet),
+ specificEpithet = ifelse( str_detect(specificEpithet, '^[sS]p{1,2}.?$'), '', specificEpithet),
+ annotation_confidence = ifelse((is.na(annotation_confidence) | annotation_confidence == ''), 0, annotation_confidence),
scientificName = case_when(
+ !(is.na(otu) | otu == '') ~ sprintf("%s", otu),
!(is.na(specificEpithet) | specificEpithet == '') ~ sprintf("%s %s", genus, specificEpithet),
!(is.na(genus) | genus == '') ~ sprintf("%s", genus),
!(is.na(family) | family == '') ~ sprintf("%s", family),
@@ -44,6 +86,7 @@ taxonomy %>%
TRUE ~ 'Unassigned'
),
taxonRank = case_when(
+ !(is.na(otu) | otu == '') ~ 'unranked',
!(is.na(specificEpithet) | specificEpithet == '') ~ 'species',
!(is.na(genus) | genus == '') ~ 'genus',
!(is.na(family) | family == '') ~ 'family',
@@ -53,18 +96,26 @@ taxonomy %>%
!(is.na(kingdom) | kingdom == '') ~ 'kingdom',
TRUE ~ 'kingdom'
),
+ domain = str_remove(domain, 'Reversed:_'),
+ infraspecificEpithet = ifelse( str_detect(specificEpithet, '[_[:space:]]'), specificEpithet, ''),
+ infraspecificEpithet = str_replace(infraspecificEpithet, '^[^_[:space:]]*[_[:space:]]', ''),
+ specificEpithet = str_replace(specificEpithet, paste('[_[:space:]]', infraspecificEpithet ,sep=''), ''),
date_identified = as.character(lubridate::today()),
reference_db = dbversion,
annotation_algorithm = case_when(
- (!(is.na(otu) | otu == '')) ~ 'Ampliseq:addsh',
- TRUE ~ 'DADA2:assignTaxonomy:addSpecies'
+ (!(is.na(otu) | otu == '')) ~ paste('Ampliseq',wfversion,'(https://nf-co.re/ampliseq) addsh', sep=' '),
+ (!(is.na(species_exact) | species_exact == '')) ~ paste('Ampliseq',wfversion,'(https://nf-co.re/ampliseq) DADA2:assignTaxonomy:addSpecies', sep=' '),
+ TRUE ~ paste('Ampliseq',wfversion,'(https://nf-co.re/ampliseq) DADA2:assignTaxonomy', sep=' ')
),
identification_references = 'https://docs.biodiversitydata.se/analyse-data/molecular-tools/#taxonomy-annotation',
- taxon_remarks = '',
+ taxon_remarks = ifelse(!(is.na(domain) | domain == ''), paste('Domain = \'',domain,'\'',sep=''),''),
kingdom = ifelse(is.na(kingdom), 'Unassigned', kingdom)
) %>%
relocate(asv_sequence, .after = asv_id_alias) %>%
- relocate(infraspecificEpithet:identification_references, .after = specificEpithet) %>%
- relocate(otu, .after = infraspecificEpithet) %>%
+ relocate(scientificName:taxonRank, .after = asv_sequence) %>%
+ relocate(infraspecificEpithet, .after = specificEpithet) %>%
+ relocate(annotation_confidence, .after = otu) %>%
+ relocate(date_identified:taxon_remarks, .after = annotation_confidence) %>%
select(-domain) %>%
+ select(-species_exact) %>%
write_tsv("annotation.tsv", na = '')
diff --git a/bin/summarize_barrnap.py b/bin/summarize_barrnap.py
new file mode 100755
index 00000000..97cb31bb
--- /dev/null
+++ b/bin/summarize_barrnap.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+# @author Jeanette Tångrot
+# Takes a list of files with barrnap predictions (rrna.arc.gff, rrna.bac.gff, etc)
+# for ASV sequences, extracts evalues for each prediction and summarizes the results
+# in a new file "summary.tsv". Assumes that the same program/barrnap version is
+# used for all predictions.
+
+import sys
+
+# Initialize
+method = dict()
+evalues = dict()
+orgs = set()
+
+# Go through each file and store evalues for all predictions for each query sequence
+for file in sys.argv[1:]:
+ org = file.removeprefix("rrna.")
+ org = org.replace(".gff", "_eval")
+ orgs.add(org)
+ fh = open(file, mode="r")
+ for row in fh:
+ if row.startswith("#"):
+ continue
+ rowparts = row.split()
+ asv = rowparts[0]
+ method[asv] = rowparts[1]
+ if asv not in evalues:
+ evalues[asv] = dict()
+ evalues[asv][org] = rowparts[5]
+ fh.close()
+
+# Write results
+fh = open("summary.tsv", mode="w")
+orglist = list(orgs)
+header = list(orgs)
+header.insert(0, "ASV_ID")
+header.append("eval_method")
+fh.write("\t".join(header) + "\n")
+for asv, meth in method.items():
+ row = [asv]
+ for org in orglist:
+ if org in evalues[asv]:
+ row.append(evalues[asv][org])
+ else:
+ row.append("NA")
+ row.append(meth)
+ fh.write("\t".join(row) + "\n")
+fh.close()
diff --git a/bin/taxref_reformat_coidb.sh b/bin/taxref_reformat_coidb.sh
new file mode 100755
index 00000000..7bf1cf41
--- /dev/null
+++ b/bin/taxref_reformat_coidb.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
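+# Of the downloaded gzipped FASTA files, the one with the longest first header
+# line is assumed to carry full taxonomy strings (DADA2 assignTaxonomy format),
+# and the one with the shortest to carry ID plus species only (addSpecies format).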
+for f in *; do
+ c=$(gunzip -c "$f" | head -1 | wc -m | grep -Eo "[0-9]+")
+ echo -e "$f\t$c" >> tmp
+done
+
+assignTaxonomy=$(cat tmp | sort -k 2 -n -r | head -1 | cut -f1)
+gunzip -c $assignTaxonomy > assignTaxonomy.fna
+
+addSpecies=$(cat tmp | sort -k 2 -n | head -1 | cut -f1)
+gunzip -c $addSpecies > addSpecies.fna
diff --git a/bin/taxref_reformat_gtdb.sh b/bin/taxref_reformat_gtdb.sh
index 4adf9cee..14146a21 100755
--- a/bin/taxref_reformat_gtdb.sh
+++ b/bin/taxref_reformat_gtdb.sh
@@ -10,7 +10,7 @@ for f in *.tar.gz; do
done
# Write the assignTaxonomy() fasta file: assignTaxonomy.fna
-cat ar122*.fna bac120*.fna | sed '/^>/s/>[^ ]\+ \([^[]\+\) \[.*/>\1/' | sed '/^>/s/ \[.*//' | sed 's/[a-z]__//g' | sed '/^>/s/\(Archaea\)\|\(Bacteria\)/&;&/' > assignTaxonomy.fna
+cat ar122*.fna bac120*.fna | sed '/^>/s/>[^ ]\+ \([^[]\+\) \[.*/>\1/' | sed '/^>/s/ \[.*//' | sed 's/[a-z]__//g' > assignTaxonomy.fna
# Write the addSpecies() fasta file: addSpecies.fna
cat ar122*.fna bac120*.fna | sed '/^>/s/>\([^ ]\+\) .*;s__\([^[]\+\) \[.*/>\1 \2/' > addSpecies.fna
diff --git a/bin/taxref_reformat_standard.sh b/bin/taxref_reformat_standard.sh
index 9d583942..e9585a81 100755
--- a/bin/taxref_reformat_standard.sh
+++ b/bin/taxref_reformat_standard.sh
@@ -1,9 +1,8 @@
#!/bin/sh
# Uses preformatted databases from DADA2 (https://benjjneb.github.io/dada2/training.html)
-# The file for taxonomy assignment, identified by containing "train" in the name,
-# gets the first field duplicated:
-gunzip -c *train*gz | sed 's/>\([^;]*\)/>\1;\1/' > assignTaxonomy.fna
+# The file for taxonomy assignment, identified by containing "train" in the name, is just unzipped,
+gunzip -c *train*gz > assignTaxonomy.fna
# and the file for add species, identified by containing "species" in the name, is renamed
mv *species*gz addSpecies.fna.gz
diff --git a/bin/taxref_reformat_unite.sh b/bin/taxref_reformat_unite.sh
index 75c74976..1aa4547f 100755
--- a/bin/taxref_reformat_unite.sh
+++ b/bin/taxref_reformat_unite.sh
@@ -8,7 +8,7 @@ tar xzf *gz
# Remove leading "k__" and the like, remove ranks classified as "unknown",
# and replace space with underscore to create assignTaxonomy.fna
-cat */*[[:digit:]].fasta | sed '/^>/s/;[ks]__.*//' | sed '/^>/s/[a-z]__unidentified//g' | sed '/^>/s/[a-z]__//g' | sed '/^>/s/ /_/g' | sed 's/>.*|/&Eukaryota;/' > assignTaxonomy.fna
+cat */*[[:digit:]].fasta | sed '/^>/s/;k__.*//' | sed '/^>/s/[a-z]__unidentified//g' | sed '/^>/s/[a-z]__//g' | sed '/^>/s/ /_/g' > assignTaxonomy.fna
# Reformat to addSpecies format
sed 's/>\([^|]\+\)|\([^|]\+|[^|]\+\)|.*/>\2 \1/' assignTaxonomy.fna | sed '/^>/s/_/ /g' > addSpecies.fna
diff --git a/bin/trunclen.py b/bin/trunclen.py
index 7a6e6792..5b25927f 100755
--- a/bin/trunclen.py
+++ b/bin/trunclen.py
@@ -23,6 +23,7 @@
# extract maximum read count
fraction_reads = int(max(reads) * rmin)
+
# iterate through values and find first value that falls below threshold
def function(values, cutoff):
trunc = len(values)
diff --git a/conf/modules.config b/conf/modules.config
index 77c0be4f..d0bf626a 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -125,6 +125,11 @@ process {
mode: params.publish_dir_mode,
pattern: "*{.pdf,plotQualityProfile.txt}"
],
+ [
+ path: { "${params.outdir}/dada2/QC/svg" },
+ mode: params.publish_dir_mode,
+ pattern: "*{.svg}"
+ ],
[
path: { "${params.outdir}/dada2/args" },
mode: params.publish_dir_mode,
@@ -168,6 +173,11 @@ process {
mode: params.publish_dir_mode,
pattern: "*{.pdf}"
],
+ [
+ path: { "${params.outdir}/dada2/QC/svg" },
+ mode: params.publish_dir_mode,
+ pattern: "*{.svg}"
+ ],
[
path: { "${params.outdir}/dada2/args" },
mode: params.publish_dir_mode,
@@ -188,6 +198,11 @@ process {
mode: params.publish_dir_mode,
pattern: "*{.pdf,convergence.txt}"
],
+ [
+ path: { "${params.outdir}/dada2/QC/svg" },
+ mode: params.publish_dir_mode,
+ pattern: "*{.svg}"
+ ],
[
path: { "${params.outdir}/dada2/args" },
mode: params.publish_dir_mode,
@@ -203,9 +218,16 @@ process {
withName: NOVASEQ_ERR {
publishDir = [
- path: { "${params.outdir}/dada2/QC" },
- mode: params.publish_dir_mode,
- pattern: "*{.pdf,convergence.txt}"
+ [
+ path: { "${params.outdir}/dada2/QC" },
+ mode: params.publish_dir_mode,
+ pattern: "*{.pdf,convergence.txt}"
+ ],
+ [
+ path: { "${params.outdir}/dada2/QC/svg" },
+ mode: params.publish_dir_mode,
+ pattern: "*{.svg}"
+ ]
]
}
@@ -263,6 +285,14 @@ process {
]
}
+ withName: BARRNAPSUMMARY {
+ publishDir = [
+ path: { "${params.outdir}/barrnap" },
+ mode: params.publish_dir_mode,
+ pattern: "summary.tsv"
+ ]
+ }
+
withName: FILTER_SSU {
publishDir = [
path: { "${params.outdir}/barrnap" },
@@ -485,6 +515,11 @@ process {
]
}
+ withName: QIIME2_FILTERSAMPLES_ANCOM {
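+ // Keep only samples with a non-empty value in the '${filter}' metadata column
+ // and a total frequency of at least params.ancom_sample_min_count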
+ ext.args = { "--p-where \'${filter}<>\"\"\' --p-min-frequency ${params.ancom_sample_min_count}" }
+ ext.prefix = { "$filter" }
+ }
+
withName: QIIME2_ALPHARAREFACTION {
publishDir = [
path: { "${params.outdir}/qiime2" },
diff --git a/conf/ref_databases.config b/conf/ref_databases.config
index 5fa2d29c..a8b55ba2 100644
--- a/conf/ref_databases.config
+++ b/conf/ref_databases.config
@@ -6,144 +6,166 @@
* Please also reflect all changes in 'nextflow_schema.json'
* Each entry requires as a minimum: title, file, citation, fmtscript
* Optional entries are: taxlevels, shfile
+ * Default taxlevels in "dada2_taxonomy.nf" and "dada2_addspecies.nf": "Kingdom,Phylum,Class,Order,Family,Genus,Species"
*/
params {
dada_ref_databases {
- 'midori2-co1=gb250' {
+ 'coidb' {
+ title = "COIDB - CO1 Taxonomy Database - Release 221216"
+ file = [ "https://figshare.scilifelab.se/ndownloader/files/38787072", "https://figshare.scilifelab.se/ndownloader/files/38787069" ]
+ citation = "Sundh J, Manoharan L, Iwaszkiewicz-Eggebrecht E, Miraldo A, Andersson A, Ronquist F. COI reference sequences from BOLD DB. doi: https://doi.org/10.17044/scilifelab.20514192.v2"
+ fmtscript = "taxref_reformat_coidb.sh"
+ dbversion = "COIDB 221216 (https://doi.org/10.17044/scilifelab.20514192.v2)"
+ }
+ 'coidb=221216' {
+ title = "COIDB - CO1 Taxonomy Database - Release 221216"
+ file = [ "https://figshare.scilifelab.se/ndownloader/files/38787072", "https://figshare.scilifelab.se/ndownloader/files/38787069" ]
+ citation = "Sundh J, Manoharan L, Iwaszkiewicz-Eggebrecht E, Miraldo A, Andersson A, Ronquist F. COI reference sequences from BOLD DB. doi: https://doi.org/10.17044/scilifelab.20514192.v2"
+ fmtscript = "taxref_reformat_coidb.sh"
+ dbversion = "COIDB 221216 (https://doi.org/10.17044/scilifelab.20514192.v2)"
+ }
+ 'midori2-co1' {
title = "MIDORI2 - CO1 Taxonomy Database - Release GB250"
file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ]
citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245."
fmtscript = "taxref_reformat_midori2.sh"
- dbversion = 'midori2-co1=gb250'
+ dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)"
taxlevels = "Phylum,Class,Order,Family,Genus,Species"
}
- 'midori2-co1' {
+ 'midori2-co1=gb250' {
title = "MIDORI2 - CO1 Taxonomy Database - Release GB250"
file = [ "http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz" ]
citation = "Machida RJ, Leray M, Ho SL, Knowlton N. Metazoan mitochondrial gene sequence reference datasets for taxonomic assignment of environmental samples. Sci Data. 2017 Mar 14;4:170027. doi: 10.1038/sdata.2017.27. PMID: 28291235; PMCID: PMC5349245."
fmtscript = "taxref_reformat_midori2.sh"
- dbversion = 'midori2-co1=gb250'
+ dbversion = "MIDORI2-CO1 GB250 (http://reference-midori.info/download/Databases/GenBank250/DADA2_sp/uniq/MIDORI2_UNIQ_NUC_SP_GB250_CO1_DADA2.fasta.gz)"
taxlevels = "Phylum,Class,Order,Family,Genus,Species"
}
- 'gtdb=R05-RS95' {
- title = "GTDB - Genome Taxonomy Database - Release R05-RS95"
- file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release95/95.0/genomic_files_reps/bac120_ssu_reps_r95.tar.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release95/95.0/genomic_files_reps/ar122_ssu_reps_r95.tar.gz" ]
- citation = "Parks DH, Chuvochina M, Waite DW, Rinke C, Skarshewski A, Chaumeil PA, Hugenholtz P. A standardized bacterial taxonomy based on genome phylogeny substantially revises the tree of life. Nat Biotechnol. 2018 Nov;36(10):996-1004. doi: 10.1038/nbt.4229. Epub 2018 Aug 27. PMID: 30148503."
- fmtscript = "taxref_reformat_gtdb.sh"
- dbversion = 'gtdb=R05-RS95'
- }
- 'gtdb=R06-RS202' {
- title = "GTDB - Genome Taxonomy Database - Release R06-RS202"
- file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release202/202.0/genomic_files_reps/bac120_ssu_reps_r202.tar.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release202/202.0/genomic_files_reps/ar122_ssu_reps_r202.tar.gz" ]
+ 'gtdb' {
+ title = "GTDB - Genome Taxonomy Database - Release R07-RS207"
+ file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release207/207.0/genomic_files_reps/bac120_ssu_reps_r207.tar.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release207/207.0/genomic_files_reps/ar53_ssu_reps_r207.tar.gz" ]
citation = "Parks DH, Chuvochina M, Waite DW, Rinke C, Skarshewski A, Chaumeil PA, Hugenholtz P. A standardized bacterial taxonomy based on genome phylogeny substantially revises the tree of life. Nat Biotechnol. 2018 Nov;36(10):996-1004. doi: 10.1038/nbt.4229. Epub 2018 Aug 27. PMID: 30148503."
fmtscript = "taxref_reformat_gtdb.sh"
- dbversion = 'gtdb=R06-RS202'
+ dbversion = "GTDB R07-RS207 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release207/207.0)"
}
'gtdb=R07-RS207' {
title = "GTDB - Genome Taxonomy Database - Release R07-RS207"
file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release207/207.0/genomic_files_reps/bac120_ssu_reps_r207.tar.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release207/207.0/genomic_files_reps/ar53_ssu_reps_r207.tar.gz" ]
citation = "Parks DH, Chuvochina M, Waite DW, Rinke C, Skarshewski A, Chaumeil PA, Hugenholtz P. A standardized bacterial taxonomy based on genome phylogeny substantially revises the tree of life. Nat Biotechnol. 2018 Nov;36(10):996-1004. doi: 10.1038/nbt.4229. Epub 2018 Aug 27. PMID: 30148503."
fmtscript = "taxref_reformat_gtdb.sh"
- dbversion = 'gtdb=R07-RS207'
+ dbversion = "GTDB R07-RS207 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release207/207.0)"
}
- 'gtdb' {
- title = "GTDB - Genome Taxonomy Database - Release R07-RS207"
- file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release207/207.0/genomic_files_reps/bac120_ssu_reps_r207.tar.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release207/207.0/genomic_files_reps/ar53_ssu_reps_r207.tar.gz" ]
+ 'gtdb=R06-RS202' {
+ title = "GTDB - Genome Taxonomy Database - Release R06-RS202"
+ file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release202/202.0/genomic_files_reps/bac120_ssu_reps_r202.tar.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release202/202.0/genomic_files_reps/ar122_ssu_reps_r202.tar.gz" ]
citation = "Parks DH, Chuvochina M, Waite DW, Rinke C, Skarshewski A, Chaumeil PA, Hugenholtz P. A standardized bacterial taxonomy based on genome phylogeny substantially revises the tree of life. Nat Biotechnol. 2018 Nov;36(10):996-1004. doi: 10.1038/nbt.4229. Epub 2018 Aug 27. PMID: 30148503."
fmtscript = "taxref_reformat_gtdb.sh"
- dbversion = 'gtdb=R07-RS207'
+ dbversion = "GTDB R06-RS202 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release202/202.0/)"
+ }
+ 'gtdb=R05-RS95' {
+ title = "GTDB - Genome Taxonomy Database - Release R05-RS95"
+ file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release95/95.0/genomic_files_reps/bac120_ssu_reps_r95.tar.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release95/95.0/genomic_files_reps/ar122_ssu_reps_r95.tar.gz" ]
+ citation = "Parks DH, Chuvochina M, Waite DW, Rinke C, Skarshewski A, Chaumeil PA, Hugenholtz P. A standardized bacterial taxonomy based on genome phylogeny substantially revises the tree of life. Nat Biotechnol. 2018 Nov;36(10):996-1004. doi: 10.1038/nbt.4229. Epub 2018 Aug 27. PMID: 30148503."
+ fmtscript = "taxref_reformat_gtdb.sh"
+ dbversion = "GTDB R05-RS95 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release95/95.0/)"
}
'pr2' {
title = "PR2 - Protist Reference Ribosomal Database - Version 4.14.0"
file = [ "https://github.com/pr2database/pr2database/releases/download/v4.14.0/pr2_version_4.14.0_SSU_dada2.fasta.gz", "https://github.com/pr2database/pr2database/releases/download/v4.14.0/pr2_version_4.14.0_SSU_UTAX.fasta.gz" ]
citation = "Guillou L, Bachar D, Audic S, Bass D, Berney C, Bittner L, Boutte C, Burgaud G, de Vargas C, Decelle J, Del Campo J, Dolan JR, Dunthorn M, Edvardsen B, Holzmann M, Kooistra WH, Lara E, Le Bescot N, Logares R, Mahé F, Massana R, Montresor M, Morard R, Not F, Pawlowski J, Probert I, Sauvadet AL, Siano R, Stoeck T, Vaulot D, Zimmermann P, Christen R. The Protist Ribosomal Reference database (PR2): a catalog of unicellular eukaryote small sub-unit rRNA sequences with curated taxonomy. Nucleic Acids Res. 2013 Jan;41(Database issue):D597-604. doi: 10.1093/nar/gks1160. Epub 2012 Nov 27. PMID: 23193267; PMCID: PMC3531120."
fmtscript = "taxref_reformat_pr2.sh"
- dbversion = 'pr2=4.14.0'
+ dbversion = "PR2 v4.14.0 (https://github.com/pr2database/pr2database/releases/tag/v4.14.0)"
+ taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species"
}
'pr2=4.14.0' {
title = "PR2 - Protist Reference Ribosomal Database - Version 4.14.0"
file = [ "https://github.com/pr2database/pr2database/releases/download/v4.14.0/pr2_version_4.14.0_SSU_dada2.fasta.gz", "https://github.com/pr2database/pr2database/releases/download/v4.14.0/pr2_version_4.14.0_SSU_UTAX.fasta.gz" ]
citation = "Guillou L, Bachar D, Audic S, Bass D, Berney C, Bittner L, Boutte C, Burgaud G, de Vargas C, Decelle J, Del Campo J, Dolan JR, Dunthorn M, Edvardsen B, Holzmann M, Kooistra WH, Lara E, Le Bescot N, Logares R, Mahé F, Massana R, Montresor M, Morard R, Not F, Pawlowski J, Probert I, Sauvadet AL, Siano R, Stoeck T, Vaulot D, Zimmermann P, Christen R. The Protist Ribosomal Reference database (PR2): a catalog of unicellular eukaryote small sub-unit rRNA sequences with curated taxonomy. Nucleic Acids Res. 2013 Jan;41(Database issue):D597-604. doi: 10.1093/nar/gks1160. Epub 2012 Nov 27. PMID: 23193267; PMCID: PMC3531120."
fmtscript = "taxref_reformat_pr2.sh"
- dbversion = 'pr2=4.14.0'
+ dbversion = "PR2 v4.14.0 (https://github.com/pr2database/pr2database/releases/tag/v4.14.0)"
+ taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species"
}
'pr2=4.13.0' {
title = "PR2 - Protist Reference Ribosomal Database - Version 4.13.0"
file = [ "https://github.com/pr2database/pr2database/releases/download/v4.13.0/pr2_version_4.13.0_18S_dada2.fasta.gz", "https://github.com/pr2database/pr2database/releases/download/v4.13.0/pr2_version_4.13.0_18S_UTAX.fasta.gz" ]
citation = "Guillou L, Bachar D, Audic S, Bass D, Berney C, Bittner L, Boutte C, Burgaud G, de Vargas C, Decelle J, Del Campo J, Dolan JR, Dunthorn M, Edvardsen B, Holzmann M, Kooistra WH, Lara E, Le Bescot N, Logares R, Mahé F, Massana R, Montresor M, Morard R, Not F, Pawlowski J, Probert I, Sauvadet AL, Siano R, Stoeck T, Vaulot D, Zimmermann P, Christen R. The Protist Ribosomal Reference database (PR2): a catalog of unicellular eukaryote small sub-unit rRNA sequences with curated taxonomy. Nucleic Acids Res. 2013 Jan;41(Database issue):D597-604. doi: 10.1093/nar/gks1160. Epub 2012 Nov 27. PMID: 23193267; PMCID: PMC3531120."
fmtscript = "taxref_reformat_pr2.sh"
- dbversion = 'pr2=4.13.0'
+ dbversion = "PR2 v4.13.0 (https://github.com/pr2database/pr2database/releases/tag/v4.13.0)"
+ taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species"
}
- 'rdp=18' {
+ 'rdp' {
title = "RDP - Ribosomal Database Project - RDP trainset 18/release 11.5"
file = [ "https://zenodo.org/record/4310151/files/rdp_train_set_18.fa.gz", "https://zenodo.org/record/4310151/files/rdp_species_assignment_18.fa.gz" ]
citation = "Cole JR, Wang Q, Fish JA, Chai B, McGarrell DM, Sun Y, Brown CT, Porras-Alfaro A, Kuske CR, Tiedje JM. Ribosomal Database Project: data and tools for high throughput rRNA analysis. Nucleic Acids Res. 2014 Jan;42(Database issue):D633-42. doi: 10.1093/nar/gkt1244. Epub 2013 Nov 27. PMID: 24288368; PMCID: PMC3965039."
fmtscript = "taxref_reformat_standard.sh"
- dbversion = 'rdp=18/11.5'
+ dbversion = "RDP 18/11.5 (https://zenodo.org/record/4310151/)"
}
- 'rdp' {
+ 'rdp=18' {
title = "RDP - Ribosomal Database Project - RDP trainset 18/release 11.5"
file = [ "https://zenodo.org/record/4310151/files/rdp_train_set_18.fa.gz", "https://zenodo.org/record/4310151/files/rdp_species_assignment_18.fa.gz" ]
citation = "Cole JR, Wang Q, Fish JA, Chai B, McGarrell DM, Sun Y, Brown CT, Porras-Alfaro A, Kuske CR, Tiedje JM. Ribosomal Database Project: data and tools for high throughput rRNA analysis. Nucleic Acids Res. 2014 Jan;42(Database issue):D633-42. doi: 10.1093/nar/gkt1244. Epub 2013 Nov 27. PMID: 24288368; PMCID: PMC3965039."
fmtscript = "taxref_reformat_standard.sh"
- dbversion = 'rdp=18/11.5'
+ dbversion = "RDP 18/11.5 (https://zenodo.org/record/4310151/)"
}
'sbdi-gtdb' {
title = "SBDI-GTDB - Sativa curated 16S GTDB database - Release R07-RS207-1"
file = [ "https://scilifelab.figshare.com/ndownloader/files/36980767", "https://scilifelab.figshare.com/ndownloader/files/36980788" ]
citation = "Lundin D, Andersson A. SBDI Sativa curated 16S GTDB database. FigShare. doi: 10.17044/scilifelab.14869077.v4"
fmtscript = "taxref_reformat_sbdi-gtdb.sh"
- dbversion = 'sbdi-gtdb=R07-RS207-1'
+ dbversion = "SBDI-GTDB-R07-RS207-1 (https://scilifelab.figshare.com/articles/dataset/SBDI_Sativa_curated_16S_GTDB_database/14869077/4)"
+ taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species"
}
'sbdi-gtdb=R07-RS207-1' {
title = "SBDI-GTDB - Sativa curated 16S GTDB database - Release R07-RS207-1"
file = [ "https://scilifelab.figshare.com/ndownloader/files/36980767", "https://scilifelab.figshare.com/ndownloader/files/36980788" ]
citation = "Lundin D, Andersson A. SBDI Sativa curated 16S GTDB database. FigShare. doi: 10.17044/scilifelab.14869077.v4"
fmtscript = "taxref_reformat_sbdi-gtdb.sh"
- dbversion = 'sbdi-gtdb=R07-RS207-1'
+ dbversion = "SBDI-GTDB-R07-RS207-1 (https://scilifelab.figshare.com/articles/dataset/SBDI_Sativa_curated_16S_GTDB_database/14869077/4)"
+ taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species"
}
'sbdi-gtdb=R06-RS202-3' {
title = "SBDI-GTDB - Sativa curated 16S GTDB database - Release R06-RS202-1"
file = [ "https://scilifelab.figshare.com/ndownloader/files/31370437", "https://scilifelab.figshare.com/ndownloader/files/31370434" ]
citation = "Lundin D, Andersson A. SBDI Sativa curated 16S GTDB database. FigShare. doi: 10.17044/scilifelab.14869077.v3"
fmtscript = "taxref_reformat_sbdi-gtdb.sh"
- dbversion = 'sbdi-gtdb=R06-RS202-3'
+ dbversion = "SBDI-GTDB-R06-RS202-3 (https://scilifelab.figshare.com/articles/dataset/SBDI_Sativa_curated_16S_GTDB_database/14869077/3)"
+ taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species"
}
'sbdi-gtdb=R06-RS202-1' {
title = "SBDI-GTDB - Sativa curated 16S GTDB database - Release R06-RS202-1"
file = [ "https://scilifelab.figshare.com/ndownloader/files/28624479", "https://scilifelab.figshare.com/ndownloader/files/28624482" ]
citation = "Lundin D, Andersson A. SBDI Sativa curated 16S GTDB database. FigShare. doi: 10.17044/scilifelab.14869077.v1"
fmtscript = "taxref_reformat_sbdi-gtdb.sh"
- dbversion = 'sbdi-gtdb=R06-RS202-1'
+ dbversion = "SBDI-GTDB-R06-RS202-1 (https://scilifelab.figshare.com/articles/dataset/SBDI_Sativa_curated_16S_GTDB_database/14869077/1)"
+ taxlevels = "Domain,Kingdom,Phylum,Class,Order,Family,Genus,Species"
}
'silva' {
title = "Silva 138.1 prokaryotic SSU"
file = [ "https://zenodo.org/record/4587955/files/silva_nr99_v138.1_wSpecies_train_set.fa.gz", "https://zenodo.org/record/4587955/files/silva_species_assignment_v138.1.fa.gz" ]
citation = "Quast C, Pruesse E, Yilmaz P, Gerken J, Schweer T, Yarza P, Peplies J, Glöckner FO. The SILVA ribosomal RNA gene database project: improved data processing and web-based tools. Nucleic Acids Res. 2013 Jan;41(Database issue):D590-6. doi: 10.1093/nar/gks1219. Epub 2012 Nov 28. PMID: 23193283; PMCID: PMC3531112."
fmtscript = "taxref_reformat_standard.sh"
- dbversion = 'silva=138.1'
+ dbversion = "SILVA v138.1 (https://zenodo.org/record/4587955)"
}
'silva=138' {
title = "Silva 138.1 prokaryotic SSU"
file = [ "https://zenodo.org/record/4587955/files/silva_nr99_v138.1_wSpecies_train_set.fa.gz", "https://zenodo.org/record/4587955/files/silva_species_assignment_v138.1.fa.gz" ]
citation = "Quast C, Pruesse E, Yilmaz P, Gerken J, Schweer T, Yarza P, Peplies J, Glöckner FO. The SILVA ribosomal RNA gene database project: improved data processing and web-based tools. Nucleic Acids Res. 2013 Jan;41(Database issue):D590-6. doi: 10.1093/nar/gks1219. Epub 2012 Nov 28. PMID: 23193283; PMCID: PMC3531112."
fmtscript = "taxref_reformat_standard.sh"
- dbversion = 'silva=138.1'
+ dbversion = "SILVA v138.1 (https://zenodo.org/record/4587955)"
}
'silva=132' {
title = "Silva Project's version 132 release"
file = [ "https://zenodo.org/record/1172783/files/silva_nr_v132_train_set.fa.gz", "https://zenodo.org/record/1172783/files/silva_species_assignment_v132.fa.gz" ]
citation = "Quast C, Pruesse E, Yilmaz P, Gerken J, Schweer T, Yarza P, Peplies J, Glöckner FO. The SILVA ribosomal RNA gene database project: improved data processing and web-based tools. Nucleic Acids Res. 2013 Jan;41(Database issue):D590-6. doi: 10.1093/nar/gks1219. Epub 2012 Nov 28. PMID: 23193283; PMCID: PMC3531112."
fmtscript = "taxref_reformat_standard.sh"
- dbversion = 'silva=132'
+ dbversion = "SILVA v132 (https://zenodo.org/record/1172783)"
}
'unite-fungi' {
title = "UNITE general FASTA release for Fungi - Version 8.3"
file = [ "https://files.plutof.ut.ee/public/orig/7B/23/7B235835FAF5C85D7B01E40FEF17F687914CB81A182554C5BD95E3168328E604.tgz" ]
citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE general FASTA release for Fungi. UNITE Community. 10.15156/BIO/1280049"
fmtscript = "taxref_reformat_unite.sh"
- dbversion = 'unite-fungi=8.3'
+ dbversion = "UNITE-fungi v8.3 (https://doi.org/10.15156/BIO/1280049)"
shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34497977", "https://scilifelab.figshare.com/ndownloader/files/34497980"]
}
'unite-fungi=8.3' {
@@ -151,7 +173,7 @@ params {
file = [ "https://files.plutof.ut.ee/public/orig/7B/23/7B235835FAF5C85D7B01E40FEF17F687914CB81A182554C5BD95E3168328E604.tgz" ]
citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE general FASTA release for Fungi. UNITE Community. 10.15156/BIO/1280049"
fmtscript = "taxref_reformat_unite.sh"
- dbversion = 'unite-fungi=8.3'
+ dbversion = "UNITE-fungi v8.3 (https://doi.org/10.15156/BIO/1280049)"
shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34497977", "https://scilifelab.figshare.com/ndownloader/files/34497980"]
}
'unite-fungi=8.2' {
@@ -159,7 +181,7 @@ params {
file = [ "https://files.plutof.ut.ee/public/orig/E7/28/E728E2CAB797C90A01CD271118F574B8B7D0DAEAB7E81193EB89A2AC769A0896.gz" ]
citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE general FASTA release for Fungi. UNITE Community. 10.15156/BIO/786368"
fmtscript = "taxref_reformat_unite.sh"
- dbversion = 'unite-fungi=8.2'
+ dbversion = "UNITE-fungi v8.2 (https://doi.org/10.15156/BIO/786368)"
shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34497971", "https://scilifelab.figshare.com/ndownloader/files/34497974"]
}
'unite-alleuk' {
@@ -167,7 +189,7 @@ params {
file = [ "https://files.plutof.ut.ee/public/orig/E5/F5/E5F5E426DEC78BA2F7EC530621DDBD3F10564A09CBC2A5C4D3B3CBE7E37C5E1A.tgz" ]
citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE general FASTA release for eukaryotes. UNITE Community. 10.15156/BIO/1280127"
fmtscript = "taxref_reformat_unite.sh"
- dbversion = 'unite-alleuk=8.3'
+ dbversion = "UNITE-alleuk v8.3 (https://doi.org/10.15156/BIO/1280127)"
shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34994575", "https://scilifelab.figshare.com/ndownloader/files/34994578"]
}
'unite-alleuk=8.3' {
@@ -175,7 +197,7 @@ params {
file = [ "https://files.plutof.ut.ee/public/orig/E5/F5/E5F5E426DEC78BA2F7EC530621DDBD3F10564A09CBC2A5C4D3B3CBE7E37C5E1A.tgz" ]
citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2021): UNITE general FASTA release for eukaryotes. UNITE Community. 10.15156/BIO/1280127"
fmtscript = "taxref_reformat_unite.sh"
- dbversion = 'unite-alleuk=8.3'
+ dbversion = "UNITE-alleuk v8.3 (https://doi.org/10.15156/BIO/1280127)"
shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34994575", "https://scilifelab.figshare.com/ndownloader/files/34994578"]
}
'unite-alleuk=8.2' {
@@ -183,7 +205,7 @@ params {
file = [ "https://files.plutof.ut.ee/public/orig/F9/ED/F9EDE36E5209F469056675EBD672425BC06EACB7FE0C0D18F5A13E4CA632DCFA.gz" ]
citation = "Abarenkov, Kessy; Zirk, Allan; Piirmann, Timo; Pöhönen, Raivo; Ivanov, Filipp; Nilsson, R. Henrik; Kõljalg, Urmas (2020): UNITE general FASTA release for eukaryotes. UNITE Community. 10.15156/BIO/786370"
fmtscript = "taxref_reformat_unite.sh"
- dbversion = 'unite-alleuk=8.2'
+ dbversion = "UNITE-alleuk v8.2 (https://doi.org/10.15156/BIO/786370)"
shfile = [ "https://scilifelab.figshare.com/ndownloader/files/34994569", "https://scilifelab.figshare.com/ndownloader/files/34994572"]
}
}
@@ -192,14 +214,14 @@ params {
//SILVA for QIIME2 v2021.2, see https://docs.qiime2.org/2021.2/data-resources/#silva-16s-18s-rrna
'silva=138' {
title = "QIIME2 pre-formatted SILVA dereplicated at 99% similarity - Version 138"
- file = [ "https://data.qiime2.org/2022.8/common/silva-138-99-seqs.qza", "https://data.qiime2.org/2022.8/common/silva-138-99-tax.qza" ]
+ file = [ "https://data.qiime2.org/2022.11/common/silva-138-99-seqs.qza", "https://data.qiime2.org/2022.11/common/silva-138-99-tax.qza" ]
citation = "https://www.arb-silva.de/; Bokulich, N.A., Robeson, M., Dillon, M.R. bokulich-lab/RESCRIPt. Zenodo. http://doi.org/10.5281/zenodo.3891931"
license = "https://www.arb-silva.de/silva-license-information/"
fmtscript = "taxref_reformat_qiime_silva138.sh"
}
'silva' {
title = "QIIME2 pre-formatted SILVA dereplicated at 99% similarity - Version 138"
- file = [ "https://data.qiime2.org/2022.8/common/silva-138-99-seqs.qza", "https://data.qiime2.org/2022.8/common/silva-138-99-tax.qza" ]
+ file = [ "https://data.qiime2.org/2022.11/common/silva-138-99-seqs.qza", "https://data.qiime2.org/2022.11/common/silva-138-99-tax.qza" ]
citation = "https://www.arb-silva.de/; Bokulich, N.A., Robeson, M., Dillon, M.R. bokulich-lab/RESCRIPt. Zenodo. http://doi.org/10.5281/zenodo.3891931"
license = "https://www.arb-silva.de/silva-license-information/"
fmtscript = "taxref_reformat_qiime_silva138.sh"
@@ -231,7 +253,7 @@ params {
}
'greengenes85' {
title = "Greengenes 16S - Version 13_8 - clustered at 85% similarity - for testing purposes only"
- file = [ "https://data.qiime2.org/2022.8/tutorials/training-feature-classifiers/85_otus.fasta", "https://data.qiime2.org/2022.8/tutorials/training-feature-classifiers/85_otu_taxonomy.txt" ]
+ file = [ "https://data.qiime2.org/2022.11/tutorials/training-feature-classifiers/85_otus.fasta", "https://data.qiime2.org/2022.11/tutorials/training-feature-classifiers/85_otu_taxonomy.txt" ]
citation = "McDonald, D., Price, M., Goodrich, J. et al. An improved Greengenes taxonomy with explicit ranks for ecological and evolutionary analyses of bacteria and archaea. ISME J 6, 610–618 (2012). https://doi.org/10.1038/ismej.2011.139"
fmtscript = "taxref_reformat_qiime_greengenes85.sh"
}
diff --git a/conf/test.config b/conf/test.config
index 1c8a3927..1da1bc4d 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -28,6 +28,7 @@ params {
cut_dada_ref_taxonomy = true
qiime_ref_taxonomy = "greengenes85"
max_len_asv = 255
+ filter_ssu = "bac"
//this is to remove low abundance ASVs to reduce runtime of downstream processes
min_samples = 2
@@ -43,4 +44,6 @@ params {
sbdiexport = true
qiime_adonis_formula = "treatment1,mix8"
+
+ diversity_rarefaction_depth = 500
}
diff --git a/conf/test_doubleprimers.config b/conf/test_doubleprimers.config
index 1409ca64..6b275dc8 100644
--- a/conf/test_doubleprimers.config
+++ b/conf/test_doubleprimers.config
@@ -23,7 +23,6 @@ params {
FW_primer = "NNNNCCTAHGGGRBGCAGCAG"
RV_primer = "GACTACHVGGGTATCTAATCC"
double_primer = true
- filter_ssu = "mito"
dada_ref_taxonomy = false
input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet_double_primer.tsv"
trunc_qmin = 30
diff --git a/conf/test_fasta.config b/conf/test_fasta.config
index 61e9e6ce..78babb74 100644
--- a/conf/test_fasta.config
+++ b/conf/test_fasta.config
@@ -22,7 +22,7 @@ params {
// Input data
input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/testdata/ASV_seqs.fasta"
dada_ref_taxonomy = "rdp=18"
- dada_assign_taxlevels = "K,D,P,C,O,F,Genus"
+ dada_assign_taxlevels = "K,P,C,O,F,Genus"
skip_qiime = true
}
diff --git a/conf/test_pacbio_its.config b/conf/test_pacbio_its.config
index 5d6b797a..e4e4e2d2 100644
--- a/conf/test_pacbio_its.config
+++ b/conf/test_pacbio_its.config
@@ -24,9 +24,11 @@ params {
RV_primer = "TCCTGAGGGAAACTTCG"
dada_ref_taxonomy = "unite-fungi"
input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet_pacbio_ITS.tsv"
+ metadata = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Metadata_pacbio_ITS.tsv"
pacbio = true
max_ee = 12
cut_its = "full"
addsh = true
skip_qiime = true
+ sbdiexport = true
}
diff --git a/docs/images/ampliseq_workflow.png b/docs/images/ampliseq_workflow.png
index dd7510e4..eca455ac 100644
Binary files a/docs/images/ampliseq_workflow.png and b/docs/images/ampliseq_workflow.png differ
diff --git a/docs/images/ampliseq_workflow.svg b/docs/images/ampliseq_workflow.svg
index 6a38b5a5..ae56ffbe 100644
--- a/docs/images/ampliseq_workflow.svg
+++ b/docs/images/ampliseq_workflow.svg
@@ -1,23 +1,23 @@
diff --git a/docs/output.md b/docs/output.md
index c311ed2a..42d4eeec 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -20,7 +20,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [Barrnap](#barrnap) - Predict ribosomal RNA sequences and optional filtering
- [Length filter](#length-filter) - Optionally, ASV can be filtered by length thresholds
- [ITSx](#itsx) - Optionally, the ITS region can be extracted
-- [Taxonomic classification with DADA2](#taxonomic-classification-with-DADA2) - Taxonomic classification of (filtered) ASVs
+- [Taxonomic classification with DADA2](#taxonomic-classification-with-dada2) - Taxonomic classification of (filtered) ASVs
- [assignSH](#assignsh) - Optionally, a UNITE species hypothesis (SH) can be added to the taxonomy
- [QIIME2](#qiime2) - Secondary analysis
- [Taxonomic classification](#taxonomic-classification) - Taxonomical classification of ASVs
@@ -29,8 +29,6 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [Barplot](#barplot) - Interactive barplot
- [Alpha diversity rarefaction curves](#alpha-diversity-rarefaction-curves) - Rarefaction curves for quality control
- [Diversity analysis](#diversity-analysis) - High level overview with different diversity indices
- - [Alpha diversity indices](#alpha-diversity-indices) - Diversity within samples
- - [Beta diversity indices](#beta-diversity-indices) - Diversity between samples (e.g. PCoA plots)
- [ANCOM](#ancom) - Differential abundance analysis
- [PICRUSt2](#picrust2) - Predict the functional potential of a bacterial community
- [Read count report](#read-count-report) - Report of read counts during various steps of the pipeline
@@ -145,10 +143,11 @@ Optionally, ASV sequences can be filtered for rRNA sequences identified by Barrn
Output files
- `barrnap/`
- - `ASV_seqs.ssu.fasta`: Fasta file with filtered ASV sequences.
- - `AASV_table.ssu.tsv`: Counts for each filtered ASV sequence.
- `rrna.<kingdom>.gff`: GFF3 output for rRNA matches per kingdom, where kingdom is one of `bac,arc,mito,euk`.
- - `stats.ssu.tsv`: Tracking read numbers through filtering, for each sample.
+ - `summary.tsv`: Summary of e-values for each ASV and kingdom.
+ - `ASV_seqs.ssu.fasta`: Fasta file with filtered ASV sequences, only if `--filter_ssu` is set.
+ - `ASV_table.ssu.tsv`: Counts for each filtered ASV sequence, only if `--filter_ssu` is set.
+ - `stats.ssu.tsv`: Tracking read numbers through filtering, for each sample, only if `--filter_ssu` is set.
@@ -163,7 +162,7 @@ The minimum ASV length threshold can be set by `--min_len_asv` and the maximum l
- `asv_length_filter/`
- `ASV_seqs.len.fasta`: Fasta file with filtered ASV sequences.
- - `AASV_table.len.tsv`: Counts for each filtered ASV sequence.
+ - `ASV_table.len.tsv`: Counts for each filtered ASV sequence.
- `ASV_len_orig.tsv`: ASV length distribution before filtering.
- `ASV_len_filt.tsv`: ASV length distribution after filtering.
- `stats.len.tsv`: Tracking read numbers through filtering, for each sample.
@@ -330,7 +329,7 @@ Produces rarefaction plots for several alpha diversity indices, and is primarily
#### Diversity analysis
-Diversity measures summarize important sample features (alpha diversity) or differences between samples (beta diversity). To do so, sample data is first rarefied to the minimum number of counts per sample. Also, a phylogenetic tree of all ASVs is computed to provide phylogenetic information.
+Diversity measures summarize important sample features (alpha diversity) or differences between samples (beta diversity). To do so, sample data is first rarefied to the minimum number of counts per sample. The parameter `--diversity_rarefaction_depth` can increase the rarefaction depth, at the cost of excluding samples with fewer counts than that depth. Also, a phylogenetic tree of all ASVs is computed to provide phylogenetic information.
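+
+For example, a minimal flag (the value is illustrative) to rarefy at a depth of 1000 counts:
+
+```bash
+--diversity_rarefaction_depth 1000
+```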
Output files
@@ -387,7 +386,7 @@ Furthermore, ADONIS permutation-based statistical test in vegan-R determine whet
#### ANCOM
-Analysis of Composition of Microbiomes ([ANCOM](https://www.ncbi.nlm.nih.gov/pubmed/26028277)) is applied to identify features that are differentially abundant across sample groups. A key assumption made by ANCOM is that few taxa (less than about 25%) will be differentially abundant between groups otherwise the method will be inaccurate.
+Analysis of Composition of Microbiomes ([ANCOM](https://www.ncbi.nlm.nih.gov/pubmed/26028277)) is applied to identify features that are differentially abundant across sample groups. A key assumption made by ANCOM is that few taxa (less than about 25%) will be differentially abundant between groups; otherwise, the method will be inaccurate. The parameter `--ancom_sample_min_count` sets the minimum count threshold a sample must reach to be retained for the ANCOM analysis.
ANCOM is applied to each suitable or specified metadata column for 5 taxonomic levels (2-6).
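+
+For example, a hypothetical threshold that drops very small samples before the ANCOM analysis:
+
+```bash
+--ancom_sample_min_count 500
+```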
diff --git a/docs/usage.md b/docs/usage.md
index f84a30c9..71d3fd3c 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -174,15 +174,19 @@ Please note the following requirements:
- May contain the header `reverseReads` and `run`
- Sample IDs must be unique
- Sample IDs must not contain a dot `.`
-- Sample IDs starting with a number are not allowed when using metadata (because these strings will be modified)
+- Sample IDs may not start with a number
- FastQ files must be compressed (`.fastq.gz`, `.fq.gz`)
- Within one samplesheet, only one type of raw data should be specified (same amplicon & sequencing method)
An [example samplesheet](../assets/samplesheet.tsv) has been provided with the pipeline.
+> **Please note:** All characters in sample IDs other than letters, numbers, and underscores will be converted to dots `.`. Avoid such conversions: they can prevent summary files from merging correctly and cause mismatches with the metadata (although the metadata can be adjusted accordingly).
+
#### ASV/OTU fasta input
-When pointing at a file ending with `.fasta`, `.fna` or `.fa`, the containing ASV/OTU sequences will be taxonomically classified. All other pipeline steps will be skipped.
+When pointing at a file ending with `.fasta`, `.fna` or `.fa`, the ASV/OTU sequences it contains will be taxonomically classified.
+Most pipeline steps will be skipped, but ITSx, Barrnap, and length filtering can be applied before taxonomic classification.
+The sequence header line may contain a description, which will be kept as part of the sequence name; however, tabs will be converted to spaces.
```bash
--input 'path/to/amplicon_sequences.fasta'
@@ -214,7 +218,7 @@ Please note the following requirements:
- The path must be enclosed in quotes
- The metadata file has to follow the QIIME2 specifications (https://docs.qiime2.org/2021.2/tutorials/metadata/)
-The metadata file must be tab-separated with a header line. The first column in the tab-separated metadata file is the sample identifier column (required header: ID) and defines the sample or feature IDs associated with the dataset. Metadata files are not required to have additional metadata columns, i.e. a file containing only an ID column is a valid QIIME 2 metadata file. Additional columns defining metadata associated with each sample or feature ID are optional. NB: without additional columns there might be no groupings for the downstream analyses.
+The metadata file must be tab-separated with a header line. The first column in the tab-separated metadata file is the sample identifier column (required header: ID) and defines the sample or feature IDs associated with the dataset. In addition to the sample identifier column, the metadata file must contain at least one additional metadata column.
Sample identifiers should be 36 characters long or less, and also contain only ASCII alphanumeric characters (i.e. in the range of [a-z], [A-Z], or [0-9]), or the dash (-) character. For downstream analysis, by default all numeric columns, blanks or NA are removed, and only columns with multiple different values but not all unique are selected.
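+
+For illustration, a minimal metadata file could look like this (the `treatment1` column name is just an example):
+
+```
+ID	treatment1
+sample1	A
+sample2	B
+```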
@@ -232,9 +236,9 @@ nextflow pull nf-core/ampliseq
It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.
-First, go to the [nf-core/ampliseq releases page](https://github.com/nf-core/ampliseq/releases) and find the latest version number - numeric only (eg. `2.0.0`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 2.0.0`.
+First, go to the [nf-core/ampliseq releases page](https://github.com/nf-core/ampliseq/releases) and find the latest pipeline version - numeric only (eg. `2.4.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 2.4.1`. Of course, you can switch to another version by changing the number after the `-r` flag.
-This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future.
+This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future, for example at the bottom of MultiQC reports.
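+
+For example, a pinned, containerized run could look like this (input and output paths are placeholders; other required parameters are omitted for brevity):
+
+```bash
+nextflow run nf-core/ampliseq -r 2.4.1 -profile docker --input samplesheet.tsv --outdir results
+```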
## Core Nextflow arguments
@@ -244,7 +248,7 @@ This version number will be logged in reports when you run the pipeline, so that
Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments.
-Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. When using Biocontainers, most of these software packaging methods pull Docker containers from quay.io e.g [FastQC](https://quay.io/repository/biocontainers/fastqc) except for Singularity which directly downloads Singularity images via https hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/) and Conda which downloads and installs software locally from [Bioconda](https://bioconda.github.io/).
+Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below.
> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported.
@@ -253,8 +257,11 @@ The pipeline also dynamically loads configurations from [https://github.com/nf-c
Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important!
They are loaded in sequence, so later profiles can overwrite earlier profiles.
-If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended.
+If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines depending on the computing environment.
+- `test`
+ - A profile with a complete configuration for automated testing
+ - Includes links to test data so needs no other parameters
- `docker`
- A generic configuration profile to be used with [Docker](https://docker.com/)
- `singularity`
@@ -267,9 +274,6 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof
- A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/)
- `conda`
- A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud.
-- `test`, `test_multi`, `test_full`, `test_pacbio_its`, `test_iontorrent`, `test_doubleprimers`, `test_reftaxcustom`, `test_single`, `test_novaseq`
- - A profile with a complete configuration for automated testing
- - Includes links to test data so needs no other parameters
### `-resume`
@@ -318,8 +322,14 @@ Work dir:
Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run`
```
+#### For beginners
+
+As a first step to bypass this error, you can try to increase the amount of CPUs, memory, and time for the whole pipeline via the parameters `--max_cpus`, `--max_memory`, and `--max_time`. Based on the error above, you would need to increase the amount of memory. To find the default value, go to the [parameter documentation of rnaseq](https://nf-co.re/rnaseq/3.9/parameters) and scroll down to the `show hidden parameter` button to see the default of `--max_memory`, in this case 128GB. You can then try to run your pipeline again with `--max_memory 200GB -resume` to skip all processes that were already completed. If you cannot increase the resources of the complete pipeline, you can try to adapt the resources of a single process as described below.
+
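+For example, a hypothetical rerun with a raised memory limit (the input is a placeholder):
+
+```bash
+nextflow run nf-core/rnaseq --input samplesheet.csv --outdir results --max_memory 200GB -resume
+```
+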
+#### Advanced option on process level
+
To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN).
-We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/software/star/align/main.nf`.
+We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/star/align/main.nf`.
If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9).
The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements.
The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB.
@@ -338,7 +348,7 @@ process {
>
> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly.
-### Updating containers
+### Updating containers (advanced users)
The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`.
diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy
index b3d092f8..33cd4f6e 100755
--- a/lib/NfcoreSchema.groovy
+++ b/lib/NfcoreSchema.groovy
@@ -46,7 +46,6 @@ class NfcoreSchema {
'quiet',
'syslog',
'v',
- 'version',
// Options for `nextflow run` command
'ansi',
diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy
index 27feb009..25a0a74a 100755
--- a/lib/NfcoreTemplate.groovy
+++ b/lib/NfcoreTemplate.groovy
@@ -32,6 +32,25 @@ class NfcoreTemplate {
}
}
+ //
+ // Generate version string
+ //
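+ // Returns e.g. "v2.4.1", or "v2.4.1-gabc1234" when a git commit id is available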
+ public static String version(workflow) {
+ String version_string = ""
+
+ if (workflow.manifest.version) {
+ def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : ''
+ version_string += "${prefix_v}${workflow.manifest.version}"
+ }
+
+ if (workflow.commitId) {
+ def git_shortsha = workflow.commitId.substring(0, 7)
+ version_string += "-g${git_shortsha}"
+ }
+
+ return version_string
+ }
+
//
// Construct and send completion email
//
@@ -61,7 +80,7 @@ class NfcoreTemplate {
misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
def email_fields = [:]
- email_fields['version'] = workflow.manifest.version
+ email_fields['version'] = NfcoreTemplate.version(workflow)
email_fields['runName'] = workflow.runName
email_fields['success'] = workflow.success
email_fields['dateComplete'] = workflow.complete
@@ -146,10 +165,10 @@ class NfcoreTemplate {
}
//
- // Construct and send adaptive card
- // https://adaptivecards.io
+ // Construct and send a notification to a web server as JSON
+ // e.g. Microsoft Teams and Slack
//
- public static void adaptivecard(workflow, params, summary_params, projectDir, log) {
+ public static void IM_notification(workflow, params, summary_params, projectDir, log) {
def hook_url = params.hook_url
def summary = [:]
@@ -170,7 +189,7 @@ class NfcoreTemplate {
misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp
def msg_fields = [:]
- msg_fields['version'] = workflow.manifest.version
+ msg_fields['version'] = NfcoreTemplate.version(workflow)
msg_fields['runName'] = workflow.runName
msg_fields['success'] = workflow.success
msg_fields['dateComplete'] = workflow.complete
@@ -178,13 +197,16 @@ class NfcoreTemplate {
msg_fields['exitStatus'] = workflow.exitStatus
msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
msg_fields['errorReport'] = (workflow.errorReport ?: 'None')
- msg_fields['commandLine'] = workflow.commandLine
+ msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "")
msg_fields['projectDir'] = workflow.projectDir
msg_fields['summary'] = summary << misc_fields
// Render the JSON template
def engine = new groovy.text.GStringTemplateEngine()
- def hf = new File("$projectDir/assets/adaptivecard.json")
+ // Different JSON depending on the service provider
+ // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format
+ def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json"
+ def hf = new File("$projectDir/assets/${json_path}")
def json_template = engine.createTemplate(hf).make(msg_fields)
def json_message = json_template.toString()
@@ -209,7 +231,7 @@ class NfcoreTemplate {
if (workflow.stats.ignoredCount == 0) {
log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-"
} else {
- log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-"
+ log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-"
}
} else {
log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-"
@@ -297,6 +319,7 @@ class NfcoreTemplate {
//
public static String logo(workflow, monochrome_logs) {
Map colors = logColours(monochrome_logs)
+ String workflow_version = NfcoreTemplate.version(workflow)
String.format(
"""\n
${dashedLine(monochrome_logs)}
@@ -305,7 +328,7 @@ class NfcoreTemplate {
${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset}
${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset}
${colors.green}`._,._,\'${colors.reset}
- ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset}
+ ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset}
${dashedLine(monochrome_logs)}
""".stripIndent()
)
diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy
index 16187dd3..0162d5a4 100755
--- a/lib/WorkflowAmpliseq.groovy
+++ b/lib/WorkflowAmpliseq.groovy
@@ -10,8 +10,6 @@ class WorkflowAmpliseq {
// Check and validate parameters
//
public static void initialise(params, log) {
- if (params.enable_conda) { log.warn "Conda is enabled (`--enable_conda`), any steps involving QIIME2 are not available. Use a container engine instead of conda to enable all software." }
-
if ( params.pacbio || params.iontorrent || params.single_end ) {
if (params.trunclenr) { log.warn "Unused parameter: `--trunclenr` is ignored because the data is single end." }
} else if (params.trunclenf && !params.trunclenr) {
@@ -100,9 +98,9 @@ class WorkflowAmpliseq {
System.exit(1)
}
- String[] sbdi_incompatible_databases = ["midori2-co1=gb250","midori2-co1","rdp=18","rdp","sbdi-gtdb","sbdi-gtdb=R06-RS202-3","sbdi-gtdb=R06-RS202-1","silva=132","unite-fungi","unite-fungi=8.3","unite-fungi=8.2","unite-alleuk","unite-alleuk=8.3","unite-alleuk=8.2"]
- if ( params.sbdiexport && Arrays.stream(sbdi_incompatible_databases).anyMatch(entry -> params.dada_ref_taxonomy.toString().equals(entry)) ) {
- log.error "Incompatible parameters: `--sbdiexport` does not work with the chosen databse of `--dada_ref_taxonomy`, because the expected taxonomic levels do not match."
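+ // allow-list of reference taxonomies whose rank structure matches the SBDI export format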
+ String[] sbdi_compatible_databases = ["coidb","coidb=221216","gtdb","gtdb=R07-RS207","gtdb=R06-RS202","gtdb=R05-RS95","midori2-co1","midori2-co1=gb250","pr2","pr2=4.14.0","pr2=4.13.0","rdp","rdp=18","sbdi-gtdb","sbdi-gtdb=R07-RS207-1","silva","silva=138","silva=132","unite-fungi","unite-fungi=8.3","unite-fungi=8.2","unite-alleuk","unite-alleuk=8.3","unite-alleuk=8.2"]
+ if ( params.sbdiexport && !Arrays.stream(sbdi_compatible_databases).anyMatch(entry -> params.dada_ref_taxonomy.toString().equals(entry)) ) {
+ log.error "Incompatible parameters: `--sbdiexport` does not work with the chosen database of `--dada_ref_taxonomy`, because the expected taxonomic levels do not match."
System.exit(1)
}
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index 48cb7d8f..530940b9 100755
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -20,7 +20,7 @@ class WorkflowMain {
}
//
- // Print help to screen if required
+ // Generate help string
//
public static String help(workflow, params, log) {
def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv -profile docker"
@@ -33,7 +33,7 @@ class WorkflowMain {
}
//
- // Print parameter summary log to screen
+ // Generate parameter summary log string
//
public static String paramsSummaryLog(workflow, params, log) {
def summary_log = ''
@@ -62,20 +62,26 @@ class WorkflowMain {
qiimereftaxonomyExistsError(params, log)
}
- // Validate workflow parameters via the JSON schema
- if (params.validate_params) {
- NfcoreSchema.validateParameters(workflow, params, log)
+ // Print workflow version and exit on --version
+ if (params.version) {
+ String workflow_version = NfcoreTemplate.version(workflow)
+ log.info "${workflow.manifest.name} ${workflow_version}"
+ System.exit(0)
}
// Print parameter summary log to screen
-
log.info paramsSummaryLog(workflow, params, log)
+ // Validate workflow parameters via the JSON schema
+ if (params.validate_params) {
+ NfcoreSchema.validateParameters(workflow, params, log)
+ }
+
// Check that a -profile or Nextflow config has been provided to run the pipeline
NfcoreTemplate.checkConfigProvided(workflow, log)
// Check that conda channels are set-up correctly
- if (params.enable_conda) {
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
Utils.checkCondaChannels(log)
}
diff --git a/modules.json b/modules.json
index 6f958233..c3b153ee 100644
--- a/modules.json
+++ b/modules.json
@@ -7,23 +7,28 @@
"nf-core": {
"custom/dumpsoftwareversions": {
"branch": "master",
- "git_sha": "8022c68e7403eecbd8ba9c49496f69f8c49d50f0"
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
},
"cutadapt": {
"branch": "master",
- "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
},
"fastqc": {
"branch": "master",
- "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
},
"multiqc": {
"branch": "master",
- "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+ "git_sha": "ee80d14721e76e2e079103b8dcd5d57129e584ba",
+ "installed_by": ["modules"]
},
"vsearch/usearchglobal": {
"branch": "master",
- "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905"
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
}
}
}
diff --git a/modules/local/assignsh.nf b/modules/local/assignsh.nf
index a1e0d9c5..72dceb39 100644
--- a/modules/local/assignsh.nf
+++ b/modules/local/assignsh.nf
@@ -2,7 +2,7 @@ process ASSIGNSH {
tag "${asvtable}"
label 'process_low'
- conda (params.enable_conda ? "pandas=1.1.5" : null)
+ conda "conda-forge::pandas=1.1.5"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.1.5':
'quay.io/biocontainers/pandas:1.1.5' }"
diff --git a/modules/local/barrnap.nf b/modules/local/barrnap.nf
index 2667657a..0341e6e8 100644
--- a/modules/local/barrnap.nf
+++ b/modules/local/barrnap.nf
@@ -2,7 +2,7 @@ process BARRNAP {
tag "${fasta}"
label 'process_low'
- conda (params.enable_conda ? "bioconda::barrnap=0.9" : null)
+ conda "bioconda::barrnap=0.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/barrnap:0.9--hdfd78af_4' :
'quay.io/biocontainers/barrnap:0.9--hdfd78af_4' }"
diff --git a/modules/local/barrnapsummary.nf b/modules/local/barrnapsummary.nf
new file mode 100644
index 00000000..13a38385
--- /dev/null
+++ b/modules/local/barrnapsummary.nf
@@ -0,0 +1,29 @@
+process BARRNAPSUMMARY {
+ label 'process_single'
+
+ conda "conda-forge::python=3.9"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/python:3.9' :
+ 'quay.io/biocontainers/python:3.9' }"
+
+ input:
+ path predictions
+
+ output:
+ path "summary.tsv" , emit: summary
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+
+ """
+ summarize_barrnap.py $predictions
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ python: \$( python --version )
+ END_VERSIONS
+ """
+}
diff --git a/modules/local/combine_table.nf b/modules/local/combine_table.nf
index 26988049..6fdd9962 100644
--- a/modules/local/combine_table.nf
+++ b/modules/local/combine_table.nf
@@ -1,7 +1,7 @@
process COMBINE_TABLE {
label 'process_low'
- conda (params.enable_conda ? "bioconductor::biostrings=2.58.0" : null)
+ conda "bioconda::bioconductor-biostrings=2.58.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-biostrings:2.58.0--r40h037d062_0' :
'quay.io/biocontainers/bioconductor-biostrings:2.58.0--r40h037d062_0' }"
diff --git a/modules/local/cutadapt_summary.nf b/modules/local/cutadapt_summary.nf
index 0e9b4df9..842eaf8e 100644
--- a/modules/local/cutadapt_summary.nf
+++ b/modules/local/cutadapt_summary.nf
@@ -2,7 +2,7 @@ process CUTADAPT_SUMMARY {
tag "${name}"
label 'process_low'
- conda (params.enable_conda ? "conda-forge::python=3.8.3" : null)
+ conda "conda-forge::python=3.8.3"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/python:3.8.3' :
'quay.io/biocontainers/python:3.8.3' }"
diff --git a/modules/local/cutadapt_summary_merge.nf b/modules/local/cutadapt_summary_merge.nf
index 8e9a2554..1df4bc3d 100644
--- a/modules/local/cutadapt_summary_merge.nf
+++ b/modules/local/cutadapt_summary_merge.nf
@@ -2,7 +2,7 @@ process CUTADAPT_SUMMARY_MERGE {
tag "${files}"
label 'process_low'
- conda (params.enable_conda ? "bioconductor-dada2=1.22.0" : null)
+ conda "bioconda::bioconductor-dada2=1.22.0 conda-forge::r-digest=0.6.30"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-dada2:1.22.0--r41h399db7b_0' :
'quay.io/biocontainers/bioconductor-dada2:1.22.0--r41h399db7b_0' }"
diff --git a/modules/local/dada2_addspecies.nf b/modules/local/dada2_addspecies.nf
index 65f6a418..a002148b 100644
--- a/modules/local/dada2_addspecies.nf
+++ b/modules/local/dada2_addspecies.nf
@@ -3,7 +3,7 @@ process DADA2_ADDSPECIES {
label 'process_high'
label 'single_cpu'
- conda (params.enable_conda ? "bioconductor-dada2=1.22.0" : null)
+ conda "bioconda::bioconductor-dada2=1.22.0 conda-forge::r-digest=0.6.30"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-dada2:1.22.0--r41h399db7b_0' :
'quay.io/biocontainers/bioconductor-dada2:1.22.0--r41h399db7b_0' }"
@@ -26,7 +26,7 @@ process DADA2_ADDSPECIES {
def args = task.ext.args ?: ''
def taxlevels = taxlevels_input ?
'c("' + taxlevels_input.split(",").join('","') + '")' :
- 'c("Domain", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")'
+ 'c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")'
def seed = task.ext.seed ?: '100'
"""
#!/usr/bin/env Rscript
diff --git a/modules/local/dada2_denoising.nf b/modules/local/dada2_denoising.nf
index f480591e..5e94c335 100644
--- a/modules/local/dada2_denoising.nf
+++ b/modules/local/dada2_denoising.nf
@@ -3,7 +3,7 @@ process DADA2_DENOISING {
label 'process_medium'
label 'process_long'
- conda (params.enable_conda ? "bioconductor-dada2=1.22.0" : null)
+ conda "bioconda::bioconductor-dada2=1.22.0 conda-forge::r-digest=0.6.30"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-dada2:1.22.0--r41h399db7b_0' :
'quay.io/biocontainers/bioconductor-dada2:1.22.0--r41h399db7b_0' }"
diff --git a/modules/local/dada2_err.nf b/modules/local/dada2_err.nf
index 1b68beff..790bc159 100644
--- a/modules/local/dada2_err.nf
+++ b/modules/local/dada2_err.nf
@@ -2,7 +2,7 @@ process DADA2_ERR {
tag "$meta.run"
label 'process_medium'
- conda (params.enable_conda ? "bioconductor-dada2=1.22.0" : null)
+ conda "bioconda::bioconductor-dada2=1.22.0 conda-forge::r-digest=0.6.30"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-dada2:1.22.0--r41h399db7b_0' :
'quay.io/biocontainers/bioconductor-dada2:1.22.0--r41h399db7b_0' }"
@@ -13,6 +13,7 @@ process DADA2_ERR {
output:
tuple val(meta), path("*.err.rds"), emit: errormodel
tuple val(meta), path("*.err.pdf"), emit: pdf
+ tuple val(meta), path("*.err.svg"), emit: svg
tuple val(meta), path("*.err.log"), emit: log
tuple val(meta), path("*.err.convergence.txt"), emit: convergence
path "versions.yml" , emit: versions
@@ -43,10 +44,16 @@ process DADA2_ERR {
pdf("${meta.run}_1.err.pdf")
plotErrors(errF, nominalQ = TRUE)
dev.off()
+ svg("${meta.run}_1.err.svg")
+ plotErrors(errF, nominalQ = TRUE)
+ dev.off()
pdf("${meta.run}_2.err.pdf")
plotErrors(errR, nominalQ = TRUE)
dev.off()
+ svg("${meta.run}_2.err.svg")
+ plotErrors(errR, nominalQ = TRUE)
+ dev.off()
sink(file = "${meta.run}_1.err.convergence.txt")
dada2:::checkConvergence(errF)
@@ -75,6 +82,9 @@ process DADA2_ERR {
pdf("${meta.run}.err.pdf")
plotErrors(errF, nominalQ = TRUE)
dev.off()
+ svg("${meta.run}.err.svg")
+ plotErrors(errF, nominalQ = TRUE)
+ dev.off()
sink(file = "${meta.run}.err.convergence.txt")
dada2:::checkConvergence(errF)
diff --git a/modules/local/dada2_filtntrim.nf b/modules/local/dada2_filtntrim.nf
index f6d0396a..26982a51 100644
--- a/modules/local/dada2_filtntrim.nf
+++ b/modules/local/dada2_filtntrim.nf
@@ -2,7 +2,7 @@ process DADA2_FILTNTRIM {
tag "$meta.id"
label 'process_medium'
- conda (params.enable_conda ? "bioconductor-dada2=1.22.0" : null)
+ conda "bioconda::bioconductor-dada2=1.22.0 conda-forge::r-digest=0.6.30"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-dada2:1.22.0--r41h399db7b_0' :
'quay.io/biocontainers/bioconductor-dada2:1.22.0--r41h399db7b_0' }"
diff --git a/modules/local/dada2_merge.nf b/modules/local/dada2_merge.nf
index 895af22f..edf5a47a 100644
--- a/modules/local/dada2_merge.nf
+++ b/modules/local/dada2_merge.nf
@@ -1,7 +1,7 @@
process DADA2_MERGE {
label 'process_low'
- conda (params.enable_conda ? "bioconductor-dada2=1.22.0" : null)
+ conda "bioconda::bioconductor-dada2=1.22.0 conda-forge::r-digest=0.6.30"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-dada2:1.22.0--r41h399db7b_0' :
'quay.io/biocontainers/bioconductor-dada2:1.22.0--r41h399db7b_0' }"
diff --git a/modules/local/dada2_quality.nf b/modules/local/dada2_quality.nf
index 867a1870..4e64772d 100644
--- a/modules/local/dada2_quality.nf
+++ b/modules/local/dada2_quality.nf
@@ -2,7 +2,7 @@ process DADA2_QUALITY {
tag "$meta"
label 'process_low'
- conda (params.enable_conda ? "bioconductor-dada2=1.22.0" : null)
+ conda "bioconda::bioconductor-dada2=1.22.0 conda-forge::r-digest=0.6.30"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-dada2:1.22.0--r41h399db7b_0' :
'quay.io/biocontainers/bioconductor-dada2:1.22.0--r41h399db7b_0' }"
@@ -11,7 +11,8 @@ process DADA2_QUALITY {
tuple val(meta), path(reads)
output:
- path "*_qual_stats.pdf" , emit: pdf
+ path "*_qual_stats.pdf" , emit: pdf
+ path "*_qual_stats.svg" , emit: svg
tuple val(meta), path("*_qual_stats.tsv"), emit: tsv
path "versions.yml" , emit: versions
path "*.args.txt" , emit: args
@@ -65,6 +66,9 @@ process DADA2_QUALITY {
pdf(paste0("${prefix}_qual_stats",".pdf"))
plot
dev.off()
+ svg(paste0("${prefix}_qual_stats",".svg"))
+ plot
+ dev.off()
write.table(paste0('plotQualityProfile\t$args\nmax_files\t',max_files), file = "${prefix}_plotQualityProfile.args.txt", row.names = FALSE, col.names = FALSE, quote = FALSE, na = '')
writeLines(c("\\"${task.process}\\":", paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")),paste0(" dada2: ", packageVersion("dada2")),paste0(" ShortRead: ", packageVersion("ShortRead")) ), "versions.yml")
diff --git a/modules/local/dada2_rmchimera.nf b/modules/local/dada2_rmchimera.nf
index 20f5466c..f3fa542e 100644
--- a/modules/local/dada2_rmchimera.nf
+++ b/modules/local/dada2_rmchimera.nf
@@ -2,7 +2,7 @@ process DADA2_RMCHIMERA {
tag "$meta.run"
label 'process_medium'
- conda (params.enable_conda ? "bioconductor-dada2=1.22.0" : null)
+ conda "bioconda::bioconductor-dada2=1.22.0 conda-forge::r-digest=0.6.30"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-dada2:1.22.0--r41h399db7b_0' :
'quay.io/biocontainers/bioconductor-dada2:1.22.0--r41h399db7b_0' }"
diff --git a/modules/local/dada2_stats.nf b/modules/local/dada2_stats.nf
index ce64e2ba..05ac3737 100644
--- a/modules/local/dada2_stats.nf
+++ b/modules/local/dada2_stats.nf
@@ -2,7 +2,7 @@ process DADA2_STATS {
tag "$meta.run"
label 'process_low'
- conda (params.enable_conda ? "bioconductor-dada2=1.22.0" : null)
+ conda "bioconda::bioconductor-dada2=1.22.0 conda-forge::r-digest=0.6.30"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-dada2:1.22.0--r41h399db7b_0' :
'quay.io/biocontainers/bioconductor-dada2:1.22.0--r41h399db7b_0' }"
diff --git a/modules/local/dada2_taxonomy.nf b/modules/local/dada2_taxonomy.nf
index 46cdb1df..c9a65381 100644
--- a/modules/local/dada2_taxonomy.nf
+++ b/modules/local/dada2_taxonomy.nf
@@ -2,7 +2,7 @@ process DADA2_TAXONOMY {
tag "${fasta},${database}"
label 'process_high'
- conda (params.enable_conda ? "bioconductor-dada2=1.22.0" : null)
+ conda "bioconda::bioconductor-dada2=1.22.0 conda-forge::r-digest=0.6.30"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-dada2:1.22.0--r41h399db7b_0' :
'quay.io/biocontainers/bioconductor-dada2:1.22.0--r41h399db7b_0' }"
@@ -26,7 +26,7 @@ process DADA2_TAXONOMY {
def args = task.ext.args ?: ''
def taxlevels = taxlevels_input ?
'c("' + taxlevels_input.split(",").join('","') + '")' :
- 'c("Domain", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")'
+ 'c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")'
def seed = task.ext.seed ?: '100'
"""
#!/usr/bin/env Rscript
diff --git a/modules/local/filter_len_asv.nf b/modules/local/filter_len_asv.nf
index 388a200d..c7fc41d2 100644
--- a/modules/local/filter_len_asv.nf
+++ b/modules/local/filter_len_asv.nf
@@ -2,7 +2,7 @@ process FILTER_LEN_ASV {
tag "${fasta}"
label 'process_low'
- conda (params.enable_conda ? "bioconductor::biostrings=2.58.0" : null)
+ conda "bioconda::bioconductor-biostrings=2.58.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-biostrings:2.58.0--r40h037d062_0' :
'quay.io/biocontainers/bioconductor-biostrings:2.58.0--r40h037d062_0' }"
@@ -25,6 +25,8 @@ process FILTER_LEN_ASV {
script:
def min_len_asv = params.min_len_asv ?: '1'
def max_len_asv = params.max_len_asv ?: '1000000'
+
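+ // the count table input is optional (e.g. fasta-only input); without it, start from an empty one-column data frame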
+ def read_table = table ? "table <- read.table(file = '$table', sep = '\t', comment.char = '', header=TRUE)" : "table <- data.frame(matrix(ncol = 1, nrow = 0))"
"""
#!/usr/bin/env Rscript
@@ -32,7 +34,7 @@ process FILTER_LEN_ASV {
suppressPackageStartupMessages(library(Biostrings))
#read abundance file, first column is ASV_ID
- table <- read.table(file = "$table", sep = '\t', comment.char = "", header=TRUE)
+ $read_table
colnames(table)[1] <- "ASV_ID"
#read fasta file of ASV sequences
diff --git a/modules/local/filter_ssu.nf b/modules/local/filter_ssu.nf
index ec740553..21b04705 100644
--- a/modules/local/filter_ssu.nf
+++ b/modules/local/filter_ssu.nf
@@ -2,7 +2,7 @@ process FILTER_SSU {
tag "${fasta}"
label 'process_low'
- conda (params.enable_conda ? "bioconductor::biostrings=2.58.0" : null)
+ conda "bioconductor::biostrings=2.58.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-biostrings:2.58.0--r40h037d062_0' :
'quay.io/biocontainers/bioconductor-biostrings:2.58.0--r40h037d062_0' }"
@@ -10,7 +10,7 @@ process FILTER_SSU {
input:
path(fasta)
path(table)
- path(lists)
+ path(barrnap_summary)
output:
path( "stats.ssu.tsv" ) , emit: stats
@@ -29,22 +29,23 @@ process FILTER_SSU {
#load packages
suppressPackageStartupMessages(library(Biostrings))
- #use only selected kingdom
- dir.create("./selection")
kingdom <- as.list(strsplit("$kingdom", ",")[[1]])
- for (x in kingdom) {
- file.copy(paste0(x,".matches.txt"), paste0("./selection/",x,".matches.txt"))
- }
- files = list.files(path = "./selection", pattern="*matches.txt", full.names = TRUE)
- #error if (all) file(s) is/are empty
- if ( all(file.size(files) == 0L) ) stop("Chosen kingdom(s) by --filter_ssu has no matches. Please choose a diffferent kingdom or omit filtering.")
- files = files[file.size(files) != 0L]
+ df = read.table("$barrnap_summary", header = TRUE, sep = "\t", stringsAsFactors = FALSE)
+ # keep only ASV_ID & eval columns & sort
+ df <- subset(df, select = c(ASV_ID,mito_eval,euk_eval,arc_eval,bac_eval))
- #read positive ID lists
- list = do.call(rbind, lapply(files, function(x) read.csv(x, stringsAsFactors = FALSE, header = FALSE)))
- list = unique(list)
- colnames(list)[1] <- "ID"
+ # choose kingdom (column) with lowest evalue
+ df[is.na(df)] <- 1
+ df\$result = colnames(df[,2:5])[apply(df[,2:5],1,which.min)]
+ df\$result = gsub("_eval", "", df\$result)
+
+ # filter ASVs
+ df_filtered = subset(df, df\$result %in% kingdom)
+ id_filtered = subset(df_filtered, select = c(ASV_ID))
+
+ #error if all ASVs are removed
+ if ( nrow(df_filtered) == 0 ) stop("Chosen kingdom(s) by --filter_ssu has no matches. Please choose a different kingdom (domain) or omit filtering.")
#read abundance file, first column is ASV_ID
table <- read.table(file = "$table", sep = '\t', comment.char = "", header=TRUE)
@@ -55,12 +56,12 @@ process FILTER_SSU {
seq <- data.frame(ID=names(seq), sequence=paste(seq))
#check if all ids match
- if(!all(list\$ID %in% seq\$ID)) {stop(paste(paste(files,sep=","),"and","$fasta","dont share all IDs, exit."), call.=FALSE)}
- if(!all(list\$ID %in% table\$ASV_ID)) {stop(paste(paste(files,sep=","),"and","$table","dont share all IDs, exit"), call.=FALSE)}
+ if(!all(id_filtered\$ASV_ID %in% seq\$ID)) {stop(paste("$barrnap_summary","and","$fasta","don't share all IDs, exit."), call.=FALSE)}
+ if(!all(id_filtered\$ASV_ID %in% table\$ASV_ID)) {stop(paste("$barrnap_summary","and","$table","don't share all IDs, exit."), call.=FALSE)}
#merge
- filtered_table <- merge(table, list, by.x="ASV_ID", by.y="ID", all.x=FALSE, all.y=TRUE)
- filtered_seq <- merge(seq, list, by.x="ID", by.y="ID", all.x=FALSE, all.y=TRUE)
+ filtered_table <- merge(table, id_filtered, by.x="ASV_ID", by.y="ASV_ID", all.x=FALSE, all.y=TRUE)
+ filtered_seq <- merge(seq, id_filtered, by.x="ID", by.y="ASV_ID", all.x=FALSE, all.y=TRUE)
#write
write.table(filtered_table, file = "ASV_table.ssu.tsv", row.names=FALSE, sep="\t", col.names = TRUE, quote = FALSE, na = '')
diff --git a/modules/local/filter_stats.nf b/modules/local/filter_stats.nf
index 679efee1..55a737c9 100644
--- a/modules/local/filter_stats.nf
+++ b/modules/local/filter_stats.nf
@@ -1,7 +1,7 @@
process FILTER_STATS {
label 'process_low'
- conda (params.enable_conda ? "pandas=1.1.5" : null)
+ conda "conda-forge::pandas=1.1.5"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.1.5' :
'quay.io/biocontainers/pandas:1.1.5' }"
diff --git a/modules/local/format_fastainput.nf b/modules/local/format_fastainput.nf
new file mode 100644
index 00000000..b97b3184
--- /dev/null
+++ b/modules/local/format_fastainput.nf
@@ -0,0 +1,28 @@
+process FORMAT_FASTAINPUT {
+ label 'process_low'
+
+ conda "conda-forge::sed=4.7"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
+ 'ubuntu:20.04' }"
+
+ input:
+ path(fastain)
+
+ output:
+ path "input.mod.fasta" , emit: fasta
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ """
+ cat $fastain | sed '/^>/s/\t/ /g' > input.mod.fasta
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ sed: \$(sed --version 2>&1 | sed -n 1p | sed 's/sed (GNU sed) //')
+ END_VERSIONS
+ """
+}
diff --git a/modules/local/format_taxonomy.nf b/modules/local/format_taxonomy.nf
index 093bb6d9..8e085a47 100644
--- a/modules/local/format_taxonomy.nf
+++ b/modules/local/format_taxonomy.nf
@@ -1,7 +1,7 @@
process FORMAT_TAXONOMY {
label 'process_low'
- conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
+ conda "conda-forge::sed=4.7"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
'biocontainers/biocontainers:v1.2.0_cv1' }"
diff --git a/modules/local/format_taxonomy_qiime.nf b/modules/local/format_taxonomy_qiime.nf
index 32c5dcec..8ed2ba1a 100644
--- a/modules/local/format_taxonomy_qiime.nf
+++ b/modules/local/format_taxonomy_qiime.nf
@@ -2,7 +2,7 @@
process FORMAT_TAXONOMY_QIIME {
label 'process_low'
- conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
+ conda "conda-forge::sed=4.7"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' :
'biocontainers/biocontainers:v1.2.0_cv1' }"
diff --git a/modules/local/format_taxresults.nf b/modules/local/format_taxresults.nf
index 49cf908f..ebe0115d 100644
--- a/modules/local/format_taxresults.nf
+++ b/modules/local/format_taxresults.nf
@@ -1,7 +1,7 @@
process FORMAT_TAXRESULTS {
label 'process_low'
- conda (params.enable_conda ? "pandas=1.1.5" : null)
+ conda "conda-forge::pandas=1.1.5"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.1.5' :
'quay.io/biocontainers/pandas:1.1.5' }"
diff --git a/modules/local/itsx_cutasv.nf b/modules/local/itsx_cutasv.nf
index 2c6e4a08..cb863be9 100644
--- a/modules/local/itsx_cutasv.nf
+++ b/modules/local/itsx_cutasv.nf
@@ -1,7 +1,7 @@
process ITSX_CUTASV {
label 'process_medium'
- conda (params.enable_conda ? "bioconda::itsx=1.1.3" : null)
+ conda "bioconda::itsx=1.1.3"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/itsx:1.1.3--hdfd78af_1' :
'quay.io/biocontainers/itsx:1.1.3--hdfd78af_1' }"
diff --git a/modules/local/merge_stats.nf b/modules/local/merge_stats.nf
index 170c10f6..d15ab7b3 100644
--- a/modules/local/merge_stats.nf
+++ b/modules/local/merge_stats.nf
@@ -1,7 +1,7 @@
process MERGE_STATS {
label 'process_low'
- conda (params.enable_conda ? "bioconductor-dada2=1.22.0" : null)
+ conda "bioconda::bioconductor-dada2=1.22.0 conda-forge::r-digest=0.6.30"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-dada2:1.22.0--r41h399db7b_0' :
'quay.io/biocontainers/bioconductor-dada2:1.22.0--r41h399db7b_0' }"
diff --git a/modules/local/metadata_all.nf b/modules/local/metadata_all.nf
index 1e647806..8570e6b1 100644
--- a/modules/local/metadata_all.nf
+++ b/modules/local/metadata_all.nf
@@ -2,7 +2,7 @@ process METADATA_ALL {
tag "$metadata"
label 'process_low'
- conda (params.enable_conda ? "bioconductor-dada2=1.22.0" : null)
+ conda "bioconda::bioconductor-dada2=1.22.0 conda-forge::r-digest=0.6.30"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-dada2:1.22.0--r41h399db7b_0' :
'quay.io/biocontainers/bioconductor-dada2:1.22.0--r41h399db7b_0' }"
diff --git a/modules/local/metadata_pairwise.nf b/modules/local/metadata_pairwise.nf
index 3c1421e6..cce98e51 100644
--- a/modules/local/metadata_pairwise.nf
+++ b/modules/local/metadata_pairwise.nf
@@ -2,7 +2,7 @@ process METADATA_PAIRWISE {
tag "$metadata"
label 'process_low'
- conda (params.enable_conda ? "bioconductor-dada2=1.22.0" : null)
+ conda "bioconda::bioconductor-dada2=1.22.0 conda-forge::r-digest=0.6.30"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-dada2:1.22.0--r41h399db7b_0' :
'quay.io/biocontainers/bioconductor-dada2:1.22.0--r41h399db7b_0' }"
diff --git a/modules/local/novaseq_err.nf b/modules/local/novaseq_err.nf
index bc688a3e..2db53cbc 100644
--- a/modules/local/novaseq_err.nf
+++ b/modules/local/novaseq_err.nf
@@ -2,7 +2,7 @@ process NOVASEQ_ERR {
tag "$meta.run"
label 'process_medium'
- conda (params.enable_conda ? "bioconductor-dada2=1.22.0" : null)
+ conda "bioconda::bioconductor-dada2=1.22.0 conda-forge::r-digest=0.6.30"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-dada2:1.22.0--r41h399db7b_0' :
'quay.io/biocontainers/bioconductor-dada2:1.22.0--r41h399db7b_0' }"
@@ -12,6 +12,7 @@ process NOVASEQ_ERR {
output:
tuple val(meta), path("*.md.err.rds"), emit: errormodel
tuple val(meta), path("*.md.err.pdf"), emit: pdf
+ tuple val(meta), path("*.md.err.svg"), emit: svg
tuple val(meta), path("*.md.err.convergence.txt"), emit: convergence
path "versions.yml" , emit: versions
diff --git a/modules/local/picrust.nf b/modules/local/picrust.nf
index f582b568..16192665 100644
--- a/modules/local/picrust.nf
+++ b/modules/local/picrust.nf
@@ -2,7 +2,7 @@ process PICRUST {
tag "${seq},${abund}"
label 'process_medium'
- conda (params.enable_conda ? "bioconda::picrust2=2.5.0" : null)
+ conda "bioconda::picrust2=2.5.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/picrust2:2.5.0--pyhdfd78af_0' :
'quay.io/biocontainers/picrust2:2.5.0--pyhdfd78af_0' }"
diff --git a/modules/local/qiime2_alphararefaction.nf b/modules/local/qiime2_alphararefaction.nf
index b6ba562d..9d656840 100644
--- a/modules/local/qiime2_alphararefaction.nf
+++ b/modules/local/qiime2_alphararefaction.nf
@@ -1,8 +1,12 @@
process QIIME2_ALPHARAREFACTION {
label 'process_low'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
path(metadata)
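
The conda/mamba guard introduced in this and the following QIIME2 modules works by splitting the active profile string on commas and intersecting it with the unsupported profile names. A minimal standalone Groovy sketch of that check, with an illustrative value standing in for the `workflow.profile` string that Nextflow supplies at run time:

    // Sketch of the guard used in the QIIME2 modules; 'docker,test' is illustrative.
    def profile = 'docker,test'
    def unsupported = profile.tokenize(',').intersect(['conda', 'mamba'])
    if (unsupported.size() >= 1) {
        System.err.println 'QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead.'
        System.exit(1)
    }
    println "Profile '${profile}' is compatible with the QIIME2 container."

Using `tokenize(',')` rather than matching the whole string means a composite invocation such as `-profile conda,test` is still caught.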
diff --git a/modules/local/qiime2_ancom_asv.nf b/modules/local/qiime2_ancom_asv.nf
index 2af65c8e..322b414e 100644
--- a/modules/local/qiime2_ancom_asv.nf
+++ b/modules/local/qiime2_ancom_asv.nf
@@ -5,8 +5,12 @@ process QIIME2_ANCOM_ASV {
label 'process_long'
label 'error_ignore'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
tuple path(metadata), path(table)
diff --git a/modules/local/qiime2_ancom_tax.nf b/modules/local/qiime2_ancom_tax.nf
index e0829fb8..9f5392ef 100644
--- a/modules/local/qiime2_ancom_tax.nf
+++ b/modules/local/qiime2_ancom_tax.nf
@@ -3,8 +3,12 @@ process QIIME2_ANCOM_TAX {
label 'process_medium'
label 'single_cpu'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
tuple path(metadata), path(table), path(taxonomy) ,val(taxlevel)
diff --git a/modules/local/qiime2_barplot.nf b/modules/local/qiime2_barplot.nf
index b86d1ddc..3e83ab02 100644
--- a/modules/local/qiime2_barplot.nf
+++ b/modules/local/qiime2_barplot.nf
@@ -1,8 +1,12 @@
process QIIME2_BARPLOT {
label 'process_low'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
path(metadata)
diff --git a/modules/local/qiime2_classify.nf b/modules/local/qiime2_classify.nf
index 2a1396bf..f5a4824d 100644
--- a/modules/local/qiime2_classify.nf
+++ b/modules/local/qiime2_classify.nf
@@ -2,8 +2,12 @@ process QIIME2_CLASSIFY {
tag "${repseq},${trained_classifier}"
label 'process_high'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
path(trained_classifier)
diff --git a/modules/local/qiime2_diversity_adonis.nf b/modules/local/qiime2_diversity_adonis.nf
index e845bd3d..25bc95f8 100644
--- a/modules/local/qiime2_diversity_adonis.nf
+++ b/modules/local/qiime2_diversity_adonis.nf
@@ -2,8 +2,12 @@ process QIIME2_DIVERSITY_ADONIS {
tag "${core.baseName} - ${formula}"
label 'process_low'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
tuple path(metadata), path(core), val(formula)
diff --git a/modules/local/qiime2_diversity_alpha.nf b/modules/local/qiime2_diversity_alpha.nf
index e03adf98..dff59e3e 100644
--- a/modules/local/qiime2_diversity_alpha.nf
+++ b/modules/local/qiime2_diversity_alpha.nf
@@ -2,8 +2,12 @@ process QIIME2_DIVERSITY_ALPHA {
tag "${core.baseName}"
label 'process_low'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
tuple path(metadata), path(core)
diff --git a/modules/local/qiime2_diversity_beta.nf b/modules/local/qiime2_diversity_beta.nf
index 16c7cd5a..f6fc5ee7 100644
--- a/modules/local/qiime2_diversity_beta.nf
+++ b/modules/local/qiime2_diversity_beta.nf
@@ -2,8 +2,12 @@ process QIIME2_DIVERSITY_BETA {
tag "${core.baseName} - ${category}"
label 'process_low'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
tuple path(metadata), path(core), val(category)
diff --git a/modules/local/qiime2_diversity_betaord.nf b/modules/local/qiime2_diversity_betaord.nf
index 797abbb6..7b2699a4 100644
--- a/modules/local/qiime2_diversity_betaord.nf
+++ b/modules/local/qiime2_diversity_betaord.nf
@@ -2,8 +2,12 @@ process QIIME2_DIVERSITY_BETAORD {
tag "${core.baseName}"
label 'process_low'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
tuple path(metadata), path(core)
diff --git a/modules/local/qiime2_diversity_core.nf b/modules/local/qiime2_diversity_core.nf
index 057d5bcb..ebfbff2f 100644
--- a/modules/local/qiime2_diversity_core.nf
+++ b/modules/local/qiime2_diversity_core.nf
@@ -1,14 +1,19 @@
process QIIME2_DIVERSITY_CORE {
label 'process_low'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
path(metadata)
path(table)
path(tree)
path(stats)
+ val(mindepth)
output:
path("diversity_core/*_pcoa_results.qza") , emit: pcoa
@@ -25,6 +30,7 @@ process QIIME2_DIVERSITY_CORE {
export XDG_CONFIG_HOME="\${PWD}/HOME"
mindepth=\$(count_table_minmax_reads.py $stats minimum 2>&1)
+ if [ \"\$mindepth\" -lt \"$mindepth\" ]; then mindepth=$mindepth; fi
if [ \"\$mindepth\" -gt \"10000\" ]; then echo \$mindepth >\"Use the sampling depth of \$mindepth for rarefaction.txt\" ; fi
if [ \"\$mindepth\" -lt \"10000\" -a \"\$mindepth\" -gt \"5000\" ]; then echo \$mindepth >\"WARNING The sampling depth of \$mindepth is quite small for rarefaction.txt\" ; fi
if [ \"\$mindepth\" -lt \"5000\" -a \"\$mindepth\" -gt \"1000\" ]; then echo \$mindepth >\"WARNING The sampling depth of \$mindepth is very small for rarefaction.txt\" ; fi
diff --git a/modules/local/qiime2_export_absolute.nf b/modules/local/qiime2_export_absolute.nf
index e436c21f..624547d5 100644
--- a/modules/local/qiime2_export_absolute.nf
+++ b/modules/local/qiime2_export_absolute.nf
@@ -1,8 +1,12 @@
process QIIME2_EXPORT_ABSOLUTE {
label 'process_low'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
path(table)
diff --git a/modules/local/qiime2_export_relasv.nf b/modules/local/qiime2_export_relasv.nf
index 52b5b0e3..a5b81388 100644
--- a/modules/local/qiime2_export_relasv.nf
+++ b/modules/local/qiime2_export_relasv.nf
@@ -1,8 +1,12 @@
process QIIME2_EXPORT_RELASV {
label 'process_low'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
path(table)
diff --git a/modules/local/qiime2_export_reltax.nf b/modules/local/qiime2_export_reltax.nf
index 4b813a61..8f090b07 100644
--- a/modules/local/qiime2_export_reltax.nf
+++ b/modules/local/qiime2_export_reltax.nf
@@ -1,8 +1,12 @@
process QIIME2_EXPORT_RELTAX {
label 'process_low'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
path(table)
diff --git a/modules/local/qiime2_extract.nf b/modules/local/qiime2_extract.nf
index 0be7baec..6f686906 100644
--- a/modules/local/qiime2_extract.nf
+++ b/modules/local/qiime2_extract.nf
@@ -3,8 +3,12 @@ process QIIME2_EXTRACT {
label 'process_low'
label 'single_cpu'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
tuple val(meta), path(database)
diff --git a/modules/local/qiime2_featuretable_group.nf b/modules/local/qiime2_featuretable_group.nf
index aeddbb73..71e9a9b2 100644
--- a/modules/local/qiime2_featuretable_group.nf
+++ b/modules/local/qiime2_featuretable_group.nf
@@ -2,8 +2,12 @@ process QIIME2_FEATURETABLE_GROUP {
tag "${category}"
label 'process_low'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
tuple path(table), path(metadata), val(category)
diff --git a/modules/local/qiime2_filterasv.nf b/modules/local/qiime2_filterasv.nf
deleted file mode 100644
index 8fa690d5..00000000
--- a/modules/local/qiime2_filterasv.nf
+++ /dev/null
@@ -1,33 +0,0 @@
-process QIIME2_FILTERASV {
- tag "${category}"
- label 'process_low'
-
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
-
- input:
- tuple path(metadata), path(table), val(category)
-
- output:
- path("*.qza") , emit: qza
- path "versions.yml" , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- """
- export XDG_CONFIG_HOME="\${PWD}/HOME"
-
- qiime feature-table filter-samples \\
- --i-table ${table} \\
- --m-metadata-file ${metadata} \\
- --p-where \"${category}<>\'\'\" \\
- --o-filtered-table ${category}.qza
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- qiime2: \$( qiime --version | sed '1!d;s/.* //' )
- END_VERSIONS
- """
-}
diff --git a/modules/local/qiime2_filtersamples.nf b/modules/local/qiime2_filtersamples.nf
new file mode 100644
index 00000000..6a4a7310
--- /dev/null
+++ b/modules/local/qiime2_filtersamples.nf
@@ -0,0 +1,39 @@
+process QIIME2_FILTERSAMPLES {
+ tag "${filter}"
+ label 'process_low'
+
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
+
+ input:
+ tuple path(metadata), path(table), val(filter)
+
+ output:
+ path("*.qza") , emit: qza
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: "--p-where \'${filter}<>\"\"\'"
+ def prefix = task.ext.prefix ?: "${filter}"
+ """
+ export XDG_CONFIG_HOME="\${PWD}/HOME"
+
+ qiime feature-table filter-samples \\
+ --i-table ${table} \\
+ --m-metadata-file ${metadata} \\
+ $args \\
+ --o-filtered-table ${prefix}.qza
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ qiime2: \$( qiime --version | sed '1!d;s/.* //' )
+ END_VERSIONS
+ """
+}
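
Compared with the removed QIIME2_FILTERASV, the replacement module parameterizes both the filter expression (`task.ext.args`, defaulting to a `--p-where` clause that keeps samples whose metadata column is non-empty) and the output name (`task.ext.prefix`). That makes per-invocation overrides possible from configuration. A hypothetical `modules.config` snippet; the selector is the alias used below in the ANCOM subworkflow, but the `--p-min-frequency` override is illustrative, not taken from the pipeline:

    process {
        withName: 'QIIME2_FILTERSAMPLES_ANCOM' {
            // Hypothetical override: filter by a minimum per-sample count instead
            // of the default non-empty-metadata-column clause.
            ext.args = { "--p-min-frequency ${params.ancom_sample_min_count}" }
        }
    }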
diff --git a/modules/local/qiime2_filtertaxa.nf b/modules/local/qiime2_filtertaxa.nf
index d6770168..0a25803e 100644
--- a/modules/local/qiime2_filtertaxa.nf
+++ b/modules/local/qiime2_filtertaxa.nf
@@ -2,8 +2,12 @@ process QIIME2_FILTERTAXA {
tag "taxa:${exclude_taxa};min-freq:${min_frequency};min-samples:${min_samples}"
label 'process_low'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
path(table)
diff --git a/modules/local/qiime2_inasv.nf b/modules/local/qiime2_inasv.nf
index 19e4e6ef..348aea87 100644
--- a/modules/local/qiime2_inasv.nf
+++ b/modules/local/qiime2_inasv.nf
@@ -2,8 +2,12 @@ process QIIME2_INASV {
tag "${asv}"
label 'process_low'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
path(asv)
diff --git a/modules/local/qiime2_inseq.nf b/modules/local/qiime2_inseq.nf
index c615ca46..a0504053 100644
--- a/modules/local/qiime2_inseq.nf
+++ b/modules/local/qiime2_inseq.nf
@@ -2,8 +2,12 @@ process QIIME2_INSEQ {
tag "${seq}"
label 'process_low'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
path(seq)
diff --git a/modules/local/qiime2_intax.nf b/modules/local/qiime2_intax.nf
index 286d04be..0e6c69e1 100644
--- a/modules/local/qiime2_intax.nf
+++ b/modules/local/qiime2_intax.nf
@@ -2,8 +2,12 @@ process QIIME2_INTAX {
tag "${tax}"
label 'process_low'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
path(tax) //ASV_tax_species.tsv
diff --git a/modules/local/qiime2_train.nf b/modules/local/qiime2_train.nf
index da7fbf8f..254118f8 100644
--- a/modules/local/qiime2_train.nf
+++ b/modules/local/qiime2_train.nf
@@ -3,8 +3,12 @@ process QIIME2_TRAIN {
label 'process_high'
label 'single_cpu'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
tuple val(meta), path(qza)
diff --git a/modules/local/qiime2_tree.nf b/modules/local/qiime2_tree.nf
index 626a0f49..5fc32fed 100644
--- a/modules/local/qiime2_tree.nf
+++ b/modules/local/qiime2_tree.nf
@@ -1,8 +1,12 @@
process QIIME2_TREE {
label 'process_medium'
- conda (params.enable_conda ? { exit 1 "QIIME2 has no conda package" } : null)
- container "quay.io/qiime2/core:2022.8"
+ container "quay.io/qiime2/core:2022.11"
+
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ exit 1, "QIIME2 does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
input:
path(repseq)
diff --git a/modules/local/rename_raw_data_files.nf b/modules/local/rename_raw_data_files.nf
index 49a10c3d..3f277c7a 100644
--- a/modules/local/rename_raw_data_files.nf
+++ b/modules/local/rename_raw_data_files.nf
@@ -2,7 +2,7 @@ process RENAME_RAW_DATA_FILES {
tag "$meta.id"
label 'process_low'
- conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
+ conda "conda-forge::sed=4.7"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
'ubuntu:20.04' }"
diff --git a/modules/local/sbdiexport.nf b/modules/local/sbdiexport.nf
index 0d037bd1..c2c347cb 100644
--- a/modules/local/sbdiexport.nf
+++ b/modules/local/sbdiexport.nf
@@ -2,7 +2,7 @@ process SBDIEXPORT {
tag "${asvtable},${taxonomytable},${metadata}"
label 'process_low'
- conda (params.enable_conda ? "bioconda::r-tidyverse=1.2.1" : null)
+ conda "conda-forge::r-tidyverse=1.2.1"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/r-tidyverse:1.2.1' :
'quay.io/biocontainers/r-tidyverse:1.2.1' }"
diff --git a/modules/local/sbdiexportreannotate.nf b/modules/local/sbdiexportreannotate.nf
index aae48eb2..8036fafc 100644
--- a/modules/local/sbdiexportreannotate.nf
+++ b/modules/local/sbdiexportreannotate.nf
@@ -2,13 +2,14 @@ process SBDIEXPORTREANNOTATE {
tag "${taxonomytable}"
label 'process_low'
- conda (params.enable_conda ? "bioconda::r-tidyverse=1.2.1" : null)
+ conda "conda-forge::r-tidyverse=1.2.1"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/r-tidyverse:1.2.1' :
'quay.io/biocontainers/r-tidyverse:1.2.1' }"
input:
path taxonomytable
+ path predictions
output:
path "*.tsv" , emit: sbdiannottables
@@ -19,7 +20,13 @@ process SBDIEXPORTREANNOTATE {
script:
"""
- sbdiexportreannotate.R ${params.dada_ref_databases[params.dada_ref_taxonomy]["dbversion"]} $taxonomytable
+ if [[ $workflow.manifest.version == *dev ]]; then
+ ampliseq_version="v$workflow.manifest.version, revision: ${workflow.scriptId.substring(0,10)}"
+ else
+ ampliseq_version="v$workflow.manifest.version"
+ fi
+
+ sbdiexportreannotate.R \"${params.dada_ref_databases[params.dada_ref_taxonomy]["dbversion"]}\" $taxonomytable \"\$ampliseq_version\" $predictions
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/local/trunclen.nf b/modules/local/trunclen.nf
index 30d2efdc..faea5bf0 100644
--- a/modules/local/trunclen.nf
+++ b/modules/local/trunclen.nf
@@ -2,7 +2,7 @@ process TRUNCLEN {
tag "$meta"
label 'process_low'
- conda (params.enable_conda ? "pandas=1.1.5" : null)
+ conda "conda-forge::pandas=1.1.5"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/pandas:1.1.5' :
'quay.io/biocontainers/pandas:1.1.5' }"
diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf
index cebb6e05..3df21765 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/main.nf
+++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf
@@ -2,7 +2,7 @@ process CUSTOM_DUMPSOFTWAREVERSIONS {
label 'process_single'
// Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
- conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null)
+ conda "bioconda::multiqc=1.13"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' :
'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }"
diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
old mode 100644
new mode 100755
index d1390392..e55b8d43
--- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
+++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
@@ -1,11 +1,17 @@
#!/usr/bin/env python
-import yaml
+
+"""Provide functions to merge multiple versions.yml files."""
+
+
import platform
from textwrap import dedent
+import yaml
+
def _make_versions_html(versions):
+ """Generate a tabular HTML output of all versions for MultiQC."""
html = [
dedent(
"""\\
@@ -44,46 +50,53 @@ def _make_versions_html(versions):
return "\\n".join(html)
-versions_this_module = {}
-versions_this_module["${task.process}"] = {
- "python": platform.python_version(),
- "yaml": yaml.__version__,
-}
-
-with open("$versions") as f:
- versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
-
-# aggregate versions by the module name (derived from fully-qualified process name)
-versions_by_module = {}
-for process, process_versions in versions_by_process.items():
- module = process.split(":")[-1]
- try:
- assert versions_by_module[module] == process_versions, (
- "We assume that software versions are the same between all modules. "
- "If you see this error-message it means you discovered an edge-case "
- "and should open an issue in nf-core/tools. "
- )
- except KeyError:
- versions_by_module[module] = process_versions
-
-versions_by_module["Workflow"] = {
- "Nextflow": "$workflow.nextflow.version",
- "$workflow.manifest.name": "$workflow.manifest.version",
-}
-
-versions_mqc = {
- "id": "software_versions",
- "section_name": "${workflow.manifest.name} Software Versions",
- "section_href": "https://github.com/${workflow.manifest.name}",
- "plot_type": "html",
- "description": "are collected at run time from the software output.",
- "data": _make_versions_html(versions_by_module),
-}
-
-with open("software_versions.yml", "w") as f:
- yaml.dump(versions_by_module, f, default_flow_style=False)
-with open("software_versions_mqc.yml", "w") as f:
- yaml.dump(versions_mqc, f, default_flow_style=False)
-
-with open("versions.yml", "w") as f:
- yaml.dump(versions_this_module, f, default_flow_style=False)
+def main():
+ """Load all version files and generate merged output."""
+ versions_this_module = {}
+ versions_this_module["${task.process}"] = {
+ "python": platform.python_version(),
+ "yaml": yaml.__version__,
+ }
+
+ with open("$versions") as f:
+ versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
+
+ # aggregate versions by the module name (derived from fully-qualified process name)
+ versions_by_module = {}
+ for process, process_versions in versions_by_process.items():
+ module = process.split(":")[-1]
+ try:
+ if versions_by_module[module] != process_versions:
+ raise AssertionError(
+ "We assume that software versions are the same between all modules. "
+ "If you see this error-message it means you discovered an edge-case "
+ "and should open an issue in nf-core/tools. "
+ )
+ except KeyError:
+ versions_by_module[module] = process_versions
+
+ versions_by_module["Workflow"] = {
+ "Nextflow": "$workflow.nextflow.version",
+ "$workflow.manifest.name": "$workflow.manifest.version",
+ }
+
+ versions_mqc = {
+ "id": "software_versions",
+ "section_name": "${workflow.manifest.name} Software Versions",
+ "section_href": "https://github.com/${workflow.manifest.name}",
+ "plot_type": "html",
+ "description": "are collected at run time from the software output.",
+ "data": _make_versions_html(versions_by_module),
+ }
+
+ with open("software_versions.yml", "w") as f:
+ yaml.dump(versions_by_module, f, default_flow_style=False)
+ with open("software_versions_mqc.yml", "w") as f:
+ yaml.dump(versions_mqc, f, default_flow_style=False)
+
+ with open("versions.yml", "w") as f:
+ yaml.dump(versions_this_module, f, default_flow_style=False)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/modules/nf-core/cutadapt/main.nf b/modules/nf-core/cutadapt/main.nf
index 9b310c0e..45a99ee8 100644
--- a/modules/nf-core/cutadapt/main.nf
+++ b/modules/nf-core/cutadapt/main.nf
@@ -2,7 +2,7 @@ process CUTADAPT {
tag "$meta.id"
label 'process_medium'
- conda (params.enable_conda ? 'bioconda::cutadapt=3.4' : null)
+ conda "bioconda::cutadapt=3.4"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/cutadapt:3.4--py39h38f01e4_1' :
'quay.io/biocontainers/cutadapt:3.4--py39h38f01e4_1' }"
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
index 05730368..9ae58381 100644
--- a/modules/nf-core/fastqc/main.nf
+++ b/modules/nf-core/fastqc/main.nf
@@ -2,7 +2,7 @@ process FASTQC {
tag "$meta.id"
label 'process_medium'
- conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null)
+ conda "bioconda::fastqc=0.11.9"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' :
'quay.io/biocontainers/fastqc:0.11.9--0' }"
@@ -20,30 +20,22 @@ process FASTQC {
script:
def args = task.ext.args ?: ''
- // Add soft-links to original FastQs for consistent naming in pipeline
def prefix = task.ext.prefix ?: "${meta.id}"
- if (meta.single_end) {
- """
- [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz
- fastqc $args --threads $task.cpus ${prefix}.fastq.gz
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
- END_VERSIONS
- """
- } else {
- """
- [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz
- [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz
- fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
- END_VERSIONS
- """
- }
+ // Make list of old name and new name pairs to use for renaming in the bash while loop
+ def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] }
+ def rename_to = old_new_pairs*.join(' ').join(' ')
+ def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ')
+ """
+ printf "%s %s\\n" $rename_to | while read old_name new_name; do
+ [ -f "\${new_name}" ] || ln -s \$old_name \$new_name
+ done
+ fastqc $args --threads $task.cpus $renamed_files
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" )
+ END_VERSIONS
+ """
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
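
The refactored FASTQC script replaces the separate single-end and paired-end branches with one generic rename list built in Groovy. A standalone sketch of what the three `def` lines evaluate to for a paired-end sample; the file names are illustrative strings, whereas in the module the entries are staged `Path` objects and the new suffix comes from `entry.extension`:

    // Sketch of the rename bookkeeping in the new FASTQC script block.
    def prefix = 'sample1'
    def reads  = ['sample1_S1_L001_R1_001.fastq.gz', 'sample1_S1_L001_R2_001.fastq.gz']
    def old_new_pairs = reads.withIndex().collect { entry, index ->
        [entry, "${prefix}_${index + 1}.fastq.gz"]
    }
    // Flattened into "old new old new ..." pairs consumed by the bash while-read loop.
    def rename_to     = old_new_pairs*.join(' ').join(' ')
    def renamed_files = old_new_pairs.collect { old_name, new_name -> new_name }.join(' ')
    assert renamed_files == 'sample1_1.fastq.gz sample1_2.fastq.gz'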
diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf
index a8159a57..4b604749 100644
--- a/modules/nf-core/multiqc/main.nf
+++ b/modules/nf-core/multiqc/main.nf
@@ -1,10 +1,10 @@
process MULTIQC {
label 'process_single'
- conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null)
+ conda "bioconda::multiqc=1.14"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' :
- 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' :
+ 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }"
input:
path multiqc_files, stageAs: "?/*"
diff --git a/modules/nf-core/vsearch/usearchglobal/main.nf b/modules/nf-core/vsearch/usearchglobal/main.nf
index c85546c1..7ce662ba 100644
--- a/modules/nf-core/vsearch/usearchglobal/main.nf
+++ b/modules/nf-core/vsearch/usearchglobal/main.nf
@@ -2,7 +2,7 @@ process VSEARCH_USEARCHGLOBAL {
tag "${meta.id}"
label 'process_low'
- conda (params.enable_conda ? "bioconda::vsearch=2.21.1" : null)
+ conda "bioconda::vsearch=2.21.1"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/vsearch:2.21.1--h95f258a_0':
'quay.io/biocontainers/vsearch:2.21.1--h95f258a_0' }"
diff --git a/nextflow.config b/nextflow.config
index d6e3f7c5..5f4ae444 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -48,7 +48,7 @@ params {
sbdiexport = false
addsh = false
dada_tax_agglom_min = 2
- dada_tax_agglom_max = 7
+ dada_tax_agglom_max = 6
qiime_tax_agglom_min = 2
qiime_tax_agglom_max = 6
ignore_failed_trimming = false
@@ -58,6 +58,8 @@ params {
filter_ssu = null
min_len_asv = null
max_len_asv = null
+ diversity_rarefaction_depth = 500
+ ancom_sample_min_count = 1
// Skipping options
skip_cutadapt = false
@@ -100,10 +102,10 @@ params {
monochrome_logs = false
hook_url = null
help = false
+ version = false
validate_params = true
show_hidden_params = false
schema_ignore_params = 'dada_ref_databases,qiime_ref_databases,igenomes_base'
- enable_conda = false
// Config options
@@ -145,7 +147,7 @@ try {
profiles {
debug { process.beforeScript = 'echo $HOSTNAME' }
conda {
- params.enable_conda = true
+ conda.enabled = true
docker.enabled = false
singularity.enabled = false
podman.enabled = false
@@ -153,7 +155,7 @@ profiles {
charliecloud.enabled = false
}
mamba {
- params.enable_conda = true
+ conda.enabled = true
conda.useMamba = true
docker.enabled = false
singularity.enabled = false
@@ -169,6 +171,9 @@ profiles {
shifter.enabled = false
charliecloud.enabled = false
}
+ arm {
+ docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64'
+ }
singularity {
singularity.enabled = true
singularity.autoMounts = true
@@ -250,12 +255,12 @@ dag {
manifest {
name = 'nf-core/ampliseq'
- author = 'Daniel Straub, Alexander Peltzer'
+ author = """Daniel Straub, Alexander Peltzer"""
homePage = 'https://github.com/nf-core/ampliseq'
- description = 'Amplicon sequencing analysis workflow using DADA2 and QIIME2'
+ description = """Amplicon sequencing analysis workflow using DADA2 and QIIME2"""
mainScript = 'main.nf'
- nextflowVersion = '!>=21.10.3'
- version = '2.4.1'
+ nextflowVersion = '!>=22.10.1'
+ version = '2.5.0'
doi = '10.3389/fmicb.2020.550420'
}
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 540cc870..4a343309 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -16,7 +16,7 @@
"mimetype": "text/tsv",
"fa_icon": "fas fa-dna",
"description": "Either a tab-separated sample sheet, a fasta file, or a folder containing zipped FastQ files",
- "help_text": "Points to the main pipeline input, one of the following:\n- folder containing compressed fastq files\n- sample sheet ending with `.tsv` that points towards compressed fastq files\n- fasta file ending with `.fasta`, `.fna` or `.fa` that will be taxonomically classified\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-ended Illumina data (default expected: paired-end Illumina data)\n- `--multiple_sequencing_runs` (folder input only) if the sequencing data originates from multiple sequencing runs\n- `--extension` (folder input only) if the sequencing file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`)\n- `--dada_ref_taxonomy` and `--qiime_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS) (default: 16S rRNA sequence database)\n\n##### Folder containing zipped FastQ files\n\nFor example:\n\n```bash\n--input 'path/to/data'\n```\n\nExample for input data organization from one sequencing run with two samples, paired-end data:\n\n```bash\ndata\n \u251c\u2500sample1_1_L001_R1_001.fastq.gz\n \u251c\u2500sample1_1_L001_R2_001.fastq.gz\n \u251c\u2500sample2_1_L001_R1_001.fastq.gz\n \u2514\u2500sample2_1_L001_R2_001.fastq.gz\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The folder must contain gzip compressed demultiplexed fastq files. If the file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`), please check `--extension`.\n3. Sample identifiers are extracted from file names, i.e. the string before the first underscore `_`, these must be unique\n4. If your data is scattered, produce a sample sheet\n5. All sequencing data should originate from one sequencing run, because processing relies on run-specific error models that are unreliable when data from several sequencing runs are mixed. Sequencing data originating from multiple sequencing runs requires additionally the parameter `--multiple_sequencing_runs` and a specific folder structure.\n\n##### Sample sheet\n\nThe sample sheet file is an alternative way to provide input reads, it must be a tab-separated file ending with `.tsv` that must have two to four columns with the following headers: \n- `sampleID` (required): Unique sample identifiers, any unique string (may not contain dots `.`, must not start with a number when using metadata)\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nFor example:\n\n```bash\n--input 'path/to/samplesheet.tsv'\n```\n\n##### Fasta file\n\nWhen pointing at a file ending with `.fasta`, `.fna` or `.fa`, the containing sequences will be taxonomically classified. All other pipeline steps will be skipped.\n\nThis can be used to taxonomically classify previously produced ASV/OTU sequences.\n\nFor example:\n\n```bash\n--input 'path/to/amplicon_sequences.fasta'\n```"
+        "help_text": "Points to the main pipeline input, one of the following:\n- folder containing compressed fastq files\n- sample sheet ending with `.tsv` that points towards compressed fastq files\n- fasta file ending with `.fasta`, `.fna` or `.fa` that will be taxonomically classified\n\nRelated parameters are:\n- `--pacbio` and `--iontorrent` if the sequencing data is PacBio data or IonTorrent data (default expected: paired-end Illumina data)\n- `--single_end` if the sequencing data is single-end Illumina data (default expected: paired-end Illumina data)\n- `--multiple_sequencing_runs` (folder input only) if the sequencing data originates from multiple sequencing runs\n- `--extension` (folder input only) if the sequencing file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`)\n- `--dada_ref_taxonomy` and `--qiime_ref_taxonomy` to choose an appropriate reference taxonomy for the type of amplicon (16S/18S/ITS/CO1) (default: 16S rRNA sequence database)\n\n##### Folder containing zipped FastQ files\n\nFor example:\n\n```bash\n--input 'path/to/data'\n```\n\nExample for input data organization from one sequencing run with two samples, paired-end data:\n\n```bash\ndata\n \u251c\u2500sample1_1_L001_R1_001.fastq.gz\n \u251c\u2500sample1_1_L001_R2_001.fastq.gz\n \u251c\u2500sample2_1_L001_R1_001.fastq.gz\n \u2514\u2500sample2_1_L001_R2_001.fastq.gz\n```\n\nPlease note the following requirements:\n\n1. The path must be enclosed in quotes\n2. The folder must contain gzip-compressed demultiplexed fastq files. If the file names do not follow the default (`\"/*_R{1,2}_001.fastq.gz\"`), please check `--extension`.\n3. Sample identifiers are extracted from file names, i.e. the string before the first underscore `_`; these must be unique\n4. If your data is scattered, produce a sample sheet\n5. All sequencing data should originate from one sequencing run, because processing relies on run-specific error models that are unreliable when data from several sequencing runs are mixed. Sequencing data originating from multiple sequencing runs additionally requires the parameter `--multiple_sequencing_runs` and a specific folder structure.\n\n##### Sample sheet\n\nThe sample sheet file is an alternative way to provide input reads; it must be a tab-separated file ending with `.tsv` with two to four columns and the following headers:\n- `sampleID` (required): Unique sample identifiers, any unique string (may not contain dots `.`, must not start with a number)\n- `forwardReads` (required): Paths to (forward) reads zipped FastQ files\n- `reverseReads` (optional): Paths to reverse reads zipped FastQ files, required if the data is paired-end\n- `run` (optional): If the data was produced by multiple sequencing runs, any string\n\nFor example:\n\n```bash\n--input 'path/to/samplesheet.tsv'\n```\n\n##### Fasta file\n\nWhen pointing at a file ending with `.fasta`, `.fna` or `.fa`, the containing sequences will be taxonomically classified. All other pipeline steps will be skipped.\n\nThe sequence header line may contain a description, which will be kept as part of the sequence name. However, tabs will be changed into spaces.\n\nThe fasta file input option can be used to taxonomically classify previously produced ASV/OTU sequences.\n\nFor example:\n\n```bash\n--input 'path/to/amplicon_sequences.fasta'\n```"
},
"FW_primer": {
"type": "string",
@@ -208,7 +208,7 @@
"properties": {
"dada_ref_taxonomy": {
"type": "string",
- "help_text": "Choose any of the supported databases, and optionally also specify the version. Database and version are separated by an equal sign (`=`, e.g. `silva=138`) . This will download the desired database, format it to produce a file that is compatible with DADA2's assignTaxonomy and another file that is compatible with DADA2's addSpecies.\n\nThe following databases are supported:\n- GTDB - Genome Taxonomy Database - 16S rRNA\n- PR2 - Protist Reference Ribosomal Database - 18S rRNA\n- RDP - Ribosomal Database Project - 16S rRNA\n- SILVA ribosomal RNA gene database project - 16S rRNA\n- UNITE - eukaryotic nuclear ribosomal ITS region - ITS\n\nGenerally, using `gtdb`, `pr2`, `rdp`, `sbdi-gtdb`, `silva`, `unite-fungi`, or `unite-alleuk` will select the most recent supported version. For details on what values are valid, please either use an invalid value such as `x` (causing the pipeline to send an error message with a list of all valid values) or see `conf/ref_databases.config`.\n\nPlease note that commercial/non-academic entities [require licensing](https://www.arb-silva.de/silva-license-information) for SILVA v132 database (non-default) but not from v138 on (default).",
+        "help_text": "Choose any of the supported databases, and optionally also specify the version. Database and version are separated by an equal sign (`=`, e.g. `silva=138`). This will download the desired database, format it to produce a file that is compatible with DADA2's assignTaxonomy and another file that is compatible with DADA2's addSpecies.\n\nThe following databases are supported:\n- GTDB - Genome Taxonomy Database - 16S rRNA\n- PR2 - Protist Reference Ribosomal Database - 18S rRNA\n- RDP - Ribosomal Database Project - 16S rRNA\n- SILVA ribosomal RNA gene database project - 16S rRNA\n- UNITE - eukaryotic nuclear ribosomal ITS region - ITS\n- COIDB - eukaryotic Cytochrome Oxidase I (COI) from The Barcode of Life Data System (BOLD) - COI\n\nGenerally, using `gtdb`, `pr2`, `rdp`, `sbdi-gtdb`, `silva`, `coidb`, `unite-fungi`, or `unite-alleuk` will select the most recent supported version. For details on what values are valid, please either use an invalid value such as `x` (causing the pipeline to send an error message with a list of all valid values) or see `conf/ref_databases.config`.\n\nPlease note that commercial/non-academic entities [require licensing](https://www.arb-silva.de/silva-license-information) for the SILVA v132 database (non-default) but not from v138 on (default).",
"description": "Name of supported database, and optionally also version number",
"default": "silva=138",
"enum": [
@@ -218,6 +218,8 @@
"gtdb=R06-RS202",
"gtdb=R07-RS207",
"gtdb",
+ "coidb",
+ "coidb=221216",
"pr2=4.14.0",
"pr2=4.13.0",
"pr2",
@@ -305,7 +307,7 @@
"properties": {
"filter_ssu": {
"type": "string",
- "description": "Enable SSU filtering. Comma separated list of kingdoms in Barrnap.",
+        "description": "Enable SSU filtering. Comma-separated list of kingdoms (domains) in Barrnap: one or a combination of \"bac\", \"arc\", \"mito\", and \"euk\". ASVs that have their lowest e-value in one of those kingdoms are kept.",
"enum": [
"bac,arc,mito,euk",
"bac",
@@ -384,6 +386,18 @@
"type": "boolean",
"description": "If data should be exported in SBDI (Swedish biodiversity infrastructure) Excel format."
},
+ "diversity_rarefaction_depth": {
+ "type": "integer",
+ "default": 500,
+        "description": "Minimum rarefaction depth for diversity analysis. Any sample with fewer reads than this threshold will be removed.",
+ "fa_icon": "fas fa-greater-than-equal"
+ },
+ "ancom_sample_min_count": {
+ "type": "integer",
+ "default": 1,
+        "description": "Minimum number of reads a sample must have to be retained for ANCOM analysis. Any sample below this threshold will be removed.",
+ "fa_icon": "fas fa-greater-than-equal"
+ },
"dada_tax_agglom_min": {
"type": "integer",
"default": 2,
@@ -393,10 +407,10 @@
},
"dada_tax_agglom_max": {
"type": "integer",
- "default": 7,
+ "default": 6,
"description": "Maximum taxonomy agglomeration level for DADA2 classification",
"fa_icon": "fas fa-greater-than-equal",
- "help_text": "Depends on the reference taxonomy database used. Default databases should have genus level at 7."
+ "help_text": "Depends on the reference taxonomy database used. Most default databases have genus level at 6."
},
"qiime_tax_agglom_min": {
"type": "integer",
@@ -505,6 +519,12 @@
"fa_icon": "fas fa-question-circle",
"hidden": true
},
+ "version": {
+ "type": "boolean",
+ "description": "Display version and exit.",
+ "fa_icon": "fas fa-question-circle",
+ "hidden": true
+ },
"publish_dir_mode": {
"type": "string",
"default": "copy",
@@ -546,7 +566,7 @@
"type": "string",
"description": "Incoming hook URL for messaging service",
"fa_icon": "fas fa-people-group",
- "help_text": "Incoming hook URL for messaging service. Currently, only MS Teams is supported.",
+ "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.",
"hidden": true
},
"multiqc_config": {
@@ -579,12 +599,6 @@
"default": true,
"fa_icon": "fas fa-check-square",
"hidden": true
- },
- "enable_conda": {
- "type": "boolean",
- "description": "Run this workflow with Conda. You can also use '-profile conda' instead of providing this parameter.",
- "fa_icon": "fas fa-bacon",
- "hidden": true
}
}
},
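
Both new quantitative parameters surface in the schema with conservative defaults (500 and 1). A hypothetical custom configuration raising them for a deeply sequenced dataset, passed with `-c custom.config`; the values are illustrative:

    // custom.config -- hypothetical values; pipeline defaults are 500 and 1.
    params {
        diversity_rarefaction_depth = 1000
        ancom_sample_min_count      = 10
    }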
diff --git a/subworkflows/local/qiime2_ancom.nf b/subworkflows/local/qiime2_ancom.nf
index 548dbe82..af83733d 100644
--- a/subworkflows/local/qiime2_ancom.nf
+++ b/subworkflows/local/qiime2_ancom.nf
@@ -2,7 +2,7 @@
* Diversity indices with QIIME2
*/
-include { QIIME2_FILTERASV } from '../../modules/local/qiime2_filterasv'
+include { QIIME2_FILTERSAMPLES as QIIME2_FILTERSAMPLES_ANCOM } from '../../modules/local/qiime2_filtersamples'
include { QIIME2_ANCOM_TAX } from '../../modules/local/qiime2_ancom_tax'
include { QIIME2_ANCOM_ASV } from '../../modules/local/qiime2_ancom_asv'
@@ -20,18 +20,18 @@ workflow QIIME2_ANCOM {
ch_metadata
.combine( ch_asv )
.combine( ch_metacolumn_all )
- .set{ ch_for_filterasv }
- QIIME2_FILTERASV ( ch_for_filterasv )
+ .set{ ch_for_filtersamples }
+ QIIME2_FILTERSAMPLES_ANCOM ( ch_for_filtersamples )
//ANCOM on various taxonomic levels
ch_taxlevel = Channel.of( tax_agglom_min..tax_agglom_max )
ch_metadata
- .combine( QIIME2_FILTERASV.out.qza )
+ .combine( QIIME2_FILTERSAMPLES_ANCOM.out.qza )
.combine( ch_tax )
.combine( ch_taxlevel )
.set{ ch_for_ancom_tax }
QIIME2_ANCOM_TAX ( ch_for_ancom_tax )
QIIME2_ANCOM_TAX.out.ancom.subscribe { if ( it.baseName[0].toString().startsWith("WARNING") ) log.warn it.baseName[0].toString().replace("WARNING ","QIIME2_ANCOM_TAX: ") }
- QIIME2_ANCOM_ASV ( ch_metadata.combine( QIIME2_FILTERASV.out.qza.flatten() ) )
+ QIIME2_ANCOM_ASV ( ch_metadata.combine( QIIME2_FILTERSAMPLES_ANCOM.out.qza.flatten() ) )
}
diff --git a/subworkflows/local/qiime2_diversity.nf b/subworkflows/local/qiime2_diversity.nf
index ae6ef411..d5ce511d 100644
--- a/subworkflows/local/qiime2_diversity.nf
+++ b/subworkflows/local/qiime2_diversity.nf
@@ -20,6 +20,7 @@ workflow QIIME2_DIVERSITY {
ch_metacolumn_all //METADATA_ALL.out
skip_alpha_rarefaction
skip_diversity_indices
+ diversity_rarefaction_depth
main:
//Phylogenetic tree for beta & alpha diversities
@@ -33,7 +34,7 @@ workflow QIIME2_DIVERSITY {
//Calculate diversity indices
if (!skip_diversity_indices) {
- QIIME2_DIVERSITY_CORE ( ch_metadata, ch_asv, QIIME2_TREE.out.qza, ch_stats )
+ QIIME2_DIVERSITY_CORE ( ch_metadata, ch_asv, QIIME2_TREE.out.qza, ch_stats, diversity_rarefaction_depth )
//Print warning if rarefaction depth is <10000
QIIME2_DIVERSITY_CORE.out.depth.subscribe { if ( it.baseName.toString().startsWith("WARNING") ) log.warn it.baseName.toString().replace("WARNING ","QIIME2_DIVERSITY_CORE: ") }
diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf
index f2ebdacf..864d0802 100644
--- a/workflows/ampliseq.nf
+++ b/workflows/ampliseq.nf
@@ -101,9 +101,12 @@ if ( params.dada_ref_taxonomy && !params.skip_dada_addspecies && !params.skip_ta
}
//only run QIIME2 when taxonomy is actually calculated and all required data is available
-if ( !params.enable_conda && !params.skip_taxonomy && !params.skip_qiime ) {
+if ( !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) && !params.skip_taxonomy && !params.skip_qiime ) {
run_qiime2 = true
-} else { run_qiime2 = false }
+} else {
+ run_qiime2 = false
+    if ( workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 ) { log.warn "Conda or Mamba is enabled, so any steps involving QIIME2 are not available. Use a container engine instead of Conda to enable all software." }
+}
// Set cutoff to use for SH assignment
if ( params.addsh ) {
@@ -124,10 +127,12 @@ include { DADA2_RMCHIMERA } from '../modules/local/dada2_rmchimera
include { DADA2_STATS } from '../modules/local/dada2_stats'
include { DADA2_MERGE } from '../modules/local/dada2_merge'
include { BARRNAP } from '../modules/local/barrnap'
+include { BARRNAPSUMMARY } from '../modules/local/barrnapsummary'
include { FILTER_SSU } from '../modules/local/filter_ssu'
include { FILTER_LEN_ASV } from '../modules/local/filter_len_asv'
include { MERGE_STATS as MERGE_STATS_FILTERSSU } from '../modules/local/merge_stats'
include { MERGE_STATS as MERGE_STATS_FILTERLENASV } from '../modules/local/merge_stats'
+include { FORMAT_FASTAINPUT } from '../modules/local/format_fastainput'
include { FORMAT_TAXONOMY } from '../modules/local/format_taxonomy'
include { ITSX_CUTASV } from '../modules/local/itsx_cutasv'
include { MERGE_STATS as MERGE_STATS_STD} from '../modules/local/merge_stats'
@@ -198,7 +203,6 @@ workflow AMPLISEQ {
//
PARSE_INPUT ( params.input, is_fasta_input, single_end, params.multiple_sequencing_runs, params.extension )
ch_reads = PARSE_INPUT.out.reads
- ch_fasta = PARSE_INPUT.out.fasta
//
// MODULE: Rename files
@@ -231,7 +235,7 @@ workflow AMPLISEQ {
//
// SUBWORKFLOW: Read preprocessing & QC plotting with DADA2
//
- DADA2_PREPROCESSING (
+ DADA2_PREPROCESSING (
ch_trimmed_reads,
single_end,
find_truncation_values,
@@ -300,23 +304,34 @@ workflow AMPLISEQ {
//
// Modules : Filter rRNA
- // TODO: FILTER_SSU.out.stats needs to be merged still into "overall_summary.tsv"
//
+ if ( is_fasta_input ) {
+ FORMAT_FASTAINPUT( PARSE_INPUT.out.fasta )
+ ch_unfiltered_fasta = FORMAT_FASTAINPUT.out.fasta
+ } else {
+ ch_unfiltered_fasta = DADA2_MERGE.out.fasta
+ }
+
if (!params.skip_barrnap && params.filter_ssu) {
- BARRNAP ( DADA2_MERGE.out.fasta )
+ BARRNAP ( ch_unfiltered_fasta )
+ BARRNAPSUMMARY ( BARRNAP.out.gff.collect() )
+ ch_barrnapsummary = BARRNAPSUMMARY.out.summary
ch_versions = ch_versions.mix(BARRNAP.out.versions.ifEmpty(null))
- FILTER_SSU ( DADA2_MERGE.out.fasta, DADA2_MERGE.out.asv, BARRNAP.out.matches )
+ FILTER_SSU ( DADA2_MERGE.out.fasta, DADA2_MERGE.out.asv, BARRNAPSUMMARY.out.summary )
MERGE_STATS_FILTERSSU ( ch_stats, FILTER_SSU.out.stats )
ch_stats = MERGE_STATS_FILTERSSU.out.tsv
ch_dada2_fasta = FILTER_SSU.out.fasta
ch_dada2_asv = FILTER_SSU.out.asv
} else if (!params.skip_barrnap && !params.filter_ssu) {
- BARRNAP ( DADA2_MERGE.out.fasta )
+ BARRNAP ( ch_unfiltered_fasta )
+ BARRNAPSUMMARY ( BARRNAP.out.gff.collect() )
+ ch_barrnapsummary = BARRNAPSUMMARY.out.summary
ch_versions = ch_versions.mix(BARRNAP.out.versions.ifEmpty(null))
- ch_dada2_fasta = DADA2_MERGE.out.fasta
+ ch_dada2_fasta = ch_unfiltered_fasta
ch_dada2_asv = DADA2_MERGE.out.asv
} else {
- ch_dada2_fasta = DADA2_MERGE.out.fasta
+ ch_barrnapsummary = Channel.empty()
+ ch_dada2_fasta = ch_unfiltered_fasta
ch_dada2_asv = DADA2_MERGE.out.asv
}
@@ -324,7 +339,7 @@ workflow AMPLISEQ {
// Modules : amplicon length filtering
//
if (params.min_len_asv || params.max_len_asv) {
- FILTER_LEN_ASV ( ch_dada2_fasta,ch_dada2_asv )
+ FILTER_LEN_ASV ( ch_dada2_fasta, ch_dada2_asv.ifEmpty( [] ) )
ch_versions = ch_versions.mix(FILTER_LEN_ASV.out.versions.ifEmpty(null))
MERGE_STATS_FILTERLENASV ( ch_stats, FILTER_LEN_ASV.out.stats )
ch_stats = MERGE_STATS_FILTERLENASV.out.tsv
@@ -335,10 +350,7 @@ workflow AMPLISEQ {
//
// SUBWORKFLOW / MODULES : Taxonomic classification with DADA2 and/or QIIME2
//
- //Alternative entry point for fasta that is being classified
- if ( !is_fasta_input ) {
- ch_fasta = ch_dada2_fasta
- }
+ ch_fasta = ch_dada2_fasta
//DADA2
if (!params.skip_taxonomy) {
@@ -397,9 +409,9 @@ workflow AMPLISEQ {
ASSIGNSH( DADA2_TAXONOMY.out.tsv, ch_shinfo.collect(), VSEARCH_USEARCHGLOBAL.out.txt, 'ASV_tax_SH.tsv')
ch_versions = ch_versions.mix(ASSIGNSH.out.versions.ifEmpty(null))
ch_dada2_tax = ASSIGNSH.out.tsv
- } else {
- ch_dada2_tax = DADA2_TAXONOMY.out.tsv
- }
+ } else {
+ ch_dada2_tax = DADA2_TAXONOMY.out.tsv
+ }
}
//Cut out ITS region if long ITS reads
} else {
@@ -506,7 +518,7 @@ workflow AMPLISEQ {
tax_agglom_max = 2
}
- //Filtering by taxonomy & prevalence & counts
+ //Filtering ASVs by taxonomy & prevalence & counts
if (params.exclude_taxa != "none" || params.min_frequency != 1 || params.min_samples != 1) {
QIIME2_FILTERTAXA (
QIIME2_INASV.out.qza,
@@ -569,7 +581,8 @@ workflow AMPLISEQ {
ch_metacolumn_pairwise,
ch_metacolumn_all,
params.skip_alpha_rarefaction,
- params.skip_diversity_indices
+ params.skip_diversity_indices,
+ params.diversity_rarefaction_depth
)
}
@@ -604,7 +617,7 @@ workflow AMPLISEQ {
if ( params.sbdiexport ) {
SBDIEXPORT ( ch_dada2_asv, ch_dada2_tax, ch_metadata )
ch_versions = ch_versions.mix(SBDIEXPORT.out.versions.first())
- SBDIEXPORTREANNOTATE ( ch_dada2_tax )
+ SBDIEXPORTREANNOTATE ( ch_dada2_tax, ch_barrnapsummary )
}
CUSTOM_DUMPSOFTWAREVERSIONS (
@@ -632,16 +645,13 @@ workflow AMPLISEQ {
ch_multiqc_files = ch_multiqc_files.mix(CUTADAPT_WORKFLOW.out.logs.collect{it[1]}.ifEmpty([]))
}
- ch_multiqc_configs = Channel.from(ch_multiqc_config).mix(ch_multiqc_custom_config).ifEmpty([])
-
MULTIQC (
ch_multiqc_files.collect(),
- ch_multiqc_config.collect().ifEmpty([]),
- ch_multiqc_custom_config.collect().ifEmpty([]),
- ch_multiqc_logo.collect().ifEmpty([])
+ ch_multiqc_config.toList(),
+ ch_multiqc_custom_config.toList(),
+ ch_multiqc_logo.toList()
)
multiqc_report = MULTIQC.out.report.toList()
- ch_versions = ch_versions.mix(MULTIQC.out.versions)
}
//Save input in results folder
@@ -651,7 +661,7 @@ workflow AMPLISEQ {
input.copyTo("${params.outdir}/input")
}
//Save metadata in results folder
- if ( params.metadata ) {
+ if ( params.metadata ) {
file("${params.outdir}/input").mkdir()
file("${params.metadata}").copyTo("${params.outdir}/input")
}
@@ -669,7 +679,7 @@ workflow.onComplete {
}
NfcoreTemplate.summary(workflow, params, log)
if (params.hook_url) {
- NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log)
+ NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)
}
}