Merge pull request #344 from nf-core/dev

Release 2.1.1
nf-core · Oct 28, 2021 · 80b3cb8 · 80b3cb8
2 parents 68e4b5f + 010f0f8
commit 80b3cb8
Show file tree

Hide file tree

Showing 28 changed files with 214 additions and 101 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,27 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## nf-core/ampliseq version 2.1.1 - 2021-10-28
+
+### `Added`
+
+* [#336](https://github.com/nf-core/ampliseq/pull/336) - Taxa agglomeration levels with `--dada_tax_agglom_min`, `--dada_tax_agglom_max`, `--qiime_tax_agglom_min`, `--qiime_tax_agglom_max`, with defaults that go to genus level for abundance tables and ANCOM analysis
+
+### `Changed`
+
+* [#338](https://github.com/nf-core/ampliseq/pull/338) - Write empty space instead of `NA` for missing values in output files.
+* [#342](https://github.com/nf-core/ampliseq/pull/342) - Added PICRUSt2 to summary figure.
+
+### `Fixed`
+
+* [#329](https://github.com/nf-core/ampliseq/issues/329) - Improve error message when no data files are found
+* [#330](https://github.com/nf-core/ampliseq/issues/330) - Make `--skip_fastqc` usable again
+* [#339](https://github.com/nf-core/ampliseq/issues/339) - Fix sample names when using `--double_primer` or `--illumina_pe_its`
+
+### `Dependencies`
+
+### `Removed`
+
 ## nf-core/ampliseq version 2.1.0 "Gray Steel Boa" - 2021-09-14
 
 ### `Added`

diff --git a/bin/add_full_sequence_to_taxfile.py b/bin/add_full_sequence_to_taxfile.py
@@ -36,5 +36,5 @@
 
 # Join taxonomy and full sequence, write to file
 tax = tax.set_index('ASV_ID').join(seqs.set_index('id'), how='outer')
-tax.to_csv(outfile, sep='\t',na_rep="NA", index_label="ASV_ID")
+tax.to_csv(outfile, sep='\t',na_rep="", index_label="ASV_ID")
 
diff --git a/bin/cutadapt_summary.py b/bin/cutadapt_summary.py
@@ -32,13 +32,8 @@
             else:
                 results.append("")
 
-        #modify sample names
-        if argv[1] == "single_end":
-            results[0] = results[0].replace(".double-primer.trim.fastq.gz","")
-            results[0] = results[0].replace(".trim.fastq.gz","")
-        if argv[1] == "paired_end":
-            results[0] = results[0].replace(".double-primer_1.trim.fastq.gz","")
-            results[0] = results[0].replace("_1.trim.fastq.gz","")
+        #modify sample names (all before ".")
+        results[0] = results[0].split(".", 1)[0]
 
         #output per file
         print("\t".join(results))
diff --git a/conf/modules.config b/conf/modules.config
@@ -32,6 +32,7 @@ params {
         }
         'cutadapt' {
             args          = "--minimum-length 1"
+            suffix        = ".trimmed"
             publish_files = ['log':'']
         }
         'cutadapt_readthrough' {

diff --git a/conf/test.config b/conf/test.config
@@ -32,5 +32,9 @@ params {
     min_samples = 2
     min_frequency = 10
 
+    //restrict ANCOM analysis to higher taxonomic levels
+    dada_tax_agglom_max = 4
+    qiime_tax_agglom_max = 4
+
     sbdiexport = true
 }
diff --git a/conf/test_doubleprimers.config b/conf/test_doubleprimers.config
@@ -22,4 +22,5 @@ params {
     dada_ref_taxonomy = false
     input = "https://raw.githubusercontent.com/nf-core/test-datasets/ampliseq/samplesheets/Samplesheet_double_primer.tsv"
     trunc_qmin = 30
+    skip_fastqc = true
 }
diff --git a/docs/images/ampliseq_workflow.png b/docs/images/ampliseq_workflow.png
diff --git a/docs/images/ampliseq_workflow.svg b/docs/images/ampliseq_workflow.svg
diff --git a/docs/output.md b/docs/output.md
@@ -155,12 +155,7 @@ All following analysis is based on these filtered tables.
     * `descriptive_stats.tsv`: Length, mean, etc. of ASV sequences.
     * `seven_number_summary.tsv`: Length of ASV sequences in different quantiles.
 * `qiime2/abundance_tables/`
-    * `abs-abund-table-2.tsv`: Tab-separated absolute abundance table at phylum level.
-    * `abs-abund-table-3.tsv`: Tab-separated absolute abundance table at class level.
-    * `abs-abund-table-4.tsv`: Tab-separated absolute abundance table at order level.
-    * `abs-abund-table-5.tsv`: Tab-separated absolute abundance table at family level.
-    * `abs-abund-table-6.tsv`: Tab-separated absolute abundance table at genus level.
-    * `abs-abund-table-7.tsv`: Tab-separated absolute abundance table at species level.
+    * `abs-abund-table-*.tsv`: Tab-separated absolute abundance table at taxa level `*`, where `*` ranges by default from 2 to 6 or 7, depending on the used reference taxonomy database.
     * `count_table_filter_stats.tsv`: Tab-separated table with information on how much counts were filtered for each sample.
     * `feature-table.biom`: Abundance table in biom format for importing into downstream analysis tools.
     * `feature-table.tsv`: Tab-separated abundance table for each ASV and each sample.
@@ -175,12 +170,7 @@ Absolute abundance tables produced by the previous steps contain count data, but
 <summary>Output files</summary>
 
 * `qiime2/rel_abundance_tables/`
-    * `rel-table-2.tsv`: Tab-separated relative abundance table at phylum level.
-    * `rel-table-3.tsv`: Tab-separated relative abundance table at class level.
-    * `rel-table-4.tsv`: Tab-separated relative abundance table at order level.
-    * `rel-table-5.tsv`: Tab-separated relative abundance table at family level.
-    * `rel-table-6.tsv`: Tab-separated relative abundance table at genus level.
-    * `rel-table-7.tsv`: Tab-separated relative abundance table at species level.
+    * `rel-table-*.tsv`: Tab-separated relative abundance table at taxa level `*`, where `*` ranges by default from 2 to 6 or 7, depending on the used reference taxonomy database.
     * `rel-table-ASV.tsv`: Tab-separated relative abundance table for all ASVs.
     * `rel-table-ASV_with-DADA2-tax.tsv`: Tab-separated table for all ASVs with DADA2 taxonomic classification, sequence and relative abundance.
     * `rel-table-ASV_with-QIIME2-tax.tsv`: Tab-separated table for all ASVs with QIIME2 taxonomic classification, sequence and relative abundance.

diff --git a/lib/WorkflowAmpliseq.groovy b/lib/WorkflowAmpliseq.groovy
@@ -22,6 +22,16 @@ class WorkflowAmpliseq {
             log.error "Incompatible parameters `--double_primer` and `--retain_untrimmed` cannot be set at the same time."
             System.exit(1)
         }
+
+        if (params.dada_tax_agglom_min > params.dada_tax_agglom_max) {
+            log.error "Incompatible parameters: `--dada_tax_agglom_min` may not be greater than `--dada_tax_agglom_max`."
+            System.exit(1)
+        }
+
+        if (params.qiime_tax_agglom_min > params.qiime_tax_agglom_max) {
+            log.error "Incompatible parameters: `--qiime_tax_agglom_min` may not be greater than `--qiime_tax_agglom_max`."
+            System.exit(1)
+        }
     }
 
     //

diff --git a/modules/local/cutadapt_summary_merge.nf b/modules/local/cutadapt_summary_merge.nf
@@ -29,8 +29,8 @@ process CUTADAPT_SUMMARY_MERGE {
     if (action == "merge") {
         """
         #!/usr/bin/env Rscript
-        standard <- read.table(\"${files[0]}\", header = TRUE, sep = "\t", stringsAsFactors = FALSE)
-        doubleprimer <- read.table(\"${files[1]}\", header = TRUE, sep = "\t", stringsAsFactors = FALSE)
+        standard <- read.table(\"${files[0]}\", header = TRUE, sep = "\\t", stringsAsFactors = FALSE)
+        doubleprimer <- read.table(\"${files[1]}\", header = TRUE, sep = "\\t", stringsAsFactors = FALSE)
         colnames(doubleprimer) <- c("sample", "cutadapt_doubleprimer_total_processed", "cutadapt_doubleprimer_reverse_complemented", "cutadapt_doubleprimer_passing_filters", "cutadapt_doubleprimer_passing_filters_percent")
 
         #merge
@@ -41,7 +41,7 @@ process CUTADAPT_SUMMARY_MERGE {
         for(column in remove_columns) df[column]<-NULL
 
         #write
-        write.table(df, file = \"cutadapt_summary.tsv\", quote=FALSE, col.names=TRUE, row.names=FALSE, sep="\t")
+        write.table(df, file = \"cutadapt_summary.tsv\", quote=FALSE, col.names=TRUE, row.names=FALSE, sep="\\t")
         """
     } else {
         """

diff --git a/modules/local/dada2_addspecies.nf b/modules/local/dada2_addspecies.nf
@@ -57,9 +57,9 @@ process DADA2_ADDSPECIES {
         row.names=row.names(tmp)
     )
 
-    write.table(taxa, file = \"$outfile\", sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE)
+    write.table(taxa, file = \"$outfile\", sep = "\\t", row.names = FALSE, col.names = TRUE, quote = FALSE, na = '')
 
-    write.table('addSpecies\t$options.args', file = "addSpecies.args.txt", row.names = FALSE, col.names = FALSE, quote = FALSE)
-    write.table(packageVersion("dada2"), file = "${software}.version.txt", row.names = FALSE, col.names = FALSE, quote = FALSE)
+    write.table('addSpecies\t$options.args', file = "addSpecies.args.txt", row.names = FALSE, col.names = FALSE, quote = FALSE, na = '')
+    write.table(packageVersion("dada2"), file = "${software}.version.txt", row.names = FALSE, col.names = FALSE, quote = FALSE, na = '')
     """
 }