From 08a3e574c843c96b5cd5ca64fe044f9cf8cf4bb3 Mon Sep 17 00:00:00 2001 From: Charles Plessy Date: Fri, 24 May 2024 10:30:36 +0900 Subject: [PATCH] Ran pre-commit run --all-files --- docs/output.md | 25 +++++++++++----------- docs/usage.md | 19 +++++++++-------- modules.json | 50 ++++++++++++-------------------------------- nextflow_schema.json | 27 +++++------------------- 4 files changed, 40 insertions(+), 81 deletions(-) diff --git a/docs/output.md b/docs/output.md index 70f1dd7..9490a13 100644 --- a/docs/output.md +++ b/docs/output.md @@ -10,19 +10,18 @@ The directories listed below will be created in the results directory after the ## Outputs -For each _query_ genome, this pipeline will align it to the _target_genome, post-process the alignments and produce dot plots visualisations at different steps of the - workflow. Each file contains a name suffix that indicates in which order they were created. - - - `.train` is the alignment parameters computed by `last-train` (optional) - - `m2m_aln` is the _**many-to-many**_ alignment between _target_ and _query_ genomes. (optional through the `--m2m` option) - - `m2m_plot` (optional) - - `m2o_aln` is the _**many-to-one**_ alignment regions of the _target_ genome are matched at most once by the _query_ genome. - - `m2o_plot` (optional) - - `o2o_aln` is the _**one-to-one**_ alignment between the _target_ and _query_ genomes. - - `o2o_plot` (optional) - - `o2m_aln` is the _**one-to-many**_ alignment between the _target_ and _query_ genomes (optional). - - `o2m_plot` (optional) - +For each _query_ genome, this pipeline will align it to the _target_ genome, post-process the alignments and produce dot plot visualisations at different steps of the +workflow. Each file contains a name suffix that indicates in which order they were created. + +- `.train` is the alignment parameters computed by `last-train` (optional) +- `m2m_aln` is the _**many-to-many**_ alignment between _target_ and _query_ genomes. 
(optional through the `--m2m` option) +- `m2m_plot` (optional) +- `m2o_aln` is the _**many-to-one**_ alignment, in which regions of the _target_ genome are matched at most once by the _query_ genome. +- `m2o_plot` (optional) +- `o2o_aln` is the _**one-to-one**_ alignment between the _target_ and _query_ genomes. +- `o2o_plot` (optional) +- `o2m_aln` is the _**one-to-many**_ alignment between the _target_ and _query_ genomes (optional). +- `o2m_plot` (optional) The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: diff --git a/docs/usage.md b/docs/usage.md index dcfcee9..66c79e7 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -24,7 +24,6 @@ A final samplesheet file consisting of both samples id and fasta file path may l Make sure to [test your setup] with `-profile test` before running the workflow on actual data. - First, prepare a samplesheet with your input data that looks as follows: `samplesheet.csv`: @@ -33,13 +32,13 @@ First, prepare a samplesheet with your input data that looks as follows: sample,fasta Query_1,AEG588A1_S1_L002_R1_001.fasta ``` -Each row represents a fasta file, this can also contain multiple rows to accomodate multiple query genomes in fasta format. +Each row represents a fasta file; the samplesheet can contain multiple rows to accommodate multiple query genomes in fasta format. -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. Spaces in sample names are automatically converted to underscores (`_`). | -| `fasta` | Full path to Fasta/fa/gz file +| Column | Description | +| -------- | -------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. Spaces in sample names are automatically converted to underscores (`_`). 
| +| `fasta` | Full path to Fasta/fa/gz file | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. @@ -82,11 +81,13 @@ add it to the run's configuration with the `-c` option. For instance, with file called `overrideLabels.nf` containing the following: ``` + process { - withLabel:process_high { - time = 3.d - } +withLabel:process_high { +time = 3.d +} } + ``` The command `nextflow -c overrideLabels.nf run …` would set the execution time diff --git a/modules.json b/modules.json index 8cf7b1a..926c7ad 100644 --- a/modules.json +++ b/modules.json @@ -8,65 +8,47 @@ "assemblyscan": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gfastats": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "last/dotplot": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "last/lastal": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "last/lastdb": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "last/mafswap": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "last/split": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "last/train": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": [ - 
"modules" - ] + "installed_by": ["modules"] } } }, @@ -75,26 +57,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} diff --git a/nextflow_schema.json b/nextflow_schema.json index fb0c188..15ba504 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -26,13 +26,7 @@ "properties": { "seed": { "type": "string", - "enum": [ - "YASS", - "NEAR", - "MAM8", - "RY128", - "PSEUDO" - ], + "enum": ["YASS", "NEAR", "MAM8", "RY128", "PSEUDO"], "help_text": "--seed selects the name of the LAST seed The default (YASS) searches for \u201clong-and-weak similarities\u201d that \u201callow for mismatches but not gaps\u201d. Among alternatives, there are NEAR for \u201cshort-and-strong (near-identical) similarities \u2026 with many gaps (insertions and deletions)\u201d, MAM8 to find \u201cweak similarities with high sensitivity, but low speed and high memory usage\u201d or RY128 that \u201creduces run time and memory use, by only seeking seeds at ~1/128 of positions in each sequence\u201d, which is useful when the purpose of running this pipeline is only to generate whole-genome dotplots, or when sensitivity for tiny fragments may be unnecessary or undesirable. 
Setting the seed to PSEUDO triggers protein-to-DNA alignment mode (experimental).", "description": "The default (YASS) searches for \u201clong-and-weak similarities\u201d that \u201callow for mismatches but not gaps\u201d.", "default": "YASS" @@ -79,11 +73,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "target", - "outdir" - ], + "required": ["input", "target", "outdir"], "properties": { "input": { "type": "string", @@ -262,14 +252,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { @@ -379,7 +362,7 @@ }, "last_split_mismap": { "type": "string", - "default": 1e-05, + "default": 1e-5, "description": "By default, last-split runs with -m1e-5 to omit alignments with mismap probability > 10\u22125, but this can be overriden with the --last_split_mismap option." }, "lastal_params": { @@ -418,4 +401,4 @@ "$ref": "#/definitions/new_group_1" } ] -} \ No newline at end of file +}