From deaa471d957091395758d8b04df2dcbed5a047cc Mon Sep 17 00:00:00 2001 From: Nick Wiltsie Date: Mon, 1 Jul 2024 08:58:12 -0700 Subject: [PATCH] Initial commit --- .gitattributes | 1 + .github/CODEOWNERS | 2 + .github/ISSUE_TEMPLATE/issue_report.md | 80 ++++++ .github/PULL_REQUEST_TEMPLATE.md | 49 ++++ .github/dependabot.yml | 14 + .github/workflows/deploy-docs.yaml | 21 ++ .github/workflows/nextflow-tests.yaml | 21 ++ .github/workflows/pipeline-release.yaml | 17 ++ .github/workflows/render-puml.yaml | 20 ++ .github/workflows/static-analysis.yml | 17 ++ .github/workflows/trigger-tests.yaml | 20 ++ .gitignore | 82 ++++++ .gitmodules | 6 + CHANGELOG.md | 65 +++++ LICENSE.md | 339 ++++++++++++++++++++++++ README.md | 160 +++++++++++ config/F72.config | 23 ++ config/M64.config | 23 ++ config/base.config | 23 ++ config/default.config | 45 ++++ config/methods.config | 82 ++++++ config/template.config | 26 ++ docs/pipeline-flow.puml | 14 + docs/pipeline-flow.svg | 17 ++ external/pipeline-Nextflow-config | 1 + external/pipeline-Nextflow-module | 1 + input/template-input-BAM.yaml | 9 + main.nf | 113 ++++++++ metadata.yaml | 8 + module/module-name.nf | 60 +++++ nextflow.config | 7 + 31 files changed, 1366 insertions(+) create mode 100644 .gitattributes create mode 100644 .github/CODEOWNERS create mode 100644 .github/ISSUE_TEMPLATE/issue_report.md create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/deploy-docs.yaml create mode 100644 .github/workflows/nextflow-tests.yaml create mode 100644 .github/workflows/pipeline-release.yaml create mode 100644 .github/workflows/render-puml.yaml create mode 100644 .github/workflows/static-analysis.yml create mode 100644 .github/workflows/trigger-tests.yaml create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 CHANGELOG.md create mode 100644 LICENSE.md create mode 100644 README.md create mode 100644 config/F72.config create mode 100644 
config/M64.config create mode 100644 config/base.config create mode 100644 config/default.config create mode 100644 config/methods.config create mode 100644 config/template.config create mode 100644 docs/pipeline-flow.puml create mode 100644 docs/pipeline-flow.svg create mode 160000 external/pipeline-Nextflow-config create mode 160000 external/pipeline-Nextflow-module create mode 100644 input/template-input-BAM.yaml create mode 100644 main.nf create mode 100644 metadata.yaml create mode 100644 module/module-name.nf create mode 100644 nextflow.config diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..cef56aa --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.config linguist-language=groovy diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..cdbfd10 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,2 @@ +# Default owner(s) +* @tyamaguchi-ucla @yashpatel6 @zhuchcn @uclahs-cds/software-wg diff --git a/.github/ISSUE_TEMPLATE/issue_report.md b/.github/ISSUE_TEMPLATE/issue_report.md new file mode 100644 index 0000000..872f072 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/issue_report.md @@ -0,0 +1,80 @@ +--- +name: Issue report +about: Create a report to help us improve our pipelines +title: "[ISSUE]: " +labels: '' +assignees: '' + +--- + +**Describe the issue**
+A clear and concise description of what the issue is. Please include the following in your issue report along with any explicit errors observed + +**Error:** +``` +paste relevant error message here, if applicable +``` + +**Screenshots**
+If applicable, add screenshots to help explain your problem. + +--- + +**Pipeline release version:**
+`version` + +**Cluster you are using:** +- [ ] `Slurm-Dev` +- [ ] `Slurm-Test` +- [ ] `Regeneron` + +**Node type:** + +- [ ] `F2`    (scratch: `32GB`) +- [ ] `F16`   (scratch: `500GB`) +- [ ] `F32`   (scratch: `500GB`) +- [ ] `F72`   (scratch: `2TB`) +- [ ] `M64`   (scratch: `2TB`) + +**Submission method** +- [ ] interactive +- [ ] submission script + +**Actual submission command**
+Paste the command used inside your script if you used one. + + +``` +submission command +``` + +**Paths** + + +| Description | Path | +|-------------|------| +| sbatch/qsub command and logs
(if applicable) | ```/path/to/logs```| +| Config File | ```/path/to/config/file``` | +| Working Directory | ```/path/to/work/dir``` | +| Logs Produced by the Pipeline | ```/path/to/logs``` | + + +--- + +**To Reproduce**
+Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior**
+A clear and concise description of what you expected to happen. + +**Additional context**
+Add any other context about the problem here. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..6e8bed3 --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,49 @@ +> This is a template for UCLA-CDS pipeline developers to create a github pull request template. Things should be adjusted for individual pipeline including: +> 1. additional checklist items specific to the pipeline +> 2. a description of how testing is expected to be done +> 3. a template list or table for testing results +> 4. additional notes wrapped in \ (or \ + +### Closes #... + +## Testing Results + +- Case 1 + - sample: + - input csv: + - config: + - output: +- Case 2 + - sample: + - input csv: + - config: + - output: +- NFTest + - output: + - log: + - cases: + +# Checklist + + +- [ ] I have read the [code review guidelines](https://uclahs-cds.atlassian.net/wiki/spaces/BOUTROSLAB/pages/3187646/Code+Review+Guidelines) and the [code review best practice on GitHub check-list](https://uclahs-cds.atlassian.net/wiki/spaces/BOUTROSLAB/pages/3189956/Code+Review+Best+Practice+on+GitHub+-+Check+List). + +- [ ] I have reviewed the [Nextflow pipeline standards](https://uclahs-cds.atlassian.net/wiki/spaces/BOUTROSLAB/pages/3193890/Nextflow+pipeline+standardization). + +- [ ] The name of the branch is meaningful and well formatted following the [standards](https://uclahs-cds.atlassian.net/wiki/spaces/BOUTROSLAB/pages/3189956/Code+Review+Best+Practice+on+GitHub+-+Check+List), using \[AD_username (or 5 letters of AD if AD is too long)]-\[brief_description_of_branch]. + +- [ ] I have set up or verified the branch protection rule following the [github standards](https://uclahs-cds.atlassian.net/wiki/spaces/BOUTROSLAB/pages/3190380/GitHub+Standards#GitHubStandards-Branchprotectionrule) before opening this pull request. 
+ +- [ ] I have added my name to the contributors listings in the ``manifest`` block in the `nextflow.config` as part of this pull request, am listed +already, or do not wish to be listed. (*This acknowledgement is optional.*) + +- [ ] I have added the changes included in this pull request to the `CHANGELOG.md` under the next release version or unreleased, and updated the date. + +- [ ] I have updated the version number in the `metadata.yaml` and `manifest` block of the `nextflow.config` file following [semver](https://semver.org/), or the version number has already been updated. (*Leave it unchecked if you are unsure about new version number and discuss it with the infrastructure team in this PR.*) + +- [ ] I have tested the pipeline on at least one A-mini sample. diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..18078dd --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,14 @@ +--- +version: 2 +updates: + - package-ecosystem: "gitsubmodule" + directory: "/" + schedule: + interval: "weekly" + groups: + # Group these two modules together for pull requests + # "pipeline-submodules" is an arbitrary name + pipeline-submodules: + patterns: + - "*/pipeline-Nextflow-config" + - "*/pipeline-Nextflow-module" diff --git a/.github/workflows/deploy-docs.yaml b/.github/workflows/deploy-docs.yaml new file mode 100644 index 0000000..942a508 --- /dev/null +++ b/.github/workflows/deploy-docs.yaml @@ -0,0 +1,21 @@ +--- +name: Build and Deploy Docs + +on: + workflow_dispatch: + push: + branches: + - main + tags: + - 'v[0-9]*' + +jobs: + build: + name: Deploy docs + runs-on: ubuntu-latest + steps: + - name: Checkout main + uses: actions/checkout@v4 + + - name: Deploy docs + uses: uclahs-cds/tool-Nextflow-action/build-and-deploy-docs@main diff --git a/.github/workflows/nextflow-tests.yaml b/.github/workflows/nextflow-tests.yaml new file mode 100644 index 0000000..0338914 --- /dev/null +++ b/.github/workflows/nextflow-tests.yaml @@ -0,0 
+1,21 @@ +--- +name: Nextflow config tests + +on: + workflow_run: + workflows: [Trigger Tests] + types: + - completed + +permissions: + actions: read + contents: write + packages: read + pull-requests: write + statuses: write + +jobs: + tests: + if: ${{ github.event.workflow_run.conclusion == 'success' }} + uses: uclahs-cds/tool-Nextflow-action/.github/workflows/nextflow-tests.yml@main + secrets: inherit diff --git a/.github/workflows/pipeline-release.yaml b/.github/workflows/pipeline-release.yaml new file mode 100644 index 0000000..b2a0110 --- /dev/null +++ b/.github/workflows/pipeline-release.yaml @@ -0,0 +1,17 @@ +--- +on: + push: + tags: "v*" + release: + types: [published] + +permissions: read-all +jobs: + add_release_asset: + runs-on: ubuntu-latest + name: A job to add a release asset with submodules + steps: + - id: release-asset + uses: uclahs-cds/tool-Nextflow-action/add-source-with-submodules@latest + with: + my-token: ${{ secrets.UCLAHS_CDS_REPO_READ_TOKEN }} diff --git a/.github/workflows/render-puml.yaml b/.github/workflows/render-puml.yaml new file mode 100644 index 0000000..23e8621 --- /dev/null +++ b/.github/workflows/render-puml.yaml @@ -0,0 +1,20 @@ +--- +name: PlantUML Generation + +on: + push: + paths: + - '**.puml' + workflow_dispatch: + +jobs: + plantuml: + runs-on: ubuntu-latest + + steps: + - name: Generate PUML diagrams + uses: uclahs-cds/tool-PlantUML-action@v1.0.0 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + ghcr-username: ${{ github.actor }} + ghcr-password: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/static-analysis.yml b/.github/workflows/static-analysis.yml new file mode 100644 index 0000000..608753d --- /dev/null +++ b/.github/workflows/static-analysis.yml @@ -0,0 +1,17 @@ +--- +name: CI + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + CICD-base: + runs-on: ubuntu-latest + + steps: + - uses: uclahs-cds/tool-static-analysis@v1 diff --git a/.github/workflows/trigger-tests.yaml 
b/.github/workflows/trigger-tests.yaml new file mode 100644 index 0000000..8e0e00a --- /dev/null +++ b/.github/workflows/trigger-tests.yaml @@ -0,0 +1,20 @@ +--- +name: Trigger tests + +on: + issue_comment: + types: [created] + pull_request_target: + branches: + - main + push: + branches: + - main + +permissions: + pull-requests: write + statuses: write + +jobs: + check-user: + uses: uclahs-cds/tool-Nextflow-action/.github/workflows/test-setup.yml@main diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..27d8114 --- /dev/null +++ b/.gitignore @@ -0,0 +1,82 @@ +# System files +*.log +*.error +.DS_Store + +# R +.Rhistory +.Rapp.history +.Rproj.user +*.RData +*.rd[as] + +# Python +__pycache__/ +.pytest_cache/ +.Python +pip-log.txt +pip-delete-this-directory.txt +*.py[cod] +*$py.class +venv +.venv +.env + +# VScode +.vscode + +# Nextflow +work/ +.nextflow/ +*.nextflow.log* + +## Molecular files + +# Array file +*.CEL +*.cel +*.OSCHP + +# Sequence file (e.g. Reference genome) +*.fasta +*.fa +*.fai +*.dict +*.gtf + + +# FASTQ file +*.fq +*.fastq + +# Proteomics file +*.raw +*.mzML +*.mzXML +*.mgf +*.idXML +*.consensusXML +*.featureXML +*.mzid +*.mzData +*.dta +*.dta2d + +# DNA/RNA alignment file +*.sam +*.bam +*.bai +*.cram +*.crai + +# Variant file +*.bcf +*.csi +*.vcf +*.tbi +*.maf + +# Compressed file +*.gz +*.tar +*.zip diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..f051c22 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "external/pipeline-Nextflow-module"] + path = external/pipeline-Nextflow-module + url = git@github.com:uclahs-cds/pipeline-Nextflow-module.git +[submodule "external/pipeline-Nextflow-config"] + path = external/pipeline-Nextflow-config + url = git@github.com:uclahs-cds/pipeline-Nextflow-config.git diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..cb5160a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,65 @@ +# Changelog +All notable changes to the pipeline-name 
pipeline. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). + +This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +--- + +## [Unreleased] +### Added +- Add `sample_id` extraction from BAM +- Add template input YAMLs +- Add pipeline-Nextflow-config as submodule and redirect set_resources_allocation +- Add pipeline-Nextflow-module as submodule +- Additional out of memory exit code +- Pipeline release action +- Template for NFTest testing results in PR template +- Enable dependabot +- Add example PlantUML image to README +- Add workflow to build documentation +- Add workflows to run Nextflow configuration tests + +### Changed +- Switch resource limit checks to external scripts +- Update links in on-prem Confluence to point to cloud-based Confluence +- Fix `CODEOWNERS` file +- Use `schema.check_path` for `workDir` validation +- Add `Discussions` and `Contributors` to the Table of Contents in `README.md` +- Update from DSL1 to DSL2 +- Standardize config structure +- Restructure repo so main script is main.nf +- Reorganize contributors and metadata +- Reorganize PR template so description is at top +- Update automatic node detection to allow for F2 detection +- Update Issue Template +- Standardize input/output/parameter structure in README +- Avoid modification of input parameter `output_dir` +- Create default docker container registry parameter for tools +- Use `methods.setup_process_afterscript()` to capture log files + +--- + +## [1.0.0] - YYYY-MM-DD +### Added +- For new features. +- Added item 1. + +### Changed +- For changes in existing functionality. +- Changed item 1. + +### Deprecated +- For soon-to-be removed features. + +### Removed +- For now removed features. +- Removed item 1. + +### Fixed +- For any bug fixes. +- Fixed item 1. + +### Security +- In case of vulnerabilities. 
diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..d159169 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..50f4efe --- /dev/null +++ b/README.md @@ -0,0 +1,160 @@ +# Pipeline Name + +- [Pipeline Name](#pipeline-name) + - [Overview](#overview) + - [How To Run](#how-to-run) + - [Flow Diagram](#flow-diagram) + - [Pipeline Steps](#pipeline-steps) + - [1. Step/Process 1](#1-stepprocess-1) + - [2. Step/Process 2](#2-stepprocess-2) + - [3. Step/Process n](#3-stepprocess-n) + - [Inputs](#inputs) + - [Outputs](#outputs) + - [Testing and Validation](#testing-and-validation) + - [Test Data Set](#test-data-set) + - [Validation ](#validation-version-number) + - [Validation Tool](#validation-tool) + - [References](#references) + - [Discussions](#discussions) + - [Contributors](#contributors) + - [License](#license) +## Overview + +A 3-4 sentence summary of the pipeline, including the pipeline's purpose, the type of expected scientific inputs/outputs of the pipeline (e.g., FASTQs and BAMs), and a list of tools/steps in the pipeline. + +--- + +## How To Run + +1. Update the params section of the .config file + +2. Update the input yaml + +3. See the submission script, [here](https://github.com/uclahs-cds/tool-submit-nf), to submit your pipeline + +--- + +## Flow Diagram + +A directed acyclic graph of your pipeline. The [PlantUML](https://plantuml.com/) code defining this diagram is version-controlled in the [docs/](./docs/) folder, and a [GitHub Action](https://github.com/uclahs-cds/tool-PlantUML-action) automatically regenerates the SVG image when that file is changed. + +![Pipeline Graph](./docs/pipeline-flow.svg) + +--- + +## Pipeline Steps + +### 1. Step/Process 1 + +> A 2-3 sentence description of each step/process in your pipeline that includes the purpose of the step/process, the tool(s) being used and their version, and the expected scientific inputs/outputs (e.g., FASTQs and BAMs) of the pipeline. + +### 2. 
Step/Process 2 + +> A 2-3 sentence description of each step/process in your pipeline that includes the purpose of the step/process, the tool(s) being used and their version, and the expected scientific inputs/outputs (e.g., FASTQs and BAMs) of the pipeline. + +### 3. Step/Process n + +> A 2-3 sentence description of each step/process in your pipeline that includes the purpose of the step/process, the tool(s) being used and their version, and the expected scientific inputs/outputs (e.g., FASTQs and BAMs) of the pipeline. + +--- + +## Inputs + +### Input YAML + +> Include an example of the organization structure within the YAML. Example: +```yaml +input 1: 'patient_id' +input: + normal: + - id: + BAM: + tumor: + - id: + BAM: +``` + +### Config + +| Field | Type | Required | Description | +| ----- | ---- | ------------ | ------------------------ | +| param 1 | _type_ | yes/no | 1-2 sentence description of the parameter, including defaults, if any. | +| param 2 | _type_ | yes/no | 1-2 sentence description of the parameter, including defaults, if any. | +| param n | _type_ | yes/no | 1-2 sentence description of the parameter, including defaults, if any. | +| `work_dir` | path | no | Path of working directory for Nextflow. When included in the sample config file, Nextflow intermediate files and logs will be saved to this directory. With ucla_cds, the default is `/scratch` and should only be changed for testing/development. Changing this directory to `/hot` or `/tmp` can lead to high server latency and potential disk space limitations, respectively. | + +> Include the optional param `work_dir` in the inputs accompanied by a warning of the potential dangers of using the param. Update the warning if necessary. + +--- + +## Outputs + + + +| Output | Description | +| ------------ | ------------------------ | +| output 1 | 1 - 2 sentence description of the output. | +| output 2 | 1 - 2 sentence description of the output. 
| +| output n | 1 - 2 sentence description of the output. | + +--- + +## Testing and Validation + +### Test Data Set + +A 2-3 sentence description of the test data set(s) used to validate and test this pipeline. If possible, include references and links for how to access and use the test dataset. + +### Validation + +| Input/Output | Description | Result | +| ------------ | ------------------------ | ------------------------ | +| metric 1 | 1 - 2 sentence description of the metric | quantifiable result | +| metric 2 | 1 - 2 sentence description of the metric | quantifiable result | +| metric n | 1 - 2 sentence description of the metric | quantifiable result | + +- [Reference/External Link/Path 1 to any files/plots or other validation results]() +- [Reference/External Link/Path 2 to any files/plots or other validation results]() +- [Reference/External Link/Path n to any files/plots or other validation results]() + +### Validation Tool + +Included is a template for validating your input files. For more information on the tool check out: https://github.com/uclahs-cds/package-PipeVal + +--- + +## References + +1. [Reference 1]() +2. [Reference 2]() +3. [Reference n]() + +--- + +## Discussions + +- [Issue tracker]() to report errors and enhancement ideas. +- Discussions can take place in [ Discussions]() +- [ pull requests]() are also open for discussion + +--- + +## Contributors + +> Update link to repo-specific URL for GitHub Insights Contributors page. + +Please see the list of [Contributors](https://github.com/uclahs-cds/template-NextflowPipeline/graphs/contributors) at GitHub. + +--- + +## License + +[pipeline name] is licensed under the GNU General Public License version 2. See the file LICENSE.md for the terms of the GNU GPL license. + + + +Copyright (C) 2024 University of California Los Angeles ("Boutros Lab") All rights reserved. 
+ +This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. diff --git a/config/F72.config b/config/F72.config new file mode 100644 index 0000000..21ae4b0 --- /dev/null +++ b/config/F72.config @@ -0,0 +1,23 @@ +// Static per-process resource allocations (cpus, memory) go here +// The values in this file are specific to one node type - F72 +process { + withName: process_name { + cpus = + memory = + // Other process-specific allocations here + } + withName: process_name_2 { + cpus = + memory = + // Other process-specific allocations here + } + withName: process_name_3 { + cpus = + memory = + // Other process-specific allocations here + } + withName: example_process { + cpus = 2 + memory = 5.GB + } +} diff --git a/config/M64.config b/config/M64.config new file mode 100644 index 0000000..8b12f44 --- /dev/null +++ b/config/M64.config @@ -0,0 +1,23 @@ +// Static per-process resource allocations (cpus, memory) go here +// The values in this file are specific to one node type - M64 +process { + withName: process_name { + cpus = + memory = + // Other process-specific allocations here + } + withName: process_name_2 { + cpus = + memory = + // Other process-specific allocations here + } + withName: process_name_3 { + cpus = + memory = + // Other process-specific allocations here + } + withName: example_process { + cpus = 2 + memory = 5.GB + } +} diff --git a/config/base.config b/config/base.config new file mode 100644 index 0000000..28fc976 --- /dev/null +++ b/config/base.config @@ -0,0 +1,23 @@ +/** +* Process-wide and label-based allocations: each value is multiplied by task.attempt and passed through methods.check_limits; tasks exiting with one of the listed exit statuses are retried (maxRetries = 1), any other failure uses 'finish'. Do not modify these labels! 
+*/ + +process { + cpus = { methods.check_limits( 1 * task.attempt, 'cpus' ) } + + errorStrategy = { task.exitStatus in [143, 137, 104, 134, 139, 247] ? 'retry' : 'finish' } + maxRetries = 1 + + withLabel:process_low { + cpus = { methods.check_limits( 2 * task.attempt, 'cpus' ) } + memory = { methods.check_limits( 3.GB * task.attempt, 'memory' ) } + } + withLabel:process_medium { + cpus = { methods.check_limits( 6 * task.attempt, 'cpus' ) } + memory = { methods.check_limits( 42.GB * task.attempt, 'memory' ) } + } + withLabel:process_high { + cpus = { methods.check_limits(12 * task.attempt, 'cpus' ) } + memory = { methods.check_limits( 84.GB * task.attempt, 'memory' ) } + } +} diff --git a/config/default.config b/config/default.config new file mode 100644 index 0000000..2e67a46 --- /dev/null +++ b/config/default.config @@ -0,0 +1,45 @@ +import nextflow.util.SysHelper + +// Default inputs/parameters of the pipeline; max_cpus and max_memory are read from SysHelper.getAvailCpus() and SysHelper.getAvailMemory() +params { + max_cpus = SysHelper.getAvailCpus() + max_memory = SysHelper.getAvailMemory() + min_cpus = 1 + min_memory = 1.MB + + ucla_cds = true + docker_container_registry = "ghcr.io/uclahs-cds" + + // Docker images + // REPLACE WITH TOOLS USED + tool_a_version = 'x.x.x' // Docker version for tool a + docker_image_tool_a = "${-> params.docker_container_registry}/tool_a:${params.tool_a_version}" +} + +// Process specific scope +process { + // Process results are stored to local cache. 
+ // If pipeline is launched with the 'resume' option, existing cache results will be used when available + // rather than re-executing processes + cache = true + + // Forward process 'stdout' to shell terminal and, consequently, the log file + echo = true + executor = 'local' + + // Other directives or options that should apply for every process + + // total amount of resources available to the pipeline + cpus = params.max_cpus + memory = params.max_memory +} + +// Enable Docker and configure docker-related options like user and group IDs +docker { + enabled = true + // Pass user's UID/GID and group IDs to Docker + uid_and_gid = "-u \$(id -u):\$(id -g)" + all_group_ids = "\$(for i in `id --real --groups`; do echo -n \"--group-add=\$i \"; done)" + + runOptions = "${uid_and_gid} ${all_group_ids}" +} diff --git a/config/methods.config b/config/methods.config new file mode 100644 index 0000000..761fd3b --- /dev/null +++ b/config/methods.config @@ -0,0 +1,82 @@ + +includeConfig "${projectDir}/external/pipeline-Nextflow-config/config/methods/common_methods.config" +includeConfig "${projectDir}/external/pipeline-Nextflow-config/config/schema/schema.config" +includeConfig "${projectDir}/external/pipeline-Nextflow-config/config/bam/bam_parser.config" + +methods { + get_ids_from_bams = { // Builds params.samples_to_process from the SM read-group tag of every BAM listed in params.input.BAM + params.samples_to_process = [] as Set + params.input.BAM.each { k, v -> + v.each { bam_path -> + def bam_header = bam_parser.parse_bam_header(bam_path) + def sm_tags = bam_header['read_group'].collect{ it['SM'] }.unique() + if (sm_tags.size() != 1) { + throw new Exception("${bam_path} contains multiple samples! Please run pipeline with single sample BAMs.") + } + if (params.samples_to_process.any { it.orig_id == sm_tags[0] }) { // reject a sample already registered from another BAM + throw new Exception("Sample ${sm_tags[0]} was found in multiple BAMs. 
Please provide only one BAM per sample") + } + def new_sm_tag = methods.sanitize_string(sm_tags[0]) + params.samples_to_process.add(['orig_id': sm_tags[0], 'id': new_sm_tag, 'path': bam_path, 'sample_type': k]) + } + } + } + // Set the output and log output dirs here. + set_output_dir = { + + def tz = TimeZone.getTimeZone('UTC') + def date = new Date().format("yyyyMMdd'T'HHmmss'Z'", tz) + + params.dataset_registry_prefix = '/hot/data' + + if (params.blcds_registered_dataset == true) { + if (params.dataset_id.length() != 11) { // compare the length as a number; a GString never equals an Integer + throw new Exception("Dataset id must be eleven characters long") + } + def disease = "${params.dataset_id.substring(0,4)}" + // Need to fill in analyte, technology, raw_or_aligned, genome, pipeline-name; `patient` and `sample` are not defined in this closure and must be supplied as well + params.output_log_directory = "${params.dataset_registry_prefix}/$disease/${params.dataset_id}/${patient}/${sample}/analyte/technology,raw_or_aligned/genome/log/pipeline-name/$date" + params.disease = "${disease}" + } else { + params.output_dir_base = "${params.output_dir}/${manifest.name}-${manifest.version}/${params.sample_name.replace(' ', '_')}" + params.log_output_dir = "${params.output_dir_base}/log-${manifest.name}-${manifest.version}-${date}" + } + } + + set_pipeline_logs = { + trace.enabled = true + trace.file = "${params.log_output_dir}/nextflow-log/trace.txt" + + timeline.enabled = true + timeline.file = "${params.log_output_dir}/nextflow-log/timeline.html" + + report.enabled = true + report.file = "${params.log_output_dir}/nextflow-log/report.html" + } + + modify_base_allocations = { + if (!(params.containsKey('base_resource_update') && params.base_resource_update)) { + return + } + + params.base_resource_update.each { resource, updates -> + updates.each { processes, multiplier -> + def processes_to_update = (custom_schema_types.is_string(processes)) ? 
[processes] : processes + methods.update_base_resource_allocation(resource, multiplier, processes_to_update) + } + } + } + + setup = { +// Add config/custom_schema_types.config and uncomment the next line if custom schema types are needed +// schema.load_custom_types("${projectDir}/config/custom_schema_types.config") + methods.set_output_dir() + methods.set_resources_allocation() + methods.modify_base_allocations() + methods.set_pipeline_logs() + methods.set_env() + methods.get_ids_from_bams() + methods.setup_docker_cpus() + methods.setup_process_afterscript() + } +} diff --git a/config/template.config b/config/template.config new file mode 100644 index 0000000..1002616 --- /dev/null +++ b/config/template.config @@ -0,0 +1,26 @@ +// EXECUTION SETTINGS AND GLOBAL DEFAULTS + +// External config files import. DO NOT MODIFY THESE LINES! +includeConfig "${projectDir}/config/default.config" +includeConfig "${projectDir}/config/methods.config" +includeConfig "${projectDir}/nextflow.config" + + +// Inputs/parameters of the pipeline +params { + dataset_id = '' + blcds_registered_dataset = false // if you want the output to be registered + + variable_name = 'foo-bar' + + // input/output locations + output_dir = 'where/to/save/outputs/' + // Tool-specific temp dirs here + // Using other directories, like /hot or /tmp, can cause latency and disk space issues + // tool_temp_dir = '/scratch' // Default is scratch + + // Add other inputs/parameters here +} + +// Set up the pipeline config by calling methods.setup(). DO NOT REMOVE THIS LINE! +methods.setup() diff --git a/docs/pipeline-flow.puml b/docs/pipeline-flow.puml new file mode 100644 index 0000000..6474bfb --- /dev/null +++ b/docs/pipeline-flow.puml @@ -0,0 +1,14 @@ +/' +This is a PlantUML (https://plantuml.com/) diagram documenting the pipeline. + +Everything between the opening slash-single-quote and the closing +single-quote-slash is a comment. 
+'/ + +@startuml +title Pipeline Steps + +:Step 1; +:Step 2; + +@enduml diff --git a/docs/pipeline-flow.svg b/docs/pipeline-flow.svg new file mode 100644 index 0000000..6249539 --- /dev/null +++ b/docs/pipeline-flow.svg @@ -0,0 +1,17 @@ +Pipeline StepsStep 1Step 2 \ No newline at end of file diff --git a/external/pipeline-Nextflow-config b/external/pipeline-Nextflow-config new file mode 160000 index 0000000..da8e990 --- /dev/null +++ b/external/pipeline-Nextflow-config @@ -0,0 +1 @@ +Subproject commit da8e990e15f136f992a427ed629fe3a2bb78a1ea diff --git a/external/pipeline-Nextflow-module b/external/pipeline-Nextflow-module new file mode 160000 index 0000000..9db08f0 --- /dev/null +++ b/external/pipeline-Nextflow-module @@ -0,0 +1 @@ +Subproject commit 9db08f0243ed844ab1f6460dad9a9197cc151605 diff --git a/input/template-input-BAM.yaml b/input/template-input-BAM.yaml new file mode 100644 index 0000000..624f9c1 --- /dev/null +++ b/input/template-input-BAM.yaml @@ -0,0 +1,9 @@ +--- +patient_id: 'patient_id' +input: + normal: + - id: normal_sample_id + path: /path/to/normal.bam + tumor: + - id: tumor_sample_id + path: /path/to/tumor.bam diff --git a/main.nf b/main.nf new file mode 100644 index 0000000..bae2734 --- /dev/null +++ b/main.nf @@ -0,0 +1,113 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl=2 + +// Include processes and workflows here +include { run_validate_PipeVal } from './external/pipeline-Nextflow-module/modules/PipeVal/validate/main.nf' addParams( + options: [ + docker_image_version: params.pipeval_version, + main_process: "./" //Save logs in /process-log/run_validate_PipeVal + ] +) +include { generate_standard_filename } from './external/pipeline-Nextflow-module/modules/common/generate_standardized_filename/main.nf' +include { tool_name_command_name } from './module/module-name' addParams( + workflow_output_dir: "${params.output_dir_base}/ToolName-${params.toolname_version}", + workflow_log_output_dir: 
"${params.log_output_dir}/process-log/ToolName-${params.toolname_version}" + ) + +// Log info here +log.info """\ + ====================================== + T E M P L A T E - N F P I P E L I N E + ====================================== + Boutros Lab + + Current Configuration: + - pipeline: + name: ${workflow.manifest.name} + version: ${workflow.manifest.version} + + - input: + input a: ${params.variable_name} + ... + + - output: + output a: ${params.output_path} + ... + + - options: + option a: ${params.option_name} + ... + + Tools Used: + tool a: ${params.docker_image_name} + + ------------------------------------ + Starting workflow... + ------------------------------------ + """ + .stripIndent() + +def indexFile(bam_or_vcf) { // Returns the expected index path: <file>.bai for .bam, <file>.tbi for .vcf.gz; throws for any other extension + if(bam_or_vcf.endsWith('.bam')) { + return "${bam_or_vcf}.bai" + } + else if(bam_or_vcf.endsWith('.vcf.gz')) { + return "${bam_or_vcf}.tbi" + } + else { + throw new Exception("Index file for ${bam_or_vcf} file type not supported. 
Use .bam or .vcf.gz files.") + } + } + +// Channels here +Channel + .fromList(params.samples_to_process) + .map { sample -> + return tuple(sample.orig_id, sample.id, sample.path, sample.sample_type) + } + .set { samplesToProcessChannel } + +Channel + .fromList(params.samples_to_process) + .map{ it -> [it['path'], indexFile(it['path'])] } + .flatten() + .set { files_to_validate_ch } + +// Possible reference channel +Channel + .of( + params.reference, + params.reference_index, + params.reference_dict + ) + .set { reference_ch } + +// Description of input channel +Channel + .fromPath(params.variable_name) + .ifEmpty { error "Cannot find: ${params.variable_name}" } + .set { input_ch_variable_name } + +files_to_validate_ch = files_to_validate_ch + .mix(reference_ch) + .mix(input_ch_variable_name) + +// Main workflow here +workflow { + // Validation process + run_validate_PipeVal( + files_to_validate_ch + ) + + run_validate_PipeVal.out.val_file.collectFile( + name: 'input_validation.txt', newLine: true, + storeDir: 
"${params.output_dir_base}/validation" + ) + + // Workflow or process + tool_name_command_name( + samplesToProcessChannel, + input_ch_variable_name + ) +} diff --git a/metadata.yaml b/metadata.yaml new file mode 100644 index 0000000..8b58496 --- /dev/null +++ b/metadata.yaml @@ -0,0 +1,8 @@ +--- +category: "pipeline" +description: "A template Nextflow pipeline for styling and standardization" +maintainers: "Boutros Lab Infrastructure " +languages: ["Nextflow", "Docker"] +dependencies: ["Java", "Nextflow", "Docker"] +references: "https://uclahs-cds.atlassian.net/wiki/spaces/BOUTROSLAB/pages/3189620/Guide+to+Nextflow" +tools: ["tool_a:v1.1.0", "tool_b:v1.1.0"] diff --git a/module/module-name.nf b/module/module-name.nf new file mode 100644 index 0000000..33d2ea4 --- /dev/null +++ b/module/module-name.nf @@ -0,0 +1,60 @@ +/* +* Module/process description here +* +* @input +* @params +* @output +*/ + +include { generate_standard_filename } from '../external/pipeline-Nextflow-module/modules/common/generate_standardized_filename/main.nf' + +process tool_name_command_name { + container params.docker_image_name + + // If setting resources via label, set the label here; config/base.config defines process_low, process_medium and process_high + label "resource_allocation_tool_name_command_name" + + // remove task.index extension if not needed + publishDir path: "${params.workflow_output_dir}/output", + pattern: "", + mode: "copy", + enabled: true + + // Process logs (the `.command.*` files) will be automatically saved as + // `tool_name_command_name/log.command.*` due to + // methods.setup_process_afterscript(). 
The folder can be customized + // per-process to add a suffix distinguishing multiple runs of the same + // process: + + // ext log_dir_suffix: { "-${variable_name}" } + + // Additional directives here + + input: + tuple val(orig_id), val(id), path(path), val(sample_type) + val(variable_name) + + output: + path("${variable_name}.command_name.file_extension"), emit: output_tag + + script: + output_filename = generate_standard_filename("Samtools-${params.samtools_version}", + params.dataset_id, + id, + [:]) + + """ + # make sure to specify pipefail to make sure process correctly fails on error + set -euo pipefail + + # the script should ideally only have call to a single tool + # to make the command more human readable: + # - seperate components of the call out on different lines + # - when possible be explict with command options, spelling out their long names + tool_name \ + command_name \ + --option_1_long_name ${id} \ + --input ${path} \ + --output ${variable_name}.command_name.file_extension + """ +} diff --git a/nextflow.config b/nextflow.config new file mode 100644 index 0000000..bfd2628 --- /dev/null +++ b/nextflow.config @@ -0,0 +1,7 @@ +// Pipeline metadata; manifest.name and manifest.version are interpolated into the output and log directory names in config/methods.config +manifest { + name = 'pipeline-name-nf' + author = 'Authors Name' + description = 'A template for Nextflow pipelines' + version = 'Version Number' +}