Skip to content

Checksum verification #10

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 6 additions & 10 deletions assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,23 @@
"items": {
"type": "object",
"properties": {
"sample": {
"id": {
"type": "string",
"pattern": "^\\S+$",
"errorMessage": "Sample name must be provided and cannot contain spaces",
"meta": ["id"]
},
"fastq_1": {
"path": {
"type": "string",
"format": "file-path",
"exists": true,
"pattern": "^\\S+\\.f(ast)?q\\.gz$",
"errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
"format": "path"
},
"fastq_2": {
"checksums": {
"type": "string",
"format": "file-path",
"exists": true,
"pattern": "^\\S+\\.f(ast)?q\\.gz$",
"errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
"errorMessage": "Checksum file must be provided and must exist"
}
},
"required": ["sample", "fastq_1"]
"required": ["id", "path", "checksums"]
}
}
4 changes: 4 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,9 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
// Do not publish the per-chunk outputs of SHA256SUM_CHECK: we only want to keep the
// collated "reports" folder, not the result of the individual chunks.
withName:"SHA256SUM_CHECK" {
publishDir = [ enabled: false ]
}

}
25 changes: 4 additions & 21 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -37,25 +37,7 @@ params.fasta = getGenomeAttribute('fasta')
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

//
// WORKFLOW: Run main analysis pipeline depending on type of input
//
workflow NFCORE_DATASYNC {

take:
samplesheet // channel: samplesheet read in from --input

main:

//
// WORKFLOW: Run pipeline
//
DATASYNC (
samplesheet
)
emit:
multiqc_report = DATASYNC.out.multiqc_report // channel: /path/to/multiqc_report.html
}
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RUN MAIN WORKFLOW
Expand All @@ -80,8 +62,9 @@ workflow {
//
// WORKFLOW: Run main workflow
//
NFCORE_DATASYNC (
PIPELINE_INITIALISATION.out.samplesheet
DATASYNC (
PIPELINE_INITIALISATION.out.samplesheet,
params.workflow_type
)
//
// SUBWORKFLOW: Run completion tasks
Expand All @@ -93,7 +76,7 @@ workflow {
params.outdir,
params.monochrome_logs,
params.hook_url,
NFCORE_DATASYNC.out.multiqc_report
DATASYNC.out.multiqc_report
)
}

Expand Down
26 changes: 26 additions & 0 deletions modules/local/sha256sum/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
 * Verify files against a sha256sum checksum file.
 *
 * Input:
 *   meta           - sample meta map; `meta.id` is used to name the report
 *   path_to_check  - path staged into the task directory so that the entries of the
 *                    checksum file can be resolved
 *   checksum_file  - checksum file passed to `sha256sum -c`
 *
 * Output:
 *   report    - `<meta.id>.report.txt` with the output of `sha256sum -c`
 *   exit_code - exit status of `sha256sum` as an environment value (string)
 *
 * The task itself does not fail when verification fails; callers have to inspect `exit_code`.
 */
process SHA256SUM_CHECK {
    // Previously considered: container "biocontainers/fastp:0.23.4--h5f740d0_0" (same as the nf-core shasum module).
    // Rocky doesn't contain ps, which is required by Nextflow: https://nextflow.io/docs/latest/container.html

    label 'process_single'

    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
        'nf-core/ubuntu:20.04' }"

    input:
    tuple val(meta), path(path_to_check), path(checksum_file)

    output:
    tuple val(meta), path("${meta.id}.report.txt"), emit: report
    tuple val(meta), env("EXIT_CODE"), emit: exit_code

    script:
    """
    # we don't want to fail, even when subprocess fails
    set +euo pipefail
    # Redirect stdout to the report first, then point stderr at it, so that both
    # streams end up in the report (the order of the redirections matters).
    sha256sum --strict -c ${checksum_file} > ${meta.id}.report.txt 2>&1
    EXIT_CODE=\$?
    # last command of the script exits with status 0
    echo
    """
}
7 changes: 6 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ params {
max_multiqc_email_size = '25.MB'
multiqc_methods_description = null

// workflow
workflow_type = "checksum_verify"
// Checksum verify options
chunksize = 100

// Boilerplate options
outdir = null
publish_dir_mode = 'copy'
Expand Down Expand Up @@ -232,7 +237,7 @@ manifest {

// Nextflow plugins
plugins {
id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet
id 'nf-schema@2.2.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet
}

validation {
Expand Down
6 changes: 6 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@
"description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
"fa_icon": "fas fa-folder-open"
},
"workflow_type": {
"type": "string"
},
"chunksize": {
"type": "integer"
},
"email": {
"type": "string",
"description": "Email address for completion summary.",
Expand Down
56 changes: 56 additions & 0 deletions subworkflows/local/checksum_verify.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
include { SHA256SUM_CHECK } from "../../modules/local/sha256sum/main"
include { MULTIQC } from '../../modules/nf-core/multiqc/main'
include { paramsSummaryMap } from 'plugin/nf-schema'
include { paramsSummaryMultiqc } from '../nf-core/utils_nfcore_pipeline'
include { softwareVersionsToYAML } from '../nf-core/utils_nfcore_pipeline'
include { methodsDescriptionText } from '../local/utils_nfcore_datasync_pipeline'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RUN MAIN WORKFLOW
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/


//
// SUBWORKFLOW: verify checksum files in chunks, collate the per-chunk reports and warn
// about every sample for which at least one chunk did not verify.
//
workflow CHECKSUM_VERIFY {

    take:
    ch_samplesheet // channel: [ meta, path, checksum_file ] read in from --input

    main:

    ch_versions = Channel.empty()
    ch_multiqc_files = Channel.empty()

    // Split every checksum file into chunk files of `params.chunksize` and emit one
    // [ meta, path, chunk ] tuple per chunk, so that each chunk is checked separately.
    ch_chunks = ch_samplesheet.flatMap { meta, path, checksum_file ->
        checksum_file
            .splitText(by: params.chunksize, file: true)
            .collect { chunk -> [meta, path, chunk] }
    }
    SHA256SUM_CHECK(ch_chunks)

    // collate reports from chunks: all chunk reports of a sample are collected into
    // "<outdir>/reports/<meta.id>.report.txt"
    SHA256SUM_CHECK.out.report.collectFile(storeDir: "${params.outdir}/reports") { meta, report ->
        ["${meta.id}.report.txt", report]
    }

    // check if verification was successful (= all processes exited with code 0)
    exit_codes = SHA256SUM_CHECK.out.exit_code
        .groupTuple()
        // exit codes are compared as strings, as in the process' env() output
        .map { meta, codes -> [meta, codes.every { it == "0" }] }
        .map { meta, status ->
            if (!status) {
                log.warn("Checksum verification failed for ${meta.id}!")
            }
        }

    emit:
    versions = ch_versions // channel: [ path(versions.yml) ]
    multiqc_files = ch_multiqc_files

}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
THE END
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
23 changes: 1 addition & 22 deletions subworkflows/local/utils_nfcore_datasync_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -74,22 +74,9 @@ workflow PIPELINE_INITIALISATION {

Channel
.fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json"))
.map {
meta, fastq_1, fastq_2 ->
if (!fastq_2) {
return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ]
} else {
return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ]
}
}
.groupTuple()
.map { samplesheet ->
validateInputSamplesheet(samplesheet)
}
.map {
meta, fastqs ->
return [ meta, fastqs.flatten() ]
}
.set { ch_samplesheet }

emit:
Expand Down Expand Up @@ -160,15 +147,7 @@ def validateInputParameters() {
// Validate channels from input samplesheet
//
def validateInputSamplesheet(input) {
def (metas, fastqs) = input[1..2]

// Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
def endedness_ok = metas.collect{ meta -> meta.single_end }.unique().size == 1
if (!endedness_ok) {
error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}")
}

return [ metas[0], fastqs ]
return input
}
//
// Get attribute from genome config file e.g. fasta
Expand Down
24 changes: 14 additions & 10 deletions workflows/datasync.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
include { FASTQC } from '../modules/nf-core/fastqc/main'
include { MULTIQC } from '../modules/nf-core/multiqc/main'
include { CHECKSUM_VERIFY } from "../subworkflows/local/checksum_verify"
include { paramsSummaryMap } from 'plugin/nf-schema'
include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
Expand All @@ -20,18 +20,21 @@ workflow DATASYNC {

take:
ch_samplesheet // channel: samplesheet read in from --input
workflow_type

main:

ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
//
// MODULE: Run FastQC
//
FASTQC (
ch_samplesheet
)
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]})
ch_versions = ch_versions.mix(FASTQC.out.versions.first())

if(workflow_type == "checksum_verify") {
CHECKSUM_VERIFY(ch_samplesheet)
ch_versions = ch_versions.mix(CHECKSUM_VERIFY.out.versions)
ch_multiqc_files = ch_multiqc_files.mix(CHECKSUM_VERIFY.out.multiqc_files)
} else {
error "Not Implemented"
}


//
// Collate and save software versions
Expand Down Expand Up @@ -85,7 +88,8 @@ workflow DATASYNC {
[]
)

emit:multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html
emit:
multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html
versions = ch_versions // channel: [ path(versions.yml) ]

}
Expand Down
Loading