Skip to content

Commit 924ce82

Browse files
committed
Implement chunking
1 parent b2b17fa commit 924ce82

File tree

2 files changed

+23
-44
lines changed

2 files changed

+23
-44
lines changed

modules/local/sha256sum/main.nf

+10-7
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
process SHA256SUM_CHECK {
2-
container "biocontainers/fastp:0.23.4--h5f740d0_0" //Using the same as the nf-core shasum module
3-
//Rocky doesnt contain ps - which is required for nextflow https://nextflow.io/docs/latest/container.html
2+
// container "biocontainers/fastp:0.23.4--h5f740d0_0" //Using the same as the nf-core shasum module
3+
// //Rocky doesn't contain ps - which is required for nextflow https://nextflow.io/docs/latest/container.html
44

55
label 'process_single'
66

@@ -9,15 +9,18 @@ process SHA256SUM_CHECK {
99
'nf-core/ubuntu:20.04' }"
1010

1111
input:
12-
tuple val(meta), path(checksum_file), path("rename.sh"), path("files/??????"),
12+
tuple val(meta), path(path_to_check), path(checksum_file)
1313

1414
output:
15-
tuple val(meta), path(report)
15+
tuple val(meta), path("${meta.id}.report.txt"), emit: report
16+
tuple val(meta), env("EXIT_CODE"), emit: exit_code
1617

1718
script:
1819
"""
19-
bash rename.sh
20-
21-
cd work && sha256sum -c ${checksum_file} > ../${meta.id}.report.txt
20+
# we don't want to fail, even when subprocess fails
21+
set +euo pipefail
22+
sha256sum --strict -c ${checksum_file} 2>&1 > ${meta.id}.report.txt
23+
EXIT_CODE=\$?
24+
echo
2225
"""
2326
}

subworkflows/local/checksum_verify.nf

+13-37
Original file line numberDiff line numberDiff line change
@@ -16,24 +16,6 @@ include { methodsDescriptionText } from '../local/utils_nfcore_datasync_pipeline
1616
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1717
*/
1818

19-
def splitChecksumFile(f, batchsize) {
20-
lines = []
21-
f.eachLine { line, index ->
22-
def parts = line.split(/\s+/)
23-
lines.add([parts[0], parts[1], String.format('%06d', (index % batchsize) + 1)])
24-
}
25-
return lines.collate(batchsize)
26-
}
27-
28-
def makeRenameScript(batch) {
29-
script = []
30-
script.add("#!/bin/bash -euo pipefail")
31-
script.add("mkdir -p work")
32-
batch.each { checksum, filename, index ->
33-
script.add("mkdir -p work/\$(dirname '${filename}') && mv files/${index} 'work/${filename}'")
34-
}
35-
return script.join("\n")
36-
}
3719

3820
workflow CHECKSUM_VERIFY {
3921

@@ -45,27 +27,21 @@ workflow CHECKSUM_VERIFY {
4527
ch_versions = Channel.empty()
4628
ch_multiqc_files = Channel.empty()
4729

48-
ch_batches = ch_samplesheet.map{ meta, path, checksum_file ->
49-
splitChecksumFile(checksum_file, params.chunksize).withIndex().collect {
50-
chunk, index -> [meta, chunk]
30+
ch_chunks = ch_samplesheet.flatMap { meta, path, checksum_file ->
31+
checksum_file.splitText( by: params.chunksize, file: true).collect{ it -> [meta, path, it]}
32+
}
33+
SHA256SUM_CHECK(ch_chunks)
34+
35+
// collate reports from chunks
36+
SHA256SUM_CHECK.out.report.collectFile(storeDir: "${params.outdir}/reports"){ meta, report -> ["${meta.id}.report.txt", report]}
37+
38+
// check if verification was successful (= all processes exited with code 0)
39+
exit_codes = SHA256SUM_CHECK.out.exit_code.groupTuple().map{ meta, exit_codes -> [meta, exit_codes.every{ it == "0" }] }.map{
40+
meta, status -> if(!status) {
41+
log.warn("Checksum verifycation failed for ${meta.id}!")
5142
}
52-
}.flatMap { meta, chunk -> tuple(meta, chunk)}
53-
// ch_batches.view()
54-
ch_scripts = ch_batches.map{ meta, chunk -> [meta, makeRenameScript(chunk)] }
55-
// ch_scripts.view()
56-
ch_files = ch_batches.join(ch_samplesheet).map{ meta, chunk, path, checksum_file ->
57-
[meta, chunk.collect{ checksum, filename, idx -> file("${path}/${filename}")}]
5843
}
59-
ch_files.view()
60-
// ch_files = ch_batches.map{
61-
// meta, path, chunk -> chunk.each{
62-
// checksum, filename, numeric_id ->
63-
// files = []
64-
// files.add(file("${path}/${filename}", checkIfExists:true))
65-
// }
66-
// }
67-
// ch_files.view()
68-
// SHA256SUM_CHECK([[:], ["foo/test.txt", "foo/bar.txt"], [file("foo/test.txt"), file("foo/bar.txt")], []])
44+
6945

7046
emit:
7147
versions = ch_versions // channel: [ path(versions.yml) ]

0 commit comments

Comments
 (0)