Skip to content

Commit 7218c1c

Browse files
authored
Merge pull request #11 from ikmb/devel
Release 1.2 with cleaned up modules and version reporting
2 parents 0bb9136 + 320ad2b commit 7218c1c

34 files changed

+609
-333
lines changed

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,15 @@
22

33
# IKMB HLA pipeline
44

5-
This pipeline performs HLA typing from short read NGS data.
5+
This pipeline performs HLA typing from short read NGS data. Several tools are supported:
6+
7+
* [xHLA](https://github.com/humanlongevity/HLA) (xhla)
8+
* [Hisat-genotype](https://daehwankimlab.github.io/hisat-genotype/) (hisat) Slow on anything other than targeted sequencing!
9+
* [Optitype](https://github.com/FRED-2/OptiType) (optitype)
10+
* [HLAscan](https://github.com/SyntekabioTools/HLAscan) (hlascan)
11+
* [HLA-HD](https://www.genome.med.kyoto-u.ac.jp/HLA-HD/) (hlahd)
12+
13+
For usage instructions, please see below.
614

715
## Documentation
816

bin/gendx_parser.rb

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def hisat_reconcile(list)
113113

114114
# FIND MATCHING JSON FILE
115115
json = jsons.find{|j| j.include?(sample) }
116-
abort "Could not find matching json file (#{sample}) under provided path!"
116+
abort "Could not find matching json file (#{sample}) under the path provided!"
117117

118118
# Build a HASH per gene, for each calling approach - starting with GenDX
119119
if json
@@ -213,6 +213,13 @@ def hisat_reconcile(list)
213213
if tool == "Hisat"
214214
tcalls = hisat_reconcile(tcalls)
215215
end
216+
217+
tcalls = tcalls.select {|tc| tc.length > 1 }
218+
219+
# if only one call exists, we assume it is homozygous and we double it.
220+
if tcalls.length == 1
221+
tcalls << tcalls[0]
222+
end
216223

217224
tcalls.sort[0..1].each_with_index do |t,i|
218225

bin/report.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@
248248
r = [ k ]
249249
this_result = [ k ]
250250
rheader[1..-1].each do |h|
251-
this_result << alleles[k][h].sort.join("\n")
251+
this_result << alleles[k][h].sort.map {|a| a.split("*")[-1]}.join("\n")
252252
end
253253
results << this_result
254254
end

conf/base.config

Lines changed: 26 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -18,51 +18,40 @@ process {
1818

1919
// software dependencies moved to conda.config
2020

21-
withName:FASTP {
22-
cpus = 4
23-
}
24-
withName: HISAT_GENOTYPE {
25-
time = { check_max( 12.h * task.attempt, 'time' ) }
26-
memory = { check_max( 64.GB * task.attempt, 'memory' ) }
27-
cpus = params.max_cpus/2
28-
}
29-
30-
withName: 'DEDUP|BAM2FASTQ|BEDCOV' {
31-
cpus = 8
32-
memory = { check_max( 8.GB * task.attempt, 'memory' ) }
33-
21+
withLabel: short_serial {
22+
time = { check_max( 4.h * task.attempt, 'time' ) }
23+
memory = { check_max( 8.GB * task.attempt, 'memory' ) }
3424
}
35-
withName: 'XHLA' {
36-
memory = { check_max( 32.GB * task.attempt, 'memory' ) }
25+
withLabel: medium_serial {
26+
time = { check_max( 8.h * task.attempt, 'time' ) }
27+
memory = { check_max( 8.GB * task.attempt, 'memory' ) }
3728
}
38-
39-
withName: 'HLASCAN' {
40-
cpus = 4
41-
memory = { check_max( 32.GB * task.attempt, 'memory' ) }
29+
withLabel: long_serial {
30+
time = { check_max( 12.h * task.attempt, 'time' ) }
31+
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
4232
}
43-
44-
withName: 'BWA' {
45-
time = { check_max( 12.h * task.attempt, 'time' ) }
46-
memory = { check_max( 64.GB * task.attempt, 'memory' ) }
47-
cpus = { check_max( 12 * task.attempt, 'cpus' ) }
33+
withLabel: short_parallel {
34+
time = { check_max( 4.h * task.attempt, 'time' ) }
35+
memory = { check_max( 16.GB * task.attempt, 'memory' ) }
36+
cpus = { check_max( 4 * task.attempt, 'cpus' ) }
4837
}
49-
50-
withLabel: 'optitype' {
51-
container = "docker://quay.io/biocontainers/optitype:1.3.5--hdfd78af_1"
38+
withLabel: medium_parallel {
39+
time = { check_max( 8.h * task.attempt, 'time' ) }
40+
memory = { check_max( 24.GB * task.attempt, 'memory' ) }
41+
cpus = { check_max( 8 * task.attempt, 'cpus' ) }
5242
}
53-
54-
withName: 'OPTITYPE_FILTER' {
55-
time = { check_max( 12.h * task.attempt, 'time' ) }
56-
memory = { check_max( 64.GB * task.attempt, 'memory' ) }
57-
cpus = { check_max( 10 * task.attempt, 'cpus' ) }
43+
withLabel: long_parallel {
44+
time = { check_max( 24.h * task.attempt, 'time' ) }
45+
memory = { check_max( 24.GB * task.attempt, 'memory' ) }
46+
cpus = { check_max( 8 * task.attempt, 'cpus' ) }
5847
}
59-
withName: 'OPTITYPE_RUN' {
60-
time = { check_max( 4.h * task.attempt, 'time' ) }
61-
memory = { check_max( 32.GB * task.attempt, 'memory' ) }
62-
cpus = 12
48+
withLabel: extra_long_parallel {
49+
time = { check_max( 96.h * task.attempt, 'time' ) }
50+
memory = { check_max( 24.GB * task.attempt, 'memory' ) }
51+
cpus = { check_max( 8 * task.attempt, 'cpus' ) }
6352
}
6453

65-
54+
6655
}
6756

6857

main.nf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ WorkflowHla.initialise( params, log)
2727

2828
params.fasta = file(params.genomes[ "hg38" ].fasta, checkIfExists: true)
2929
params.dict = file(params.genomes[ "hg38" ].dict, checkIfExists: true)
30+
params.fai = file(params.genomes[ "hg38" ].fasta + ".fai", checkIfExists: true)
3031

3132
include { HLA } from './workflows/hla'
3233

modules/bwa.nf

Lines changed: 0 additions & 29 deletions
This file was deleted.

modules/bwa/mem.nf

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
process BWA_MEM {
2+
3+
tag "${meta.patient_id}|${meta.sample_id}"
4+
5+
label 'medium_parallel'
6+
7+
input:
8+
tuple val(meta), path(left),path(right)
9+
val(bwa_index)
10+
11+
output:
12+
tuple val(meta), path(bam), emit: bam
13+
val(sample), emit: sample_name
14+
val(meta), emit: meta_data
15+
path("versions.yml"), emit: versions
16+
17+
script:
18+
bam = "${meta.sample_id}_${meta.library_id}_${meta.readgroup_id}_bwa-aligned_fm.bam"
19+
sample = "${meta.patient_id}_${meta.sample_id}"
20+
21+
"""
22+
bwa mem -H ${params.dict} -M -R "@RG\\tID:${meta.readgroup_id}\\tPL:ILLUMINA\\tPU:${meta.platform_unit}\\tSM:${meta.patient_id}_${meta.sample_id}\\tLB:${meta.library_id}\\tDS:${bwa_index}\\tCN:${meta.center}" \
23+
-t ${task.cpus} ${bwa_index} $left $right \
24+
| samtools fixmate -@ ${task.cpus} -m - - \
25+
| samtools sort -@ ${task.cpus} -m 4G -O bam -o $bam -
26+
27+
cat <<-END_VERSIONS > versions.yml
28+
"${task.process}":
29+
bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//')
30+
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
31+
END_VERSIONS
32+
33+
"""
34+
}

modules/concat_fastq.nf

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
process CONCAT_FASTQ {
22

3+
tag "${meta.sample_id}"
4+
35
//publishDir "${params.outdir}/GenDX", mode: 'copy'
46

57
input:
@@ -17,4 +19,4 @@ process CONCAT_FASTQ {
1719
zcat $r2 | gzip -c >> $r2_merged
1820
"""
1921

20-
}
22+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
process CUSTOM_DUMPSOFTWAREVERSIONS {
2+
3+
label 'short_serial'
4+
5+
container 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0'
6+
7+
input:
8+
path versions
9+
10+
output:
11+
path "software_versions.yml" , emit: yml
12+
path "software_versions_mqc.yml", emit: mqc_yml
13+
path "versions.yml" , emit: versions
14+
15+
script:
16+
template 'dumpsoftwareversions.py'
17+
18+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
name: custom_dumpsoftwareversions
2+
description: Custom module used to dump software versions within the nf-core pipeline template
3+
keywords:
4+
- custom
5+
- version
6+
tools:
7+
- custom:
8+
description: Custom module used to dump software versions within the nf-core pipeline template
9+
homepage: https://github.com/nf-core/tools
10+
documentation: https://github.com/nf-core/tools
11+
licence: ["MIT"]
12+
input:
13+
- versions:
14+
type: file
15+
description: YML file containing software versions
16+
pattern: "*.yml"
17+
18+
output:
19+
- yml:
20+
type: file
21+
description: Standard YML file containing software versions
22+
pattern: "software_versions.yml"
23+
- mqc_yml:
24+
type: file
25+
description: MultiQC custom content YML file containing software versions
26+
pattern: "software_versions_mqc.yml"
27+
- versions:
28+
type: file
29+
description: File containing software versions
30+
pattern: "versions.yml"
31+
32+
authors:
33+
- "@drpatelh"
34+
- "@grst"

0 commit comments

Comments
 (0)