Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@ LABEL authors="Marc Hoeppner" \

COPY environment.yml /
RUN conda env create -f /environment.yml && conda clean -a
ENV PATH /opt/conda/envs/hla-1.0/bin:/opt/hisat-genotype:/opt/hisat-genotype/hisat2:$PATH
ENV PATH /opt/conda/envs/hla-1.0/bin:/opt/hisat-genotype:/opt/hisat-genotype/hisat2:/opt/hlascan/:$PATH
ENV PYTHONPATH /opt/hisat-genotype/hisatgenotype_modules:$PYTHONPATH

RUN apt-get -y update && apt-get -y install make wget git g++ ruby-full ruby-dev

RUN cd /opt && git clone --recurse-submodules https://github.com/DaehwanKimLab/hisat-genotype \
&& cd hisat-genotype/hisat2 && make -j2

RUN cd /opt && mkdir hlascan && cd hlascan && wget https://github.com/SyntekabioTools/HLAscan/releases/download/v2.1.4/hla_scan_r_v2.1.4 && mv hla_scan_r_v2.1.4 hla_scan && chmod +x hla_scan

RUN gem install json
RUN gem install prawn
RUN gem install prawn-table
76 changes: 52 additions & 24 deletions bin/report.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,20 @@

sample = options.sample

alleles = { "A" => { "xHLA" => [], "Hisat" => [], "Optitype" => [] },
"B" => { "xHLA" => [], "Hisat" => [], "Optitype" => [] },
"C" => { "xHLA" => [], "Hisat" => [], "Optitype" => [] },
"DPB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [] },
"DQB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [] },
"DRB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [] },
"DQA1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [] }
alleles = { "A" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [] },
"B" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [] },
"C" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [] },
"DPB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [] },
"DQB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [] },
"DRB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [] },
"DQA1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [] }
}

files = Dir["*"]
xhla = files.find{|f| f.upcase.include?("XHLA") }
hisat = files.find{|f| f.upcase.include?("HISAT") }
optitype = files.find{|f| f.upcase.include?("OPTI") }
hlascan = files.select {|f| f.upcase.include?("HLASCAN") }

########################
### xHLA data processing
Expand All @@ -71,10 +72,45 @@

alleles.keys.each do |k|

alleles[k]["xHLA"] << this_alleles.select {|al| al.match(/^#{k}.*/) }
this_alleles.select {|al| al.match(/^#{k}.*/) }.each {|a| alleles[k]["xHLA"] << a }
end
end

###########################
### HLAscan data processing
###########################

# Collect HLAscan calls. Each file in `hlascan` is searched for
#   * the line containing "HLA gene": the gene name is whatever follows the
#     last "-" of that line's last whitespace-separated token, and
#   * the lines containing "Type 1" / "Type 2": the allele is taken from the
#     third field when the line is split on whitespace.
# Two entries are appended to alleles[gene]["HLAscan"] per report; "???" is
# used when a type line (or its allele field) is missing.
# NOTE(review): the field positions are inherited from the original code and
# assume the HLAscan text layout - confirm against real output.
# Iterating an empty list is a no-op, so no separate emptiness guard is needed.
hlascan.each do |report|
  # File.readlines never treats a leading "|" in the file name as a command.
  lines = File.readlines(report)

  gene_line = lines.find { |l| l.include?("HLA gene") }
  next unless gene_line

  gene = gene_line.split(" ")[-1].split("-")[-1]

  # A report may name a gene that has no row in `alleles`. Indexing into the
  # missing row would raise NoMethodError and abort the whole report, so skip
  # such files with a warning instead.
  calls = alleles[gene]
  unless calls
    warn "Skipping HLAscan report #{report}: gene #{gene.inspect} is not part of the report table"
    next
  end

  ["Type 1", "Type 2"].each do |label|
    type_line = lines.find { |l| l.include?(label) }
    call = type_line && type_line.split(/\s+/)[2]
    calls["HLAscan"] << (call ? "#{gene}*#{call}" : "???")
  end
end

############################
### Optitype data processing
############################
Expand Down Expand Up @@ -116,23 +152,14 @@


header.each_with_index do |h,i|
if h.include?("EM: A")

if h.include?("EM: ")
gene = h.split(" ")[-1]
tmp = info[i]
tmp.split(",").each do |t|
alleles["A"]["Hisat"] << t.split(" ")[0]
end
elsif h.include?("EM: B")
tmp = info[i]
tmp.split(",").each do |t|
alleles["B"]["Hisat"] << t.split(" ")[0]
end
elsif h.include?("EM: C")
tmp = info[i]
tmp.split(",").each do |t|
alleles["C"]["Hisat"] << t.split(" ")[0]
alleles[gene]["Hisat"] << t.split(" ")[0].strip
end
end

end

end
Expand Down Expand Up @@ -165,13 +192,14 @@

# Table content
results = []
results << [ "Allele", "xHLA (Nicht-kommerziell)", "Hisat", "Optitype" ]
results << [ "HLA Gene", "xHLA", "Hisat", "Optitype", "HLAscan" ]
alleles.keys.each do |k|
results << [ k, alleles[k]["xHLA"].sort.join(", "), alleles[k]["Hisat"].sort.join(", "), alleles[k]["Optitype"].sort.join(", ") ]
results << [ k, alleles[k]["xHLA"].sort.join("\n"), alleles[k]["Hisat"].sort.join("\n"), alleles[k]["Optitype"].sort.join("\n"), alleles[k]["HLAscan"].sort.join("\n") ]
end

t = pdf.make_table(
results
results,
:header => true
)

t.draw
Expand Down
5 changes: 5 additions & 0 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ process {
memory = { check_max( 32.GB * task.attempt, 'memory' ) }
}

// Resource requests for the HLASCAN process: 4 CPUs, and a memory request of
// 32 GB multiplied by the task attempt number, passed through check_max
// (defined elsewhere - presumably caps the value at the configured maximum; confirm).
withName: 'HLASCAN' {
cpus = 4
memory = { check_max( 32.GB * task.attempt, 'memory' ) }
}

withName: 'BWA' {
time = { check_max( 12.h * task.attempt, 'time' ) }
memory = { check_max( 64.GB * task.attempt, 'memory' ) }
Expand Down
1 change: 1 addition & 0 deletions conf/diagnostic.config
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ params {
max_time = 240.h
maxMultiqcEmailFileSize = 25.MB
hisat_genome_index = "/work_ifs/ikmb_repository/references/hisat-genotype/2022-07/"
hlascan_db = "/work_ifs/ikmb_repository/databases/hlascan/2022-12/HLA-ALL.IMGT"
}

// Resource manager settings
Expand Down
4 changes: 3 additions & 1 deletion conf/medcluster.config
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@

// Job parameters to set for this environment

gatk_bundle_path = "/work_ifs/ikmb_repository/references/gatk/v2"

gatk_bundle_path = "/work_beegfs/ikmb_repository/references/gatk/v2"

params {
max_cpus = 24
max_ram = 250.GB
max_time = 24.h
hisat_genome_index = "/work_beegfs/ikmb_repository/references/hisat-genotype/2022-07/"
hlascan_db = "/work_beegfs/ikmb_repository/databases/hlascan/2022-12/HLA-ALL.IMGT"
}

// Resource manager settings
Expand Down
4 changes: 2 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ log.info "------------------------------------------"
log.info "IKMB HLA Pipeline"
log.info "------------------------------------------"
log.info "${workflow.manifest.description} v${params.version}"
log.info "Nextflow Version: $workflow.nextflow.version"
log.info "Tools requested: ${params.tools}"
log.info "Nextflow Version: $workflow.nextflow.version"
log.info "Tools requested: ${params.tools}"
log.info "------------------------------------------"

workflow {
Expand Down
2 changes: 1 addition & 1 deletion modules/hisat/genotype.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@ process HISAT_GENOTYPE {
script:

"""
hisatgenotype --index_dir ${params.hisat_genome_index} --base hla -p ${task.cpus} --locus-list A,B,C,DPB1,DQB1,DRB1,DQA1 -1 $left -2 $right
hisatgenotype --index_dir ${params.hisat_genome_index} --base hla -p ${task.cpus} --locus-list ${params.hla_genes_hisat} -1 $left -2 $right
"""
}
20 changes: 20 additions & 0 deletions modules/hlascan.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Runs hla_scan for one gene on one sample's BAM and stores its standard
// output in "<sample_id>_<gene>_hlascan.txt", which is emitted as `report`
// and copied to the sample's output directory.
process HLASCAN {

    publishDir "${params.outdir}/${meta.patient_id}/${meta.sample_id}", mode: 'copy'

    tag "${meta.sample_id}|${gene}"

    input:
    // bai is staged next to the BAM but not passed on the command line.
    tuple val(meta), path(bam), path(bai), val(gene)

    output:
    tuple val(meta), path(txt), emit: report

    script:
    txt = meta.sample_id + "_" + gene + "_hlascan.txt"

    // Best effort by design: stderr is discarded and `|| true` keeps the task
    // successful even when hla_scan exits non-zero, so the (possibly empty)
    // report file is always produced.
    // NOTE(review): `-v 38` is hard-coded - presumably the genome build; confirm.
    """
    hla_scan -b $bam -d ${params.hlascan_db} -v 38 -t ${task.cpus} -g HLA-${gene} > $txt 2> /dev/null || true
    """
}
9 changes: 7 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,14 @@ params {
logo = "${baseDir}/assets/ikmblogo.png"

hisat_genome_index = null
hlascan_db = null

optitype_index = "${baseDir}/assets/optitype/hla_reference_dna.fasta"
optitype_config = "${baseDir}/assets/optitype/config.ini"

hla_genes = [ "A", "B", "C", "DPB1","DQB1","DRB1","DQA1","DPA1"]
hla_genes_hisat = "A,B,C,DRB1,DQA1,DQB1,DPA1,DPB1"

max_memory = 128.GB
max_cpus = 20
max_time = 240.h
Expand All @@ -27,7 +32,7 @@ params {

manifest {
name = "ikmb/hla"
version = "1.0"
version = "1.1"
description = "Pipeline to determine HLA alleles from short-read data"
author = "Marc Hoeppner"
homePage = "https://github.com/ikmb/hla"
Expand All @@ -53,7 +58,7 @@ dag {
file = "${params.outdir}/pipeline_info/pipeline_dag.svg"
}

process.container = 'ikmb/hla:devel'
process.container = 'ikmb/hla:1.1'

profiles {
standard {
Expand Down
10 changes: 10 additions & 0 deletions workflows/hla.nf
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ include { SOFTWARE_VERSIONS } from '../modules/software_versions'
include { MULTIQC } from '../modules/multiqc'
include { OPTITYPE } from '../subworkflows/optitype'
include { REPORT } from '../modules/reporting'
include { HLASCAN } from '../modules/hlascan'

// Helper function for the sample sheet parsing to produce sane channel elements
def returnFile(it) {
Expand Down Expand Up @@ -60,6 +61,8 @@ if (params.samples) {

tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase().replaceAll('-', '').replaceAll('_', '')} : []

ch_genes = Channel.fromList(params.hla_genes)

ch_versions = Channel.from([])
ch_qc = Channel.from([])
ch_reports = Channel.from([])
Expand Down Expand Up @@ -90,6 +93,13 @@ workflow HLA {
ch_reports = ch_reports.mix(XHLA_TYPING.out.report)
}

if ( 'hlascan' in tools) {
HLASCAN(
TRIM_AND_ALIGN.out.bam.combine(ch_genes)
)
ch_reports = ch_reports.mix(HLASCAN.out.report)
}

if ('optitype' in tools) {
OPTITYPE(
TRIM_AND_ALIGN.out.reads
Expand Down