ikmb · marchoeppner · Dec 16, 2022 · Dec 14, 2022 · Dec 14, 2022 · Dec 14, 2022
diff --git a/Dockerfile b/Dockerfile
@@ -4,14 +4,16 @@ LABEL authors="Marc Hoeppner" \
 
 COPY environment.yml /
 RUN conda env create -f /environment.yml && conda clean -a
-ENV PATH /opt/conda/envs/hla-1.0/bin:/opt/hisat-genotype:/opt/hisat-genotype/hisat2:$PATH
+ENV PATH /opt/conda/envs/hla-1.0/bin:/opt/hisat-genotype:/opt/hisat-genotype/hisat2:/opt/hlascan/:$PATH
 ENV PYTHONPATH /opt/hisat-genotype/hisatgenotype_modules:$PYTHONPATH
 
 RUN apt-get -y update && apt-get -y install make wget git g++ ruby-full ruby-dev
 
 RUN cd /opt && git clone --recurse-submodules https://github.com/DaehwanKimLab/hisat-genotype \
 	&& cd hisat-genotype/hisat2 && make -j2
 
+RUN cd /opt && mkdir hlascan && cd hlascan && wget https://github.com/SyntekabioTools/HLAscan/releases/download/v2.1.4/hla_scan_r_v2.1.4 && mv hla_scan_r_v2.1.4 hla_scan && chmod +x hla_scan
+
 RUN gem install json
 RUN gem install prawn
 RUN gem install prawn-table
diff --git a/bin/report.rb b/bin/report.rb
@@ -45,19 +45,20 @@
 
 sample = options.sample
 
-alleles =  { "A" => { "xHLA" => [], "Hisat" => [], "Optitype" => []  }, 
-	"B" => { "xHLA" => [], "Hisat" => [], "Optitype" => []  },
-	"C" => { "xHLA" => [], "Hisat" => [], "Optitype" => []  },
-	"DPB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => []  },
-	"DQB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => []  },
-	"DRB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => []  },
-	"DQA1" => { "xHLA" => [], "Hisat" => [], "Optitype" => []  }
+alleles =  { "A" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => []  }, 
+	"B" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => []  },
+	"C" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => []  },
+	"DPB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => []  },
+	"DQB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => []  },
+	"DRB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => []  },
+	"DQA1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => []  }
 }
 
 files = Dir["*"]
 xhla = files.find{|f| f.upcase.include?("XHLA") }
 hisat = files.find{|f| f.upcase.include?("HISAT") }
 optitype = files.find{|f| f.upcase.include?("OPTI") }
+hlascan = files.select {|f| f.upcase.include?("HLASCAN") }
 
 ########################
 ### xHLA data processing
@@ -71,10 +72,45 @@
 
 	alleles.keys.each do |k|
 
-		alleles[k]["xHLA"] << this_alleles.select {|al| al.match(/^#{k}.*/) }
+		this_alleles.select {|al| al.match(/^#{k}.*/) }.each {|a| alleles[k]["xHLA"] << a }
 	end
 end
 
+###########################
+### HLAscan data processing
+###########################
+
+unless hlascan.empty?
+
+	hlascan.each do |h|
+
+		lines = IO.readlines(h)
+		gene_line = lines.find {|l| l.include?("HLA gene") }
+
+		next unless gene_line
+
+		gene = gene_line.split(" ")[-1].split("-")[-1]
+
+		allele_1 = "???"
+		allele_2 = "???"
+
+		first = lines.find {|l| l.include?("Type 1") }
+		second = lines.find {|l| l.include?("Type 2") }
+
+		if first
+			allele_1 = "#{gene}*#{first.split(/\s+/)[2]}"
+		end
+		if second
+			allele_2 = "#{gene}*#{second.split(/\s+/)[2]}"
+		end
+
+		alleles[gene]["HLAscan"] << allele_1
+		alleles[gene]["HLAscan"] << allele_2
+
+	end
+
+end
+
 ############################
 ### Optitype data processing
 ############################
@@ -116,23 +152,14 @@
 
 
 	header.each_with_index do |h,i|
-		if h.include?("EM: A")
+
+		if h.include?("EM: ")
+			gene = h.split(" ")[-1]
 			tmp = info[i]
 			tmp.split(",").each do |t|
-				alleles["A"]["Hisat"] << t.split(" ")[0]
-			end
-		elsif h.include?("EM: B")
-			tmp = info[i]
-                        tmp.split(",").each do |t|
-                                alleles["B"]["Hisat"] << t.split(" ")[0]
-                        end
-		elsif h.include?("EM: C")
-			tmp = info[i]
-                        tmp.split(",").each do |t|
-                                alleles["C"]["Hisat"] << t.split(" ")[0]
+                                alleles[gene]["Hisat"] << t.split(" ")[0].strip
                         end
 		end
-
 	end
 
 end
@@ -165,13 +192,14 @@
 
 # Table content
 results = []
-results << [ "Allele", "xHLA (Nicht-kommerziell)", "Hisat", "Optitype" ]
+results << [ "HLA Gene", "xHLA", "Hisat", "Optitype", "HLAscan" ]
 alleles.keys.each do |k|
-	results << [ k, alleles[k]["xHLA"].sort.join(", "), alleles[k]["Hisat"].sort.join(", "), alleles[k]["Optitype"].sort.join(", ") ]
+	results << [ k, alleles[k]["xHLA"].sort.join("\n"), alleles[k]["Hisat"].sort.join("\n"), alleles[k]["Optitype"].sort.join("\n"), alleles[k]["HLAscan"].sort.join("\n") ]
 end
 
 t = pdf.make_table( 
-	results
+	results,
+	:header => true
  )
 
 t.draw

diff --git a/conf/base.config b/conf/base.config
@@ -36,6 +36,11 @@ process {
 	memory = { check_max( 32.GB * task.attempt, 'memory' ) }
   }
 
+  withName: 'HLASCAN' {
+	cpus = 4
+        memory = { check_max( 32.GB * task.attempt, 'memory' ) }
+  }
+
   withName: 'BWA' {
 	time = { check_max( 12.h * task.attempt, 'time' ) }
         memory = { check_max( 64.GB * task.attempt, 'memory' ) }

diff --git a/conf/diagnostic.config b/conf/diagnostic.config
@@ -9,6 +9,7 @@ params {
 	max_time = 240.h
 	maxMultiqcEmailFileSize = 25.MB
 	hisat_genome_index = "/work_ifs/ikmb_repository/references/hisat-genotype/2022-07/"
+	hlascan_db = "/work_ifs/ikmb_repository/databases/hlascan/2022-12/HLA-ALL.IMGT"
 }
 
 // Resource manager settings

diff --git a/conf/medcluster.config b/conf/medcluster.config
@@ -1,13 +1,15 @@
 
 // Job parameters to set for this environment
 
-gatk_bundle_path = "/work_ifs/ikmb_repository/references/gatk/v2"
+
+gatk_bundle_path = "/work_beegfs/ikmb_repository/references/gatk/v2"
 
 params {
 	max_cpus = 24
 	max_ram = 250.GB
 	max_time = 24.h
 	hisat_genome_index = "/work_beegfs/ikmb_repository/references/hisat-genotype/2022-07/"
+	hlascan_db = "/work_beegfs/ikmb_repository/databases/hlascan/2022-12/HLA-ALL.IMGT"
 }
 
 // Resource manager settings

diff --git a/main.nf b/main.nf
@@ -36,8 +36,8 @@ log.info "------------------------------------------"
 log.info "IKMB HLA Pipeline"
 log.info "------------------------------------------"
 log.info "${workflow.manifest.description}              v${params.version}"
-log.info "Nextflow Version:			                     		$workflow.nextflow.version"
-log.info "Tools requested:						${params.tools}"
+log.info "Nextflow Version:		                     		$workflow.nextflow.version"
+log.info "Tools requested:							${params.tools}"
 log.info "------------------------------------------"
 
 workflow {

diff --git a/modules/hisat/genotype.nf b/modules/hisat/genotype.nf
@@ -13,6 +13,6 @@ process HISAT_GENOTYPE {
 	script:
 
 	"""
-		hisatgenotype --index_dir ${params.hisat_genome_index} --base hla -p ${task.cpus}  --locus-list A,B,C,DPB1,DQB1,DRB1,DQA1 -1 $left -2 $right 
+		hisatgenotype --index_dir ${params.hisat_genome_index} --base hla -p ${task.cpus}  --locus-list ${params.hla_genes_hisat} -1 $left -2 $right 
 	"""
 }
diff --git a/modules/hlascan.nf b/modules/hlascan.nf
@@ -0,0 +1,20 @@
+// Runs hla_scan for one sample and one HLA gene on the supplied BAM and keeps the
+// tool's standard output as <sample_id>_<gene>_hlascan.txt, which is emitted as `report`.
+process HLASCAN {
+
+	publishDir "${params.outdir}/${meta.patient_id}/${meta.sample_id}", mode: 'copy'
+
+	tag "${meta.sample_id}|${gene}"
+
+	input:
+	tuple val(meta),path(bam),path(bai),val(gene)
+
+	output:
+	tuple val(meta),path(txt), emit: report
+
+	script:
+	txt = meta.sample_id + "_" + gene + "_hlascan.txt"
+
+	// stderr is discarded and a non-zero exit status is ignored, so the report file is
+	// always created but may be empty or incomplete.
+	// NOTE(review): presumably hla_scan exits non-zero when a gene cannot be typed - confirm.
+	"""
+		hla_scan -b $bam -d ${params.hlascan_db} -v 38 -t ${task.cpus} -g HLA-${gene} > $txt 2> /dev/null || true
+	"""
+}
diff --git a/nextflow.config b/nextflow.config
@@ -15,9 +15,14 @@ params {
 	logo = "${baseDir}/assets/ikmblogo.png"
 
 	hisat_genome_index = null
+	hlascan_db = null
+
 	optitype_index = "${baseDir}/assets/optitype/hla_reference_dna.fasta"
 	optitype_config = "${baseDir}/assets/optitype/config.ini"
 
+	hla_genes = [ "A", "B", "C", "DPB1","DQB1","DRB1","DQA1","DPA1"]
+	hla_genes_hisat = "A,B,C,DRB1,DQA1,DQB1,DPA1,DPB1"
+
         max_memory = 128.GB
         max_cpus = 20
         max_time = 240.h
@@ -27,7 +32,7 @@ params {
 
 manifest {
         name = "ikmb/hla"
-	version = "1.0"
+	version = "1.1"
 	description = "Pipeline to determine HLA alleles from short-read data"
 	author = "Marc Hoeppner"
 	homePage = "https://github.com/ikmb/hla"
@@ -53,7 +58,7 @@ dag {
         file = "${params.outdir}/pipeline_info/pipeline_dag.svg"
 }
 
-process.container = 'ikmb/hla:devel'
+process.container = 'ikmb/hla:1.1'
 
 profiles {
 	standard {

diff --git a/workflows/hla.nf b/workflows/hla.nf
@@ -6,6 +6,7 @@ include { SOFTWARE_VERSIONS } from '../modules/software_versions'
 include { MULTIQC } from '../modules/multiqc'
 include { OPTITYPE } from '../subworkflows/optitype'
 include { REPORT } from '../modules/reporting'
+include { HLASCAN } from '../modules/hlascan'
 
 // Helper function for the sample sheet parsing to produce sane channel elements
 def returnFile(it) {
@@ -60,6 +61,8 @@ if (params.samples) {
 
 tools = params.tools ? params.tools.split(',').collect{it.trim().toLowerCase().replaceAll('-', '').replaceAll('_', '')} : []
 
+ch_genes = Channel.fromList(params.hla_genes)
+
 ch_versions = Channel.from([])
 ch_qc = Channel.from([])
 ch_reports = Channel.from([])
@@ -90,6 +93,13 @@ workflow HLA {
 		ch_reports = ch_reports.mix(XHLA_TYPING.out.report)
 	}
 
+	if ( 'hlascan' in tools) {
+		HLASCAN(
+			TRIM_AND_ALIGN.out.bam.combine(ch_genes)
+		)
+		ch_reports = ch_reports.mix(HLASCAN.out.report)
+	}
+
 	if ('optitype' in tools) {
 		OPTITYPE(
 			TRIM_AND_ALIGN.out.reads
-Original file line number
+Diff line change
@@ Expand Up / @@ -13,6 +13,6 @@ process HISAT_GENOTYPE { @@
     	script:
     	"""
-    		hisatgenotype --index_dir ${params.hisat_genome_index} --base hla -p ${task.cpus}  --locus-list A,B,C,DPB1,DQB1,DRB1,DQA1 -1 $left -2 $right
+    		hisatgenotype --index_dir ${params.hisat_genome_index} --base hla -p ${task.cpus}  --locus-list ${params.hla_genes_hisat} -1 $left -2 $right
     	"""
     }