Merge pull request #21 from ikmb/devel

marchoeppner · web-flow · commit 78036cc834b6 · 2023-07-17T12:43:15.000+02:00
Devel
diff --git a/bin/json2pdf.rb b/bin/json2pdf.rb
@@ -52,7 +52,6 @@ def build_intervals(regions)
 opts.banner = "A script description here"
 opts.separator ""
 opts.on("-j","--json", "=JSON","JSON input") {|argument| options.json = argument }
-opts.on("-c","--coverage", "=COVERAGE","Coverage input") {|argument| options.cov = argument }
 opts.on("-l","--logo", "=LOGO","Logo to include") {|argument| options.logo = argument }
 opts.on("-o","--outfile", "=OUTFILE","Output file") {|argument| options.outfile = argument }
 opts.on("-h","--help","Display the usage information") {
@@ -65,26 +64,6 @@ def build_intervals(regions)
 coverage    = {}
 genes       = {}
 
-if options.cov
-    IO.readlines(options.cov).each do |line|
-        chr,from,to,exon,score,strand,cov = line.strip.split("\t")
-        from = from.to_i
-        to = to.to_i
-        len = to-from
-        mean = (cov.to_f/len.to_f).round()
-        
-        gene = exon.split(".")[0]
-
-        if genes.has_key?(gene)
-            genes[gene]["exons"] << [ from, to, mean ]
-        else
-            genes[gene] = { "strand" => strand, "exons" => [ [from, to, mean ]] }
-        end
-
-        coverage[exon] = mean
-    end
-end
-
 json = JSON.parse(IO.readlines(options.json).join("\n"))
 
 calls       = json["calls"]
@@ -94,6 +73,24 @@ def build_intervals(regions)
 data        = []
 header      = []
 
+cov_data    = json["coverage"]
+
+cov_data.each do |gene,data|
+
+    data.each do |d|
+    
+        if genes.has_key?(gene)
+            genes[gene]["exons"] << [ d["start"], d["stop"], d["mean_cov"] ]
+        else
+            genes[gene] = { "strand" => d["strand"], "exons" => [ [ d["start"], d["stop"], d["mean_cov"] ] ] }
+        end
+
+        coverage["exon"] = d["mean_cov"]
+
+    end
+
+end
+
 # Transform the JSON structure into something Prawn can make a table from (array of arrays)
 calls.each do |gene,results|
 
diff --git a/bin/report.rb b/bin/report.rb
@@ -193,7 +193,7 @@ def trim_allele(call,precision)
 
     header.each_with_index do |h,i|
         if h.match(/^A.*/)
-            alleles["A"]["Optitype"] << trim_allle(e[i+1],precision)
+            alleles["A"]["Optitype"] << trim_allele(e[i+1],precision)
         elsif h.match(/^B.*/)
             alleles["B"]["Optitype"] << trim_allele(e[i+1],precision)
         elsif h.match(/^C.*/)
diff --git a/bin/report_with_cov.rb b/bin/report_with_cov.rb
@@ -0,0 +1,275 @@
+#!/usr/bin/env ruby
+# == NAME
+# report.rb
+#
+# == USAGE
+# ./this_script.rb [ -h | --help ]
+#[ -i | --infile ] |[ -o | --outfile ] | 
+# == DESCRIPTION
+# A script to parse raw results from the ikmb/hla pipeline
+#
+# == OPTIONS
+# -h,--help Show help
+# -o,--outfile=OUTFILE : output file
+
+#
+# == EXPERT OPTIONS
+#
+# == AUTHOR
+#  Marc Hoeppner, mphoeppner@gmail.com
+
+require 'optparse'
+require 'ostruct'
+require 'json'
+require 'date'
+
+### Define modules and classes here
+
+def trim_allele(call,precision)
+
+    answer = nil
+
+    call = call.split("*")[-1] if call.include?("*")
+
+    e = call.split(":")
+
+    if e.length > precision
+        answer = e[0..precision-1].join(":")
+    else 
+        answer = call
+    end
+
+    return answer
+end
+
+### Get the script arguments and open relevant files
+options = OpenStruct.new()
+opts = OptionParser.new()
+opts.banner = "A script description here"
+opts.separator ""
+opts.on("-s","--sample", "=SAMPLE","Sample name") {|argument| options.sample = argument }
+opts.on("-c","--coverage", "=COVERAGE","Coverage of exons") {|argument| options.coverage = argument }
+opts.on("-p","--precision", "=PRECISION","Precision of calls") {|argument| options.precision = argument }
+opts.on("-v","--version", "=VERSION","PipelineVersion") {|argument| options.version = argument }
+opts.on("-o","--outfile", "=OUTFILE","Output file") {|argument| options.outfile = argument }
+opts.on("-h","--help","Display the usage information") {
+    puts opts
+    exit
+}
+
+opts.parse! 
+
+options.precision ? precision = options.precision.to_i : precision = 3
+
+date = Date.today.strftime("%d.%m.%Y")
+
+sample = options.sample
+
+alleles =  { "A" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => []  }, 
+    "B" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => []  },
+    "C" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => []  },
+    "DQB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => []  },
+    "DRB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => []  },
+    "DRB4" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => []  },
+    "DRB5" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => []  },
+    "DQA1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => []  },
+    "DRB3" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => []  },
+    "DPA1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => []  },
+    "DPB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => []  }
+}
+
+###################
+# Coverage of exons
+###################
+
+coverage = {}
+
+if options.coverage
+    bedcov = File.open(options.coverage)
+
+    while (line = bedcov.gets)
+
+        # chr6    29942554        29942626        HLA-A.ENST00000376809.1 100     +       9425
+
+        seq,from,to,exon,score,strand,cov = line.strip.split("\t")
+        gene = exon.split(".")[0]
+
+        from = from.to_i
+        to = to.to_i
+        len = to-from
+        mean_cov = (cov.to_f/len.to_f).round()
+
+        this_target = { "exon" => exon, "seq" => seq, "start" => from, "stop" => to, "strand" => strand, "mean_cov" => mean_cov }
+        coverage.has_key?(gene) ? coverage[gene] << this_target : coverage[gene] = [ this_target ]
+
+    end
+
+    bedcov.close
+
+end
+
+# Options
+############
+
+## Only show hisat results supported at this level
+hisat_cutoff	= 0.3
+
+# Find result files
+files		= Dir["*"]
+xhla		= files.find{|f| f.upcase.include?("XHLA") }
+hisat		= files.find{|f| f.upcase.include?("HISAT") }
+optitype	= files.find{|f| f.upcase.include?("OPTI") }
+hlascan		= files.select {|f| f.upcase.include?("HLASCAN") }
+hlahd		= files.find {|f| f.upcase.include?("HLAHD") }
+
+# The header of the result table, only including those tools we actually have data for. 
+rheader		= [ "HLA Genes" ]
+
+########################
+# HLA-HD data processing
+########################
+
+if hlahd 
+
+    IO.readlines(hlahd).each do |line|
+        
+        line.strip!
+        # A       HLA-A*24:02:01  HLA-A*01:01:01
+
+        gene,a,b = line.split("\t")
+        if alleles.has_key?(gene)
+            these_alleles = []
+            these_alleles << trim_allele(a,precision) unless a.include?("Not typed") or a == "-"
+            these_alleles << trim_allele(b,precision) unless b.include?("Not typed") or b == "-"
+
+            alleles[gene]["HLA-HD"] = these_alleles
+        end
+
+    end
+    rheader << "HLA-HD"
+end
+
+########################
+### xHLA data processing
+########################
+
+if xhla
+
+    json = JSON.parse( IO.readlines(xhla).join )
+
+    this_alleles = json["hla"]["alleles"]
+
+    alleles.keys.each do |k|
+
+        this_alleles.select {|al| al.match(/^#{k}.*/) }.each {|a| alleles[k]["xHLA"] << trim_allele(a,precision) }
+    end
+
+    rheader << "xHLA"
+end
+
+###########################
+### HLAscan data processing
+###########################
+
+unless hlascan.empty?
+
+    hlascan.each do |h|
+
+        lines = IO.readlines(h)
+        gene_line = lines.find {|l| l.include?("HLA gene") }
+
+        next unless gene_line
+
+        gene = gene_line.split(" ")[-1].split("-")[-1]
+
+        next unless alleles.has_key?(gene)
+
+        allele_1 = ""
+        allele_2 = ""
+
+        first = lines.find {|l| l.include?("Type 1") }
+        second = lines.find {|l| l.include?("Type 2") }
+
+        these_alleles = []
+        if first
+            allele_1 = "#{gene}*#{first.split(/\s+/)[2]}"
+            these_alleles << allele_1 unless allele_1.length == 0
+        end
+        if second
+            allele_2 = "#{gene}*#{second.split(/\s+/)[2]}"
+            these_alleles << allele_2 unless allele_1.length == 0
+        end
+        
+        these_alleles.each {|a| alleles[gene]["HLAscan"] << trim_allele(a,precision)  }
+
+    end
+
+    rheader << "HLAscan"
+
+end
+
+############################
+### Optitype data processing
+############################
+
+if optitype
+
+    lines = IO.readlines(optitype)[0..1]
+    header = lines.shift.strip.split(/\t/)
+
+    # 0       A*26:01 A*30:01 B*13:02 B*38:01 C*06:02 C*12:03 4526.0  4281.585999999999
+
+    e = lines.shift.strip.split(/\t/)
+
+    header.each_with_index do |h,i|
+        if h.match(/^A.*/)
+            alleles["A"]["Optitype"] << trim_allele(e[i+1],precision)
+        elsif h.match(/^B.*/)
+            alleles["B"]["Optitype"] << trim_allele(e[i+1],precision)
+        elsif h.match(/^C.*/)
+            alleles["C"]["Optitype"] << trim_allele(e[i+1],precision)
+        end
+    end
+
+    rheader << "Optitype"
+
+end
+    
+
+#############################
+#### Hisat Genotype
+#############################
+
+if hisat
+
+    # 1 ranked B*35:08:01 (abundance: 50.20%) 
+
+    lines = IO.readlines(hisat)
+    header = lines.shift.split("\t")
+
+    info = lines.shift.split("\t")
+
+    header.each_with_index do |h,i|
+
+        if h.include?("Allele splitting: ")
+            gene = h.split(" ")[-1]
+            tmp = info[i]
+            tmp.split(",").each do |t|
+                # C*08 - Trimmed (score: 0.9090),C*08:02 - Trimmed (score: 0.3636)
+                # We show percentages for ambiguous calls, but only >= 20% fraction
+                c = trim_allele(t.split(" ")[0].strip,precision)
+                abundance = t.split(" ")[-1].gsub(")","")
+                f = abundance.split("%")[0].to_f
+                next if f < hisat_cutoff
+                    alleles[gene]["Hisat"] << "#{c} (#{abundance})"
+                end
+        end
+    end
+
+    rheader << "Hisat"
+end
+
+f = File.new("#{sample}.json","w+")
+data = { "sample" => sample, "coverage" => coverage, "calls" => alleles, "pipeline_version" => options.version, "date" => date }
+f.puts data.to_json
+f.close
diff --git a/modules/reporting.nf b/modules/reporting.nf
@@ -5,7 +5,7 @@ process REPORT {
     publishDir "${params.outdir}/Reports", mode: 'copy'
 
     input:
-    tuple val(meta),path(reports)
+    tuple val(meta),path(reports),path(coverage)
     val(precision)
 
     output:
@@ -16,7 +16,7 @@ process REPORT {
     json = "${sample}.json"
     
     """
-        report.rb -s ${sample} -v ${workflow.manifest.version} -p $precision
+        report_with_cov.rb -c $coverage -s ${sample} -v ${workflow.manifest.version} -p $precision
     """
 
 }
@@ -28,7 +28,7 @@ process JSON2PDF {
     publishDir "${params.outdir}/Reports", mode: 'copy'
 
     input:
-    tuple val(meta),path(json),path(bed)
+    tuple val(meta),path(json)
 
     output:
     tuple val(meta),path(pdf), emit: pdf
@@ -38,7 +38,7 @@ process JSON2PDF {
     pdf = "${sample}.pdf"
     
     """
-        json2pdf.rb -j ${json} -o $pdf -l ${baseDir}/images/ikmb_bfx_logo.png -c $bed
+        json2pdf.rb -j ${json} -o $pdf -l ${baseDir}/images/ikmb_bfx_logo.png
     """
 }
 
diff --git a/workflows/hla.nf b/workflows/hla.nf
@@ -152,12 +152,12 @@ workflow HLA {
     )
 
     REPORT(
-        ch_reports.groupTuple(),
+        ch_reports.groupTuple().join(bedcov),
         params.precision
     )
 
     JSON2PDF(
-        REPORT.out.json.join(bedcov)
+        REPORT.out.json
     )
 
     JSON2XLS(

Original file line number	Diff line number	Diff line change
`@@ -152,12 +152,12 @@ workflow HLA {`
`152`	`152`	`)`
`153`	`153`
`154`	`154`	`REPORT(`
`155`		`- ch_reports.groupTuple(),`
	`155`	`+ ch_reports.groupTuple().join(bedcov),`
`156`	`156`	`params.precision`
`157`	`157`	`)`
`158`	`158`
`159`	`159`	`JSON2PDF(`
`160`		`- REPORT.out.json.join(bedcov)`
	`160`	`+ REPORT.out.json`
`161`	`161`	`)`
`162`	`162`
`163`	`163`	`JSON2XLS(`