Skip to content

Commit 78036cc

Browse files
authored
Merge pull request #21 from ikmb/devel
Devel
2 parents d536048 + ccb4c9c commit 78036cc

File tree

5 files changed

+300
-28
lines changed

5 files changed

+300
-28
lines changed

bin/json2pdf.rb

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ def build_intervals(regions)
5252
opts.banner = "A script description here"
5353
opts.separator ""
5454
opts.on("-j","--json", "=JSON","JSON input") {|argument| options.json = argument }
55-
opts.on("-c","--coverage", "=COVERAGE","Coverage input") {|argument| options.cov = argument }
5655
opts.on("-l","--logo", "=LOGO","Logo to include") {|argument| options.logo = argument }
5756
opts.on("-o","--outfile", "=OUTFILE","Output file") {|argument| options.outfile = argument }
5857
opts.on("-h","--help","Display the usage information") {
@@ -65,26 +64,6 @@ def build_intervals(regions)
6564
coverage = {}
6665
genes = {}
6766

68-
if options.cov
69-
IO.readlines(options.cov).each do |line|
70-
chr,from,to,exon,score,strand,cov = line.strip.split("\t")
71-
from = from.to_i
72-
to = to.to_i
73-
len = to-from
74-
mean = (cov.to_f/len.to_f).round()
75-
76-
gene = exon.split(".")[0]
77-
78-
if genes.has_key?(gene)
79-
genes[gene]["exons"] << [ from, to, mean ]
80-
else
81-
genes[gene] = { "strand" => strand, "exons" => [ [from, to, mean ]] }
82-
end
83-
84-
coverage[exon] = mean
85-
end
86-
end
87-
8867
json = JSON.parse(IO.readlines(options.json).join("\n"))
8968

9069
calls = json["calls"]
@@ -94,6 +73,24 @@ def build_intervals(regions)
9473
data = []
9574
header = []
9675

76+
cov_data = json["coverage"]
77+
78+
cov_data.each do |gene,data|
79+
80+
data.each do |d|
81+
82+
if genes.has_key?(gene)
83+
genes[gene]["exons"] << [ d["start"], d["stop"], d["mean_cov"] ]
84+
else
85+
genes[gene] = { "strand" => d["strand"], "exons" => [ [ d["start"], d["stop"], d["mean_cov"] ] ] }
86+
end
87+
88+
coverage["exon"] = d["mean_cov"]
89+
90+
end
91+
92+
end
93+
9794
# Transform the JSON structure into something Prawn can make a table from (array of arrays)
9895
calls.each do |gene,results|
9996

bin/report.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def trim_allele(call,precision)
193193

194194
header.each_with_index do |h,i|
195195
if h.match(/^A.*/)
196-
alleles["A"]["Optitype"] << trim_allle(e[i+1],precision)
196+
alleles["A"]["Optitype"] << trim_allele(e[i+1],precision)
197197
elsif h.match(/^B.*/)
198198
alleles["B"]["Optitype"] << trim_allele(e[i+1],precision)
199199
elsif h.match(/^C.*/)

bin/report_with_cov.rb

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
#!/usr/bin/env ruby
2+
# == NAME
3+
# report.rb
4+
#
5+
# == USAGE
6+
# ./this_script.rb [ -h | --help ]
7+
#[ -i | --infile ] |[ -o | --outfile ] |
8+
# == DESCRIPTION
9+
# A script to parse raw results from the ikmb/hla pipeline
10+
#
11+
# == OPTIONS
12+
# -h,--help Show help
13+
# -o,--outfile=OUTFILE : output file
14+
15+
#
16+
# == EXPERT OPTIONS
17+
#
18+
# == AUTHOR
19+
# Marc Hoeppner, [email protected]
20+
21+
require 'optparse'
22+
require 'ostruct'
23+
require 'json'
24+
require 'date'
25+
26+
### Define modules and classes here
27+
28+
def trim_allele(call,precision)
29+
30+
answer = nil
31+
32+
call = call.split("*")[-1] if call.include?("*")
33+
34+
e = call.split(":")
35+
36+
if e.length > precision
37+
answer = e[0..precision-1].join(":")
38+
else
39+
answer = call
40+
end
41+
42+
return answer
43+
end
44+
45+
### Get the script arguments and open relevant files
46+
options = OpenStruct.new()
47+
opts = OptionParser.new()
48+
opts.banner = "A script description here"
49+
opts.separator ""
50+
opts.on("-s","--sample", "=SAMPLE","Sample name") {|argument| options.sample = argument }
51+
opts.on("-c","--coverage", "=COVERAGE","Coverage of exons") {|argument| options.coverage = argument }
52+
opts.on("-p","--precision", "=PRECISION","Precision of calls") {|argument| options.precision = argument }
53+
opts.on("-v","--version", "=VERSION","PipelineVersion") {|argument| options.version = argument }
54+
opts.on("-o","--outfile", "=OUTFILE","Output file") {|argument| options.outfile = argument }
55+
opts.on("-h","--help","Display the usage information") {
56+
puts opts
57+
exit
58+
}
59+
60+
opts.parse!
61+
62+
options.precision ? precision = options.precision.to_i : precision = 3
63+
64+
date = Date.today.strftime("%d.%m.%Y")
65+
66+
sample = options.sample
67+
68+
alleles = { "A" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => [] },
69+
"B" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => [] },
70+
"C" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => [] },
71+
"DQB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => [] },
72+
"DRB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => [] },
73+
"DRB4" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => [] },
74+
"DRB5" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => [] },
75+
"DQA1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => [] },
76+
"DRB3" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => [] },
77+
"DPA1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => [] },
78+
"DPB1" => { "xHLA" => [], "Hisat" => [], "Optitype" => [], "HLAscan" => [], "HLA-HD" => [] }
79+
}
80+
81+
###################
82+
# Coverage of exons
83+
###################
84+
85+
coverage = {}
86+
87+
if options.coverage
88+
bedcov = File.open(options.coverage)
89+
90+
while (line = bedcov.gets)
91+
92+
# chr6 29942554 29942626 HLA-A.ENST00000376809.1 100 + 9425
93+
94+
seq,from,to,exon,score,strand,cov = line.strip.split("\t")
95+
gene = exon.split(".")[0]
96+
97+
from = from.to_i
98+
to = to.to_i
99+
len = to-from
100+
mean_cov = (cov.to_f/len.to_f).round()
101+
102+
this_target = { "exon" => exon, "seq" => seq, "start" => from, "stop" => to, "strand" => strand, "mean_cov" => mean_cov }
103+
coverage.has_key?(gene) ? coverage[gene] << this_target : coverage[gene] = [ this_target ]
104+
105+
end
106+
107+
bedcov.close
108+
109+
end
110+
111+
# Options
112+
############
113+
114+
## Only show hisat results supported at this level
115+
hisat_cutoff = 0.3
116+
117+
# Find result files
118+
files = Dir["*"]
119+
xhla = files.find{|f| f.upcase.include?("XHLA") }
120+
hisat = files.find{|f| f.upcase.include?("HISAT") }
121+
optitype = files.find{|f| f.upcase.include?("OPTI") }
122+
hlascan = files.select {|f| f.upcase.include?("HLASCAN") }
123+
hlahd = files.find {|f| f.upcase.include?("HLAHD") }
124+
125+
# The header of the result table, only including those tools we actually have data for.
126+
rheader = [ "HLA Genes" ]
127+
128+
########################
129+
# HLA-HD data processing
130+
########################
131+
132+
if hlahd
133+
134+
IO.readlines(hlahd).each do |line|
135+
136+
line.strip!
137+
# A HLA-A*24:02:01 HLA-A*01:01:01
138+
139+
gene,a,b = line.split("\t")
140+
if alleles.has_key?(gene)
141+
these_alleles = []
142+
these_alleles << trim_allele(a,precision) unless a.include?("Not typed") or a == "-"
143+
these_alleles << trim_allele(b,precision) unless b.include?("Not typed") or b == "-"
144+
145+
alleles[gene]["HLA-HD"] = these_alleles
146+
end
147+
148+
end
149+
rheader << "HLA-HD"
150+
end
151+
152+
########################
153+
### xHLA data processing
154+
########################
155+
156+
if xhla
157+
158+
json = JSON.parse( IO.readlines(xhla).join )
159+
160+
this_alleles = json["hla"]["alleles"]
161+
162+
alleles.keys.each do |k|
163+
164+
this_alleles.select {|al| al.match(/^#{k}.*/) }.each {|a| alleles[k]["xHLA"] << trim_allele(a,precision) }
165+
end
166+
167+
rheader << "xHLA"
168+
end
169+
170+
###########################
171+
### HLAscan data processing
172+
###########################
173+
174+
unless hlascan.empty?
175+
176+
hlascan.each do |h|
177+
178+
lines = IO.readlines(h)
179+
gene_line = lines.find {|l| l.include?("HLA gene") }
180+
181+
next unless gene_line
182+
183+
gene = gene_line.split(" ")[-1].split("-")[-1]
184+
185+
next unless alleles.has_key?(gene)
186+
187+
allele_1 = ""
188+
allele_2 = ""
189+
190+
first = lines.find {|l| l.include?("Type 1") }
191+
second = lines.find {|l| l.include?("Type 2") }
192+
193+
these_alleles = []
194+
if first
195+
allele_1 = "#{gene}*#{first.split(/\s+/)[2]}"
196+
these_alleles << allele_1 unless allele_1.length == 0
197+
end
198+
if second
199+
allele_2 = "#{gene}*#{second.split(/\s+/)[2]}"
200+
these_alleles << allele_2 unless allele_1.length == 0
201+
end
202+
203+
these_alleles.each {|a| alleles[gene]["HLAscan"] << trim_allele(a,precision) }
204+
205+
end
206+
207+
rheader << "HLAscan"
208+
209+
end
210+
211+
############################
212+
### Optitype data processing
213+
############################
214+
215+
if optitype
216+
217+
lines = IO.readlines(optitype)[0..1]
218+
header = lines.shift.strip.split(/\t/)
219+
220+
# 0 A*26:01 A*30:01 B*13:02 B*38:01 C*06:02 C*12:03 4526.0 4281.585999999999
221+
222+
e = lines.shift.strip.split(/\t/)
223+
224+
header.each_with_index do |h,i|
225+
if h.match(/^A.*/)
226+
alleles["A"]["Optitype"] << trim_allele(e[i+1],precision)
227+
elsif h.match(/^B.*/)
228+
alleles["B"]["Optitype"] << trim_allele(e[i+1],precision)
229+
elsif h.match(/^C.*/)
230+
alleles["C"]["Optitype"] << trim_allele(e[i+1],precision)
231+
end
232+
end
233+
234+
rheader << "Optitype"
235+
236+
end
237+
238+
239+
#############################
240+
#### Hisat Genotype
241+
#############################
242+
243+
if hisat
244+
245+
# 1 ranked B*35:08:01 (abundance: 50.20%)
246+
247+
lines = IO.readlines(hisat)
248+
header = lines.shift.split("\t")
249+
250+
info = lines.shift.split("\t")
251+
252+
header.each_with_index do |h,i|
253+
254+
if h.include?("Allele splitting: ")
255+
gene = h.split(" ")[-1]
256+
tmp = info[i]
257+
tmp.split(",").each do |t|
258+
# C*08 - Trimmed (score: 0.9090),C*08:02 - Trimmed (score: 0.3636)
259+
# We show percentages for ambiguous calls, but only >= 20% fraction
260+
c = trim_allele(t.split(" ")[0].strip,precision)
261+
abundance = t.split(" ")[-1].gsub(")","")
262+
f = abundance.split("%")[0].to_f
263+
next if f < hisat_cutoff
264+
alleles[gene]["Hisat"] << "#{c} (#{abundance})"
265+
end
266+
end
267+
end
268+
269+
rheader << "Hisat"
270+
end
271+
272+
f = File.new("#{sample}.json","w+")
273+
data = { "sample" => sample, "coverage" => coverage, "calls" => alleles, "pipeline_version" => options.version, "date" => date }
274+
f.puts data.to_json
275+
f.close

modules/reporting.nf

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ process REPORT {
55
publishDir "${params.outdir}/Reports", mode: 'copy'
66

77
input:
8-
tuple val(meta),path(reports)
8+
tuple val(meta),path(reports),path(coverage)
99
val(precision)
1010

1111
output:
@@ -16,7 +16,7 @@ process REPORT {
1616
json = "${sample}.json"
1717

1818
"""
19-
report.rb -s ${sample} -v ${workflow.manifest.version} -p $precision
19+
report_with_cov.rb -c $coverage -s ${sample} -v ${workflow.manifest.version} -p $precision
2020
"""
2121

2222
}
@@ -28,7 +28,7 @@ process JSON2PDF {
2828
publishDir "${params.outdir}/Reports", mode: 'copy'
2929

3030
input:
31-
tuple val(meta),path(json),path(bed)
31+
tuple val(meta),path(json)
3232

3333
output:
3434
tuple val(meta),path(pdf), emit: pdf
@@ -38,7 +38,7 @@ process JSON2PDF {
3838
pdf = "${sample}.pdf"
3939

4040
"""
41-
json2pdf.rb -j ${json} -o $pdf -l ${baseDir}/images/ikmb_bfx_logo.png -c $bed
41+
json2pdf.rb -j ${json} -o $pdf -l ${baseDir}/images/ikmb_bfx_logo.png
4242
"""
4343
}
4444

workflows/hla.nf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,12 +152,12 @@ workflow HLA {
152152
)
153153

154154
REPORT(
155-
ch_reports.groupTuple(),
155+
ch_reports.groupTuple().join(bedcov),
156156
params.precision
157157
)
158158

159159
JSON2PDF(
160-
REPORT.out.json.join(bedcov)
160+
REPORT.out.json
161161
)
162162

163163
JSON2XLS(

0 commit comments

Comments
 (0)