Skip to content

Commit fac2944

Browse files
committed
Staging release 0.2
1 parent 1ac9388 commit fac2944

19 files changed

+458
-28
lines changed

assets/genomes/tomato/rules.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
"payload": [
1818
{
1919
"format": "VCF",
20-
"target": "1:14834",
20+
"target": "1:14834-14836",
2121
"name": "GABA Mutation in SIGAD3",
2222
"matcher": "1\t14834\t.\tGTG\tGTTG",
2323
"positive_report": "Diese Probe enthält eine GABA Mutation in SIGAD3. Nachweis erbracht über: Varianten Analyse.",

bin/analyze_blast.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@
8888
perc = (carrier_cov.to_f / total_cov.to_f) * 100
8989
output["matches"] << { "rule" => rule_name , "toolchain" => "vsearch", "result" => rule["positive_report"], "perc_gmo" => perc.round(2), "ref_cov" => total_cov-carrier_cov, "alt_cov" => carrier_cov }
9090
else
91-
output["matches"] << { "rule" => rule_name , "toolchain" => "vsearch", "result" => rule["negative_report"] }
91+
output["matches"] << { "rule" => rule_name , "toolchain" => "vsearch", "result" => rule["negative_report"], "ref_cov" => total_cov, "alt_cov" => "NA" }
9292
end
9393

9494
end

bin/analyze_vcf.rb

+16-1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ def parse_vcf(file)
7272
opts.separator ""
7373
opts.on("-v","--vcf", "=VCF","VCF to read") {|argument| options.vcf = argument }
7474
opts.on("-j","--json", "=JSON","JSON to read") {|argument| options.json = argument }
75+
opts.on("-c","--coverage", "=COVERAGE","BAM coverage") {|argument| options.coverage = argument }
7576
opts.on("-s","--sample", "=SAMPLE","Sample name") {|argument| options.sample = argument }
7677
opts.on("-h","--help","Display the usage information") {
7778
puts opts
@@ -82,6 +83,14 @@ def parse_vcf(file)
8283

8384
date = Time.now.strftime("%Y-%m-%d")
8485

86+
coverages = {}
87+
cov_lines = IO.readlines(options.coverage)
88+
cov_lines.each do |cl|
89+
# 1 14834 14836 GABA Mutation in SIGAD3 1332221 2 2 1.0000000
90+
seq,from,to,name,cov,la,lb,frac = cl.split("\t")
91+
coverages[name] = cov.to_i
92+
end
93+
8594
result = { "sample" => options.sample, "matches" => [] }
8695

8796
json = JSON.parse(IO.readlines(options.json).join)
@@ -95,7 +104,9 @@ def parse_vcf(file)
95104
rule_name = rule["name"]
96105
rule_string = rule["matcher"]
97106

98-
this_match = { "toolchain" => "bwa2" , "rule" => rule_name}
107+
coverages.has_key?(rule_name) ? this_cov = coverages[rule_name] : this_cov = "NA"
108+
109+
this_match = { "toolchain" => "bwa2" , "rule" => rule_name, "bam_cov" => this_cov }
99110

100111
has_matched = false
101112

@@ -120,6 +131,7 @@ def parse_vcf(file)
120131
rcov,acov = this_sample["AD"].split(",")
121132
cov_sum = acov.to_i + rcov.to_i
122133
perc = (acov.to_f / cov_sum.to_f)*100.0
134+
123135
this_match["perc_gmo"] = perc.round(2)
124136
this_match["ref_cov"] = rcov
125137
this_match["alt_cov"] = acov
@@ -131,6 +143,9 @@ def parse_vcf(file)
131143

132144
unless has_matched
133145
this_match["report"] = rule["negative_report"]
146+
this_match["ref_cov"] = "NA"
147+
this_match["alt_cov"] = "NA"
148+
this_match["perc_gmo"] = 0.0
134149
result["matches"] << this_match
135150
end
136151

bin/reports_to_table_by_tool.rb

+127
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
#!/bin/env ruby
2+
3+
require 'optparse'
4+
require 'ostruct'
5+
require 'json'
6+
require 'csv'
7+
8+
### Define modules and classes here
9+
10+
# Reads the file at +filename+ and returns its content parsed as JSON.
#
# @param filename [String] path of the JSON file to load
# @return [Object] the parsed JSON document
# @raise [JSON::ParserError] if the content is not valid JSON
# @raise [SystemCallError] if the file cannot be read (e.g. Errno::ENOENT)
def parse_json(filename)
  # File.read returns the same text as IO.readlines(filename).join, but in a
  # single call and without IO.readlines' special handling of "|cmd" names.
  JSON.parse(File.read(filename))
end
15+
16+
### Get the script arguments and open relevant files
17+
# Column titles written as the first row of every per-rule table.
# NOTE(review): the titles say "(%)" but the cells below hold the reports'
# ref_cov / alt_cov values — confirm which of the two is intended.
TABLE_HEADER = ["Probe", "Abdeckung WT (%)", "Abdeckung GMO (%)"].freeze

# Loads every report file and groups its matches by rule name.
# Each match is tagged with the sample name of its report under "Sample".
#
# @param files [Array<String>] paths of the JSON report files
# @return [Hash{String => Array<Hash>}] rule name => matches of that rule
def collect_matches_by_rule(files)
  files.each_with_object({}) do |file, bucket|
    json = parse_json(file)
    json["matches"].each do |match|
      match["Sample"] = json["sample"]
      (bucket[match["rule"]] ||= []) << match
    end
  end
end

# Builds the table rows (header row first) for the reports of one rule.
#
# @param reports [Array<Hash>] matches of one rule, each carrying "Sample"
# @return [Array<Array>] rows of [sample, ref coverage, alt coverage]
def rule_table_rows(reports)
  rows = [TABLE_HEADER.dup]
  # The sample name is stored under "Sample" (capital S); sorting on the
  # lower-case key would compare nil with nil and leave the rows unordered.
  reports.sort_by { |r| r["Sample"].to_s }.each do |r|
    ref_cov = r["ref_cov"]
    # Reports without a variant call carry "NA"; fall back to the BAM
    # coverage when the report provides one.
    ref_cov = r["bam_cov"] if ref_cov == "NA" && r.key?("bam_cov")
    rows << [r["Sample"], ref_cov, r["alt_cov"]]
  end
  rows
end

# Writes the table of one rule to "<rule>_mqc.tsv" in the working directory
# (rule name lower-cased, spaces replaced by underscores).
#
# @param rule [String] rule name
# @param reports [Array<Hash>] matches of that rule (at least one)
# @return [void]
def write_rule_table(rule, reports)
  # The header names a single toolchain; it is taken from one report and
  # mixed-toolchain input is not told apart.
  toolchain = reports.last["toolchain"]

  # NOTE(review): the two lines after "# pconfig:" look like nested settings;
  # if the consumer parses this header as YAML they may need indentation —
  # confirm against the original file.
  header = [ "# id: 'gmo_check_result_#{toolchain}'",
             "# section_name: '#{rule} (#{toolchain})'",
             "# description: 'GMO Nachweis für #{rule} (Anteil in %).'",
             "# format: 'tsv'",
             "# plot_type: 'table'",
             "# pconfig:",
             "# id: 'custom_bargraph_w_header'",
             "# ylab: 'Anteil GMO'" ]

  file_name = rule.gsub(" ","_").downcase
  # Block form closes the file even if writing a row raises.
  File.open(file_name +"_mqc.tsv","w") do |file|
    file.puts header.join("\n")
    rule_table_rows(reports).each { |row| file.puts row.join("\t") }
  end
end

options = OpenStruct.new
opts = OptionParser.new
opts.banner = "Reads reports and writes one TSV table (<rule>_mqc.tsv) per analysis rule"
opts.separator ""
# --outfile is accepted but not used below: output names derive from the rule names.
opts.on("-o","--outfile", "=OUTFILE","Output file") {|argument| options.outfile = argument }
opts.on("-h","--help","Display the usage information") {
  puts opts
  exit
}

opts.parse!

# Every *.json file in the working directory is treated as a report;
# one table file is written per rule.
collect_matches_by_rule(Dir["*.json"]).each do |rule, reports|
  write_rule_table(rule, reports)
end
124+
125+
126+
127+

bin/reports_to_xls_v2.rb

+164
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
#!/bin/env ruby
2+
3+
require 'optparse'
4+
require 'ostruct'
5+
require 'json'
6+
require 'rubyXL'
7+
require 'rubyXL/convenience_methods/cell'
8+
require 'rubyXL/convenience_methods/color'
9+
require 'rubyXL/convenience_methods/font'
10+
require 'rubyXL/convenience_methods/workbook'
11+
require 'rubyXL/convenience_methods/worksheet'
12+
13+
### Define modules and classes here
14+
15+
# Reads the file at +filename+ and returns its content parsed as JSON.
#
# @param filename [String] path of the JSON file to load
# @return [Object] the parsed JSON document
# @raise [JSON::ParserError] if the content is not valid JSON
# @raise [SystemCallError] if the file cannot be read (e.g. Errno::ENOENT)
def parse_json(filename)
  # File.read returns the same text as IO.readlines(filename).join, but in a
  # single call and without IO.readlines' special handling of "|cmd" names.
  JSON.parse(File.read(filename))
end
20+
21+
### Get the script arguments and open relevant files
22+
options = OpenStruct.new
opts = OptionParser.new
opts.banner = "Reads reports and makes an Excel table with one sheet per analysis rule"
opts.separator ""
opts.on("-o","--outfile", "=OUTFILE","Output file") {|argument| options.outfile = argument }
opts.on("-h","--help","Display the usage information") {
  puts opts
  exit
}

opts.parse!

# Without an output path the script would only fail at the very end, inside
# workbook.write; stop early with the usage text instead.
abort("Missing required option --outfile\n\n#{opts}") unless options.outfile

# Alternating background colors of the data rows.
color = {
  "even" => "FFFFFF",
  "uneven" => "d4e6f1"
}

toolchains = []

# rule name => { sample name => [matches of that sample for that rule] }
bucket = {}

# Every *.json file in the working directory is treated as a report.
Dir["*.json"].each do |file|

  json = parse_json(file)
  sample = json["sample"]

  json["matches"].each do |match|
    toolchain = match["toolchain"]
    toolchains << toolchain unless toolchains.include?(toolchain)

    ((bucket[match["rule"]] ||= {})[sample] ||= []) << match
  end

end

toolchains.sort!

workbook = RubyXL::Workbook.new

# a bucket is a rule with all the matching reports, i.e. one page
bucket.each_with_index do |(rule, samples), page|

  # A new RubyXL workbook contains a single worksheet. Reuse it for the first
  # rule and add a worksheet for each further rule; indexing worksheets[page]
  # for page > 0 returns nil.
  sheet = page.zero? ? workbook.worksheets[0] : workbook.add_worksheet(rule)
  sheet.sheet_name = rule

  # Two bold header rows: the toolchain names (each spanning three columns),
  # then the column titles repeated once per toolchain.
  header_rows = [
    [ "" ] + toolchains.flat_map { |tc| [ tc, "", "" ] },
    [ "Probe" ] + toolchains.flat_map { [ "% GMO", "Reads WT", "Reads GMO" ] }
  ]

  header_rows.each_with_index do |titles, header_row|
    titles.each_with_index do |title, col|
      sheet.add_cell(header_row, col, title)
      sheet.sheet_data[header_row][col].change_font_bold(true)
      sheet.change_column_width(col, 15)
    end
  end

  # Data rows start below the two header rows.
  row = 1

  samples.each do |sample, reports|

    row += 1

    cells = [ sample ]
    failed = false

    toolchains.each do |tool|

      perc_gmo = ""
      ref_cov = ""
      alt_cov = ""

      report = reports.find { |r| r["toolchain"] == tool }

      if report
        perc_gmo = report["perc_gmo"]
        ref_cov = report["ref_cov"]
        alt_cov = report["alt_cov"]
        # No variant call: show the BAM coverage as reference coverage when
        # the report provides one.
        if ref_cov == "NA" && report.key?("bam_cov")
          ref_cov = report["bam_cov"]
          alt_cov = "-"
        end
      end

      # Simplistic rule to catch failed samples. A missing report or a
      # non-numeric value converts to 0 and therefore also counts as failed.
      failed = true if ref_cov.to_i < 100

      cells.push(perc_gmo, ref_cov, alt_cov)

    end

    cells.each_with_index { |value, col| sheet.add_cell(row, col, value) }

    # Failed samples are highlighted; all other rows alternate colors.
    fill = failed ? "FF3300" : color[row.even? ? "even" : "uneven"]
    sheet.change_row_fill(row, fill)
    sheet.change_row_horizontal_alignment(row, 'right')

  end

  # One right-hand border per toolchain, at columns 0, 3, 6, ...
  toolchains.each_index do |i|
    sheet.change_column_border(i * 3, :right, 'medium')
  end

  sheet.change_row_border(0, :bottom, 'medium')

end

workbook.write(options.outfile)
163+
164+

0 commit comments

Comments
 (0)