marchoeppner
diff --git a/‎assets/genomes/tomato/rules.json
+1-1 b/‎assets/genomes/tomato/rules.json
+1-1
diff --git a/‎bin/analyze_blast.rb
+1-1 b/‎bin/analyze_blast.rb
+1-1
diff --git a/‎bin/analyze_vcf.rb
+16-1 b/‎bin/analyze_vcf.rb
+16-1
diff --git a/‎bin/reports_to_table_by_tool.rb
+127 b/‎bin/reports_to_table_by_tool.rb
+127
diff --git a/‎bin/reports_to_xls_v2.rb
+164 b/‎bin/reports_to_xls_v2.rb
+164
@@ -17,7 +17,7 @@
             "payload": [
                 {
                     "format": "VCF",
-                    "target": "1:14834",
+                    "target": "1:14834-14836",
                     "name": "GABA Mutation in SIGAD3",
                     "matcher": "1\t14834\t.\tGTG\tGTTG",
                     "positive_report": "Diese Probe enthält eine GABA Mutation in SIGAD3. Nachweis erbracht über: Varianten Analyse.",
 
@@ -88,7 +88,7 @@
         perc = (carrier_cov.to_f / total_cov.to_f) * 100
         output["matches"] << { "rule" => rule_name , "toolchain" => "vsearch", "result" => rule["positive_report"], "perc_gmo" => perc.round(2), "ref_cov" => total_cov-carrier_cov, "alt_cov" => carrier_cov }
     else
-        output["matches"] << { "rule" => rule_name , "toolchain" => "vsearch", "result" => rule["negative_report"] }
+        output["matches"] << { "rule" => rule_name , "toolchain" => "vsearch", "result" => rule["negative_report"], "ref_cov" => total_cov, "alt_cov" => "NA" }
     end
 
 end
 
@@ -72,6 +72,7 @@ def parse_vcf(file)
 opts.separator ""
 opts.on("-v","--vcf", "=VCF","VCF to read") {|argument| options.vcf = argument }
 opts.on("-j","--json", "=JSON","JSON to read") {|argument| options.json = argument }
+opts.on("-c","--coverage", "=COVERAGE","BAM coverage") {|argument| options.coverage = argument }
 opts.on("-s","--sample", "=SAMPLE","Sample name") {|argument| options.sample = argument }
 opts.on("-h","--help","Display the usage information") {
     puts opts
@@ -82,6 +83,14 @@ def parse_vcf(file)
 
 date = Time.now.strftime("%Y-%m-%d")
 
+coverages = {}
+cov_lines = IO.readlines(options.coverage)
+cov_lines.each do |cl|
+    # 1	14834	14836	GABA Mutation in SIGAD3	1332221	2	2	1.0000000
+    seq,from,to,name,cov,la,lb,frac = cl.split("\t")
+    coverages[name] = cov.to_i
+end
+
 result = { "sample" => options.sample, "matches" => [] }
 
 json = JSON.parse(IO.readlines(options.json).join)
@@ -95,7 +104,9 @@ def parse_vcf(file)
     rule_name = rule["name"]
     rule_string = rule["matcher"]
 
-    this_match = { "toolchain" => "bwa2" , "rule" => rule_name}
+    coverages.has_key?(rule_name) ? this_cov = coverages[rule_name] : this_cov = "NA"
+
+    this_match = { "toolchain" => "bwa2" , "rule" => rule_name, "bam_cov" => this_cov }
 
     has_matched = false
 
@@ -120,6 +131,7 @@ def parse_vcf(file)
             rcov,acov = this_sample["AD"].split(",")
             cov_sum = acov.to_i + rcov.to_i
             perc = (acov.to_f / cov_sum.to_f)*100.0
+            
             this_match["perc_gmo"] = perc.round(2)
             this_match["ref_cov"] = rcov
             this_match["alt_cov"] = acov
@@ -131,6 +143,9 @@ def parse_vcf(file)
 
     unless has_matched
         this_match["report"] = rule["negative_report"]
+        this_match["ref_cov"] = "NA"
+        this_match["alt_cov"] = "NA"
+        this_match["perc_gmo"] = 0.0
         result["matches"] << this_match
     end
 
 
@@ -0,0 +1,127 @@
+#!/bin/env ruby
+
+require 'optparse'
+require 'ostruct'
+require 'json'
+require 'csv'
+
+### Define modules and classes here
+
+def parse_json(filename)
+
+    return JSON.parse(IO.readlines(filename).join)
+
+end
+
+### Get the script arguments and open relevant files
+options = OpenStruct.new()
+opts = OptionParser.new()
+opts.banner = "Reads reports and makes an Excel table with one sheet per analysis rule"
+opts.separator ""
+opts.on("-o","--outfile", "=OUTFILE","Output file") {|argument| options.outfile = argument }
+opts.on("-h","--help","Display the usage information") {
+    puts opts
+    exit
+}
+
+opts.parse! 
+
+negative_result = "-"
+
+files = Dir["*.json"]
+
+bucket = {}
+
+toolchain = nil
+
+files.each do |file|
+
+    json = parse_json(file)
+    matches = json["matches"]
+
+    matches.each do |match|
+
+        rule = match["rule"]
+        match["Sample"] = json["sample"]
+
+        bucket.has_key?(rule) ? bucket[rule] << match : bucket[rule] = [ match ]
+
+    end
+
+end
+
+# a bucket is a rule with all the matching reports, i.e. one page
+bucket.each do |rule,reports|
+
+    csv_list = []
+
+    this_row = []
+    row = 0
+    col = 0
+
+    # The table header
+    [ "Probe", "Abdeckung WT (%)", "Abdeckung GMO (%)"].each do |e|
+        this_row << e
+        col += 1
+    end
+    csv_list << this_row
+
+    # Each sample with all its reports (max 2)
+    reports.sort_by{|r| r["sample"]}.each do |r|
+
+        this_row = []
+
+        row += 1
+        col = 0
+
+        sample = r["Sample"]
+
+        this_row << sample
+
+        col += 1
+
+        ref_cov = r["ref_cov"]
+        
+        if ref_cov == "NA"
+            ref_cov = r["bam_cov"] if r.has_key?("bam_cov")
+        end
+
+        perc_gmo = r["perc_gmo"]
+
+        this_row << ref_cov
+        alt_cov = r["alt_cov"]
+        this_row << alt_cov
+    
+        toolchain = r["toolchain"]
+
+        csv_list << this_row
+
+    end
+
+    header = [ "# id: 'gmo_check_result_#{toolchain}'",
+        "# section_name: '#{rule} (#{toolchain})'",
+        "# description: 'GMO Nachweis für #{rule} (Anteil in %).'",
+        "# format: 'tsv'",
+        "# plot_type: 'table'",
+        "# pconfig:",
+        "#    id: 'custom_bargraph_w_header'",
+        "#    ylab: 'Anteil GMO'" ]
+
+    file_name = rule.gsub(" ","_").downcase
+    file = File.new(file_name +"_mqc.tsv","w+")
+
+    file.puts header.join("\n")
+
+    csv_list.each do |entry|
+
+        file.puts entry.join("\t")
+
+    end
+
+    file.close
+    
+end
+
+
+
+
@@ -0,0 +1,164 @@
+#!/bin/env ruby
+
+require 'optparse'
+require 'ostruct'
+require 'json'
+require 'rubyXL'
+require 'rubyXL/convenience_methods/cell'
+require 'rubyXL/convenience_methods/color'
+require 'rubyXL/convenience_methods/font'
+require 'rubyXL/convenience_methods/workbook'
+require 'rubyXL/convenience_methods/worksheet'
+
+### Define modules and classes here
+
+def parse_json(filename)
+
+    return JSON.parse(IO.readlines(filename).join)
+
+end
+
+### Get the script arguments and open relevant files
+options = OpenStruct.new()
+opts = OptionParser.new()
+opts.banner = "Reads reports and makes an Excel table with one sheet per analysis rule"
+opts.separator ""
+opts.on("-o","--outfile", "=OUTFILE","Output file") {|argument| options.outfile = argument }
+opts.on("-h","--help","Display the usage information") {
+    puts opts
+    exit
+}
+
+opts.parse! 
+
+color = {
+	"even" => "FFFFFF",
+	"uneven" => "d4e6f1"
+}
+
+negative_result = "-"
+
+files = Dir["*.json"]
+
+toolchains = []
+
+bucket = {}
+
+files.each do |file|
+
+    json        = parse_json(file)
+    sample      = json["sample"]
+    matches     = json["matches"]
+
+    matches.each do |match|
+
+        toolchain   = match["toolchain"]
+        rule        = match["rule"]
+
+        toolchains << toolchain unless toolchains.include?(toolchain)
+
+        bucket[rule] = {} unless bucket.has_key?(rule)
+
+        bucket[rule].has_key?(sample) ? bucket[rule][sample] << match : bucket[rule][sample] = [ match ]
+
+    end
+
+end
+
+toolchains.sort!
+
+workbook = RubyXL::Workbook.new
+page = 0
+
+# a bucket is a rule with all the matching reports, i.e. one page
+bucket.each do |rule,samples|
+
+    sheet = workbook.worksheets[page]
+    sheet.sheet_name = rule
+
+    row = 0
+    col = 0
+
+    [ "" ].push(toolchains.map { |tc| [ tc, "", ""] }).flatten.each do |tc|
+        sheet.add_cell(row,col,tc)
+        sheet.sheet_data[row][col].change_font_bold(true)
+        sheet.change_column_width(col, 15)
+        col += 1
+    end
+    row += 1
+    col = 0
+
+    [ "Probe" ].push(toolchains.map{|tc| [ "% GMO","Reads WT","Reads GMO"]}).flatten.each do |tc|
+        sheet.add_cell(row,col,tc)
+        sheet.sheet_data[row][col].change_font_bold(true)
+        sheet.change_column_width(col, 15)
+        col += 1
+    end
+
+    samples.each do |sample,reports|
+
+        row += 1
+        col = 0
+
+        sheet.add_cell(row,col,sample)
+        col += 1
+
+        failed = false
+
+        toolchains.each do |tool|
+
+            perc_gmo = ""
+            ref_cov = ""
+            alt_cov = ""
+
+            report = reports.find{|r| r["toolchain"] == tool }
+
+            if report
+                perc_gmo = report["perc_gmo"]
+                ref_cov = report["ref_cov"]
+                alt_cov = report["alt_cov"]
+                if ref_cov == "NA" && report.has_key?("bam_cov")
+                    ref_cov = report["bam_cov"]
+                    alt_cov = "-"
+                end
+            end
+
+            # Simplistic rule to catch failed samples. 
+            if ref_cov.to_i < 100
+                failed = true
+            end
+
+            [ perc_gmo, ref_cov, alt_cov].each do |e|
+                sheet.add_cell(row,col,e)
+                col += 1
+            end
+
+
+        end
+
+        if failed
+            sheet.change_row_fill(row,"FF3300")
+        else
+            row.even? ? bg = color["even"] : bg = color["uneven"]
+            sheet.change_row_fill(row, bg)
+        end
+        sheet.change_row_horizontal_alignment(row, 'right')
+
+    end
+    
+    col = 0
+    toolchains.each do |tc|
+        sheet.change_column_border(col, :right, 'medium')
+        col += 3
+    end
+    
+    sheet.change_row_border(0, :bottom, 'medium')  
+
+    # increment page counter for the next rule, if any
+    page += 1
+
+end
+
+workbook.write(options.outfile)
+
+
Original file line number	Diff line number	Diff line change
`@@ -17,7 +17,7 @@`
`17`	`17`	`"payload": [`
`18`	`18`	`{`
`19`	`19`	`"format": "VCF",`
`20`		`- "target": "1:14834",`
	`20`	`+ "target": "1:14834-14836",`
`21`	`21`	`"name": "GABA Mutation in SIGAD3",`
`22`	`22`	`"matcher": "1\t14834\t.\tGTG\tGTTG",`
`23`	`23`	`"positive_report": "Diese Probe enthält eine GABA Mutation in SIGAD3. Nachweis erbracht über: Varianten Analyse.",`