-
Notifications
You must be signed in to change notification settings - Fork 119
/
Copy pathencode_lib_qc_category.py
executable file
·259 lines (220 loc) · 7.7 KB
/
encode_lib_qc_category.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
#!/usr/bin/env python3
"""
ENCODE QC log/plot to HTML converter
Author: Jin Lee ([email protected])
"""
import mimetypes
from base64 import b64encode
def to_number(var):
    """Parse a string as a number.

    A string without a '.' is tried as an int first; anything else (or an
    int parse failure such as '1e3') is tried as a float.

    Args:
        var: string to convert.

    Returns:
        int or float on success, None if the string is not numeric.
    """
    # Strings containing a dot skip the int attempt and go straight to float.
    if '.' not in var:
        try:
            return int(var)
        except ValueError:
            pass
    try:
        return float(var)
    except ValueError:
        return None
class QCLog(object):
    """Parse a QC text file and convert it into a Python dict.

    TSV (number of columns >= 1) can be converted without a parser function.
    First column will be key name and the rest of columns will be
    values.
    For other QC log types, specify a parser function.
    """

    def __init__(self, log_file, parser=None):
        """
        Args:
            log_file: QC log file path, or an already-parsed QC dict.
            parser: optional callable taking log_file and returning the
                parsed result. When None, log_file is read as TSV.
        """
        self._log_file = log_file
        self._parser = parser
        self._dict = None
        self.__parse()

    def to_dict(self):
        """Return the parsed result (the same object on every call)."""
        return self._dict

    def __parse(self):
        """Populate self._dict from self._log_file.

        - dict input is stored as-is (by reference, not copied).
        - With a parser, the parser's return value is stored.
        - Otherwise the file is read as TSV, one entry per line:
            1 column  : key 'value<line index>', value = to_number(col0)
            2 columns : key col0, value = to_number(col1)
            3+ columns: key col0, value = [to_number(c) for other cols]
          A line without any tab (an empty line included) counts as one
          column. Fields that are not numeric become None (see to_number).
          A repeated key overwrites the earlier entry.
        """
        if isinstance(self._log_file, dict):
            self._dict = self._log_file
            return

        if self._parser is not None:
            self._dict = self._parser(self._log_file)
            return

        with open(self._log_file, 'r') as fp:
            # Only leading/trailing newlines are trimmed; blank lines in the
            # middle of the file are still enumerated below.
            lines = fp.read().strip('\n')

        d = {}
        for i, line in enumerate(lines.split('\t'.join(['\n']))):
            # str.split always yields at least one element, so the
            # three cases below are exhaustive.
            arr = line.split('\t')
            if len(arr) == 1:
                d['value' + str(i)] = to_number(arr[0])
            elif len(arr) == 2:
                d[arr[0]] = to_number(arr[1])
            else:
                d[arr[0]] = [to_number(v) for v in arr[1:]]
        self._dict = d
class QCPlot(object):
    """Embed image as base64 string and return HTML string.

    QCPlot supports all images types supported by HTML <img>.
    """

    def __init__(self, plot_file, caption=None, size_pct=100):
        """
        Args:
            plot_file: path to the image file; it is read immediately.
            caption: text used for both the <img> alt attribute and the
                <figcaption>. None is rendered as an empty string.
            size_pct: value written into the <img> height attribute,
                followed by '%'.
        """
        self._plot_file = plot_file
        self._caption = caption
        self._size_pct = size_pct
        self._encoded = None
        self._img_type = None
        self.__encode()

    def to_html(self):
        """Return a <figure> element with the image inlined as a data URI."""
        html = (
            '\n'
            '<figure style="display:inline-block">\n'
            '<img src="data:image/{img_type};base64,{encoded}" '
            'alt="{caption}" height="{size_pct}%"/>\n'
            '<figcaption style="text-align:center">{caption}</figcaption>\n'
            '</figure>\n'
        )
        return html.format(
            img_type=self._img_type,
            size_pct=self._size_pct,
            encoded=self._encoded,
            caption='' if self._caption is None else self._caption)

    def __encode(self):
        """Read the plot file, base64-encode it and record its image type."""
        # Context manager so the file handle is closed deterministically.
        with open(self._plot_file, 'rb') as fp:
            self._encoded = b64encode(fp.read()).decode("utf-8")

        # The data URI in to_html() needs the image subtype ('png',
        # 'jpeg', 'svg+xml', ...); infer it from the file name.
        mime_type, _ = mimetypes.guess_type(str(self._plot_file))
        if mime_type is not None and mime_type.startswith('image/'):
            self._img_type = mime_type.split('/', 1)[1]
        else:
            # NOTE(review): the type cannot be inferred from the file name;
            # 'png' is an arbitrary fallback -- confirm it suits callers.
            self._img_type = 'png'
class QCCategory(object):
    """A named group of QC logs and plots that renders to HTML or a dict.

    QCCategory can have child QCCategory objects and their HTML will be
    recursively stacked. This is useful for having subcategories.
    """

    def __init__(self, cat_name, html_head='', html_foot='',
                 parser=None, map_key_desc=None, parent=None):
        """
        Args:
            cat_name: category name; used as this category's key in the
                parent's to_dict() output.
            html_head: HTML emitted before this category's content.
            html_foot: HTML emitted after this category's content.
            parser: use it as default parser for all children QC logs
            map_key_desc: map (keyname to description) for QCLog.
                For example of samtools flagstat
                    'mapped_qc_failed' : 'Mapped(QC-failed)'
            parent: optional QCCategory to register this category under.
        """
        self._cat_name = cat_name
        self._html_head = html_head
        self._html_foot = html_foot
        self._parser = parser
        self._map_key_desc = map_key_desc
        self._qc_logs = {}
        self._qc_plots = {}
        self._child_categories = []
        if parent is not None:
            parent.add_category(self)

    def add_category(self, qc_category):
        """Append a child category."""
        self._child_categories.append(qc_category)

    def add_log(self, log_file, key=None):
        """Parse log_file with this category's parser and store it under key.

        key becomes the column name in the HTML table and the key in
        to_dict(). Each key may be added only once.
        """
        # Kept as an assert so callers see the same exception type as before.
        assert key not in self._qc_logs, \
            'QC log key already added: {}'.format(key)
        self._qc_logs[key] = QCLog(
            log_file,
            parser=self._parser,
        )

    def add_plot(self, plot_file, key=None, caption=None, size_pct=100):
        """Store a plot under key; key doubles as caption when none given.

        Each key may be added only once.
        """
        assert key not in self._qc_plots, \
            'QC plot key already added: {}'.format(key)
        self._qc_plots[key] = QCPlot(
            plot_file,
            caption=caption if caption is not None else key,
            size_pct=size_pct
        )

    def to_dict(self):
        """Convert into dict. Plots will be ignored.

        Logs are keyed by the key given to add_log(); child categories are
        keyed by their category name. Empty logs/categories are omitted.
        """
        d = {}
        for key, qc_log in self._qc_logs.items():
            log_dict = qc_log.to_dict()
            if log_dict:
                d[key] = log_dict

        for cat in self._child_categories:
            cat_dict = cat.to_dict()
            if cat_dict:
                d[cat._cat_name] = cat_dict
        return d

    def to_html(self):
        """Print HTML only if there are contents to be shown

        Content order: log table, plots, then child categories. The
        head/foot are added only when that content is non-empty.
        """
        parts = [self.__qc_logs_to_html(), self.__qc_plots_to_html()]
        parts.extend(cat.to_html() for cat in self._child_categories)
        body = ''.join(parts)
        if not body:
            return ''
        return self._html_head + body + self._html_foot

    def __qc_logs_to_html(self):
        """Print HTML only if there are contents to be shown
        Make an HTML table of qc_logs. For example,
                 rep1    rep2
          -------+-------+--------
          key1   | val1  | val1

        One column per QC log, one row per QC key. Row labels are taken
        from map_key_desc when it has an entry for the key.
        """
        if not self._qc_logs:
            return ''

        # Header row: a blank corner cell followed by one cell per QC log.
        log_keys = list(self._qc_logs)
        if len(log_keys) == 1 and log_keys[0] is None:
            # a single unnamed QC log gets a generic column title
            columns = ['Description']
        else:
            # Log keys may be None or other non-strings; str.join needs str.
            columns = [str(k) for k in log_keys]
        header = (
            '<tr><th bgcolor="#EEEEEE">'
            + '</th><th bgcolor="#EEEEEE">'.join([' '] + columns)
            + '</th></tr>\n'
        )

        log_dicts = [qc_log.to_dict() for qc_log in self._qc_logs.values()]

        # Union of the QC keys of all logs, in first-seen order
        # (a dict is used as an ordered set).
        all_keys = {}
        for log_dict in log_dicts:
            for k in log_dict:
                all_keys.setdefault(k, None)

        rows = []
        for key in all_keys:
            long_key_name = key
            if self._map_key_desc is not None:
                try:
                    long_key_name = self._map_key_desc[key]
                except KeyError:
                    # No description for this key: show the raw key name
                    # instead of failing the whole report.
                    pass
            cells = [
                str(log_dict[key]) if key in log_dict else 'N/A'
                for log_dict in log_dicts
            ]
            rows.append(
                '<tr><th bgcolor="#EEEEEE" style="text-align:left">'
                '{}</th><td>'.format(long_key_name)
                + '</td><td>'.join(cells)
                + '</td></tr>\n'
            )

        return (
            '<table border="1" style="border-collapse:collapse">'
            + header
            + ''.join(rows)
            + '</table><br>\n'
        )

    def __qc_plots_to_html(self):
        """Concatenate the HTML of all plots in insertion order."""
        return ''.join(
            qc_plot.to_html() for qc_plot in self._qc_plots.values())