@@ -202,7 +202,8 @@ def compare_vcf(args):
202
202
203
203
for k , v in input_variant_dict .items ():
204
204
columns = v .row_str .rstrip ().split ('\t ' )
205
- phaseable = columns [7 ] == 'H'
205
+ # phaseable = columns[7] == 'H'
206
+ phaseable = 'H' in columns [7 ].split (';' )
206
207
if phaseable :
207
208
phasable_count += 1
208
209
else :
@@ -258,7 +259,7 @@ def compare_vcf(args):
258
259
259
260
if benchmark_indel :
260
261
ref_base , alt_base = input_variant_dict [key ].reference_bases , input_variant_dict [key ].alternate_bases [0 ]
261
- if len (ref_base ) == 1 and len (alt_base ) == 1 :
262
+ if len (ref_base ) == 1 and len (alt_base ) == 1 or len ( input_variant_dict [ key ]. alternate_bases ) > 1 :
262
263
del input_variant_dict [key ]
263
264
264
265
for key in list (truth_variant_dict .keys ()):
@@ -304,6 +305,7 @@ def compare_vcf(args):
304
305
tp_set = set ()
305
306
fp_qual_dict = defaultdict (float )
306
307
tp_qual_dict = defaultdict (float )
308
+ gt_mismatch_count = 0
307
309
for key , vcf_infos in input_variant_dict .items ():
308
310
pos = key if args .ctg_name is not None else key [1 ]
309
311
contig = args .ctg_name if args .ctg_name is not None else key [0 ]
@@ -323,8 +325,6 @@ def compare_vcf(args):
323
325
324
326
ref_base = vcf_infos .reference_bases
325
327
alt_base = vcf_infos .alternate_bases [0 ]
326
- # if alt_base == '.':
327
- # alt_base = ref_base
328
328
genotype = vcf_infos .genotype
329
329
qual = vcf_infos .qual
330
330
try :
@@ -359,6 +359,8 @@ def compare_vcf(args):
359
359
continue
360
360
361
361
genotype_match = skip_genotyping or (truth_genotype == genotype )
362
+ if not genotype_match :
363
+ gt_mismatch_count += 1
362
364
if truth_ref_base == ref_base and truth_alt_base == alt_base and genotype_match :
363
365
tp_snv = tp_snv + 1 if is_snv else tp_snv
364
366
tp_ins = tp_ins + 1 if is_ins else tp_ins
@@ -388,7 +390,8 @@ def compare_vcf(args):
388
390
fp_fn_set .add (key )
389
391
390
392
truth_set .add (key )
391
-
393
+ if not skip_genotyping :
394
+ print ('[INFO] Genotype mismatch count/Total fp_fn count: {}/{}' .format (gt_mismatch_count , len (fp_fn_set )))
392
395
for key , vcf_infos in truth_variant_dict .items ():
393
396
pos = key if args .ctg_name is not None else key [1 ]
394
397
contig = args .ctg_name if args .ctg_name is not None else key [0 ]
0 commit comments