Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 8f1a631

Browse files
committed Feb 13, 2019
fix bug for case of absent format fields
1 parent 137ff84 commit 8f1a631

File tree

1 file changed

+19
-11
lines changed

1 file changed

+19
-11
lines changed
 

‎vcf2tsv.py

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
import re
77
import subprocess
88

9-
version = '0.3.3'
9+
version = '0.3.4'
1010

1111

1212
def __main__():
@@ -65,29 +65,37 @@ def vcf2tsv(query_vcf, out_tsv, skip_info_data, skip_genotype_data, keep_rejecte
6565
else:
6666
gt_present_header = 1
6767

68-
header_line = '\t'.join(fixed_columns_header)
68+
#header_line = '\t'.join(fixed_columns_header)
69+
header_tags = fixed_columns_header
6970
if skip_info_data is False:
70-
header_line = '\t'.join(fixed_columns_header) + '\t' + '\t'.join(sorted(info_columns_header))
71+
#header_line = '\t'.join(fixed_columns_header) + '\t' + '\t'.join(sorted(info_columns_header))
72+
header_tags = fixed_columns_header + sorted(info_columns_header)
7173
if len(sample_columns_header) > 0:
7274
if skip_genotype_data is False:
73-
header_line = '\t'.join(fixed_columns_header) + '\t' + '\t'.join(sorted(info_columns_header)) + '\t' + '\t'.join(sample_columns_header) + '\t' + '\t'.join(sorted(format_columns_header)) + '\tGT'
75+
#header_line = '\t'.join(fixed_columns_header) + '\t' + '\t'.join(sorted(info_columns_header)) + '\t' + '\t'.join(sample_columns_header) + '\t' + '\t'.join(sorted(format_columns_header)) + '\tGT'
76+
header_tags = fixed_columns_header + sorted(info_columns_header) + sample_columns_header + sorted(format_columns_header) + ['GT']
7477
else:
75-
header_line = '\t'.join(fixed_columns_header) + '\t' + '\t'.join(sorted(info_columns_header))
78+
#header_line = '\t'.join(fixed_columns_header) + '\t' + '\t'.join(sorted(info_columns_header))
79+
header_tags = fixed_columns_header + sorted(info_columns_header)
7680
else:
7781
if len(sample_columns_header) > 0:
7882
if skip_genotype_data is False:
79-
header_line = '\t'.join(fixed_columns_header) + '\t' + '\t'.join(sample_columns_header) + '\t' + '\t'.join(sorted(format_columns_header)) + '\tGT'
83+
#header_line = '\t'.join(fixed_columns_header) + '\t' + '\t'.join(sample_columns_header) + '\t' + '\t'.join(sorted(format_columns_header)) + '\tGT'
84+
header_tags = fixed_columns_header + sample_columns_header + sorted(format_columns_header) + ['GT']
8085
else:
81-
header_line = '\t'.join(fixed_columns_header)
82-
86+
#header_line = '\t'.join(fixed_columns_header)
87+
header_tags = fixed_columns_header
88+
header_line = '\t'.join(header_tags)
89+
8390
out.write('#https://github.com/sigven/vcf2tsv version=' + str(version) + '\n')
8491
if print_data_type_header is True:
85-
header_tags = header_line.rstrip().split('\t')
92+
#header_tags = header_line.rstrip().split('\t')
8693
header_types = []
8794
for h in header_tags:
8895
if h in column_types:
8996
header_types.append(str(column_types[h]))
90-
header_line_type = '\t'.join(fixed_columns_header_type) + '\t' + '\t'.join(header_types)
97+
#header_line_type = '\t'.join(fixed_columns_header_type) + '\t' + '\t'.join(header_types)
98+
header_line_type = '\t'.join(fixed_columns_header_type + header_types)
9199
out.write('#' + str(header_line_type) + '\n')
92100
out.write(str(header_line) + '\n')
93101
else:
@@ -154,7 +162,7 @@ def vcf2tsv(query_vcf, out_tsv, skip_info_data, skip_genotype_data, keep_rejecte
154162
vcf_info_data.append(str(variant_info.get(info_field)))
155163
else:
156164
print('vcf2tsv.py WARNING:\tINFO tag ' + str(info_field) + ' is defined in the VCF header as type \'Integer\', yet parsed as other type:' + str(type(variant_info.get(info_field))))
157-
vcf_info_data.append(re.sub('\(|\)', '', variant_info.get(info_field).encode('ascii','ignore').decode('ascii')))
165+
vcf_info_data.append(re.sub(r'\(|\)', '', variant_info.get(info_field).encode('ascii','ignore').decode('ascii')))
158166

159167
#print(str(vcf_info_data))
160168
#dictionary, with sample names as keys, values being genotype data (dictionary with format tags as keys)

0 commit comments

Comments
 (0)
Please sign in to comment.