|
6 | 6 | import re
|
7 | 7 | import subprocess
|
8 | 8 |
|
9 |
| -version = '0.3.3' |
| 9 | +version = '0.3.4' |
10 | 10 |
|
11 | 11 |
|
12 | 12 | def __main__():
|
@@ -65,29 +65,37 @@ def vcf2tsv(query_vcf, out_tsv, skip_info_data, skip_genotype_data, keep_rejecte
|
65 | 65 | else:
|
66 | 66 | gt_present_header = 1
|
67 | 67 |
|
68 |
| - header_line = '\t'.join(fixed_columns_header) |
| 68 | + #header_line = '\t'.join(fixed_columns_header) |
| 69 | + header_tags = fixed_columns_header |
69 | 70 | if skip_info_data is False:
|
70 |
| - header_line = '\t'.join(fixed_columns_header) + '\t' + '\t'.join(sorted(info_columns_header)) |
| 71 | + #header_line = '\t'.join(fixed_columns_header) + '\t' + '\t'.join(sorted(info_columns_header)) |
| 72 | + header_tags = fixed_columns_header + sorted(info_columns_header) |
71 | 73 | if len(sample_columns_header) > 0:
|
72 | 74 | if skip_genotype_data is False:
|
73 |
| - header_line = '\t'.join(fixed_columns_header) + '\t' + '\t'.join(sorted(info_columns_header)) + '\t' + '\t'.join(sample_columns_header) + '\t' + '\t'.join(sorted(format_columns_header)) + '\tGT' |
| 75 | + #header_line = '\t'.join(fixed_columns_header) + '\t' + '\t'.join(sorted(info_columns_header)) + '\t' + '\t'.join(sample_columns_header) + '\t' + '\t'.join(sorted(format_columns_header)) + '\tGT' |
| 76 | + header_tags = fixed_columns_header + sorted(info_columns_header) + sample_columns_header + sorted(format_columns_header) + ['GT'] |
74 | 77 | else:
|
75 |
| - header_line = '\t'.join(fixed_columns_header) + '\t' + '\t'.join(sorted(info_columns_header)) |
| 78 | + #header_line = '\t'.join(fixed_columns_header) + '\t' + '\t'.join(sorted(info_columns_header)) |
| 79 | + header_tags = fixed_columns_header + sorted(info_columns_header) |
76 | 80 | else:
|
77 | 81 | if len(sample_columns_header) > 0:
|
78 | 82 | if skip_genotype_data is False:
|
79 |
| - header_line = '\t'.join(fixed_columns_header) + '\t' + '\t'.join(sample_columns_header) + '\t' + '\t'.join(sorted(format_columns_header)) + '\tGT' |
| 83 | + #header_line = '\t'.join(fixed_columns_header) + '\t' + '\t'.join(sample_columns_header) + '\t' + '\t'.join(sorted(format_columns_header)) + '\tGT' |
| 84 | + header_tags = fixed_columns_header + sample_columns_header + sorted(format_columns_header) + ['GT'] |
80 | 85 | else:
|
81 |
| - header_line = '\t'.join(fixed_columns_header) |
82 |
| - |
| 86 | + #header_line = '\t'.join(fixed_columns_header) |
| 87 | + header_tags = fixed_columns_header |
| 88 | + header_line = '\t'.join(header_tags) |
| 89 | + |
83 | 90 | out.write('#https://github.com/sigven/vcf2tsv version=' + str(version) + '\n')
|
84 | 91 | if print_data_type_header is True:
|
85 |
| - header_tags = header_line.rstrip().split('\t') |
| 92 | + #header_tags = header_line.rstrip().split('\t') |
86 | 93 | header_types = []
|
87 | 94 | for h in header_tags:
|
88 | 95 | if h in column_types:
|
89 | 96 | header_types.append(str(column_types[h]))
|
90 |
| - header_line_type = '\t'.join(fixed_columns_header_type) + '\t' + '\t'.join(header_types) |
| 97 | + #header_line_type = '\t'.join(fixed_columns_header_type) + '\t' + '\t'.join(header_types) |
| 98 | + header_line_type = '\t'.join(fixed_columns_header_type + header_types) |
91 | 99 | out.write('#' + str(header_line_type) + '\n')
|
92 | 100 | out.write(str(header_line) + '\n')
|
93 | 101 | else:
|
@@ -154,7 +162,7 @@ def vcf2tsv(query_vcf, out_tsv, skip_info_data, skip_genotype_data, keep_rejecte
|
154 | 162 | vcf_info_data.append(str(variant_info.get(info_field)))
|
155 | 163 | else:
|
156 | 164 | print('vcf2tsv.py WARNING:\tINFO tag ' + str(info_field) + ' is defined in the VCF header as type \'Integer\', yet parsed as other type:' + str(type(variant_info.get(info_field))))
|
157 |
| - vcf_info_data.append(re.sub('\(|\)', '', variant_info.get(info_field).encode('ascii','ignore').decode('ascii'))) |
| 165 | + vcf_info_data.append(re.sub(r'\(|\)', '', variant_info.get(info_field).encode('ascii','ignore').decode('ascii'))) |
158 | 166 |
|
159 | 167 | #print(str(vcf_info_data))
|
160 | 168 | #dictionary, with sample names as keys, values being genotype data (dictionary with format tags as keys)
|
|
0 commit comments