-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsamplesheet_parser.py
executable file
·121 lines (94 loc) · 3.08 KB
/
samplesheet_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python
import argparse
import json
from pprint import pprint
def parse_header_section(path_to_sample_sheet):
    """Parse the ``[Header]`` section of a sample sheet into a dict.

    Each line read before the first section marker other than ``[Header]``
    is treated as a ``key,value`` record. Keys are lower-cased and have
    spaces replaced by underscores; the value is the second comma-separated
    field, or ``""`` when the line has only one field. Lines whose key is
    empty (blank or comma-only lines) are ignored.

    Args:
        path_to_sample_sheet: Path of the sample sheet file to read.

    Returns:
        dict mapping normalised header keys to their string values.
    """
    header = {}
    with open(path_to_sample_sheet, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if line.startswith('[Header]'):
                continue
            if line.startswith('['):
                # Any other section marker ends the header. Stopping only at
                # [Reads] would leak later sections into the result when a
                # sheet has no [Reads] section.
                break
            # Trailing commas are spreadsheet padding, not empty fields.
            fields = line.rstrip(',').split(',')
            key = fields[0].lower().replace(" ", "_")
            if key:
                header[key] = fields[1] if len(fields) > 1 else ""
    return header
def parse_reads_section(path_to_sample_sheet):
    """Parse the ``[Reads]`` section of a sample sheet into a list of ints.

    Lines following the ``[Reads]`` marker are read until the next section
    marker (any line starting with ``[``) or end of file. The first
    comma-separated field of every non-empty line is converted to ``int``.

    Args:
        path_to_sample_sheet: Path of the sample sheet file to read.

    Returns:
        list of read lengths in file order; empty when the sheet has no
        ``[Reads]`` section.

    Raises:
        ValueError: If a non-empty line in the section does not start with
            an integer field.
    """
    reads = []
    with open(path_to_sample_sheet, 'r') as f:
        # Advance the file iterator to just past the [Reads] marker.
        for line in f:
            if line.strip().startswith('[Reads]'):
                break
        for raw_line in f:
            line = raw_line.strip()
            if line.startswith('['):
                # Stop at whichever section comes next. Stopping only at
                # [Settings] would feed "[Data]" and the sample rows to
                # int() when the sheet has no [Settings] section.
                break
            line = line.rstrip(',')
            if line:
                reads.append(int(line.split(',')[0]))
    return reads
def parse_settings_section(path_to_sample_sheet):
    """Parse the ``[Settings]`` section of a sample sheet into a dict.

    Lines following the ``[Settings]`` marker are read until the next
    section marker (any line starting with ``[``) or end of file. Each line
    is treated as a ``key,value`` record: keys are lower-cased with spaces
    replaced by underscores, and the value is the second comma-separated
    field, or ``""`` when the line has only one field. Lines whose key is
    empty are ignored.

    Args:
        path_to_sample_sheet: Path of the sample sheet file to read.

    Returns:
        dict mapping normalised setting names to their string values; empty
        when the sheet has no ``[Settings]`` section.
    """
    settings = {}
    with open(path_to_sample_sheet, 'r') as f:
        # Advance the file iterator to just past the [Settings] marker.
        for line in f:
            if line.strip().startswith('[Settings]'):
                break
        for raw_line in f:
            line = raw_line.strip()
            if line.startswith('['):
                # Stop at whichever section comes next, not only [Data], so
                # an intervening section is not merged into the settings.
                break
            # Trailing commas are spreadsheet padding, not empty fields.
            fields = line.rstrip(',').split(',')
            key = fields[0].lower().replace(" ", "_")
            if key:
                settings[key] = fields[1] if len(fields) > 1 else ""
    return settings
def parse_data_section(path_to_sample_sheet):
    """Parse the ``[Data]`` section of a sample sheet into a list of dicts.

    The first line after the ``[Data]`` marker supplies the column names
    (lower-cased). Every following non-blank line becomes one dict pairing
    column names with that line's comma-separated fields. Fields beyond the
    last column name are dropped, and a row shorter than the column list
    simply lacks the trailing keys.

    Args:
        path_to_sample_sheet: Path of the sample sheet file to read.

    Returns:
        list of row dicts in file order; empty when the sheet has no
        ``[Data]`` section or the section has no column-name line.
    """
    data = []
    with open(path_to_sample_sheet, 'r') as f:
        # Advance the file iterator to just past the [Data] marker.
        for line in f:
            if line.strip().startswith('[Data]'):
                break
        # A default avoids a bare StopIteration escaping when the marker is
        # missing or is the last line of the file.
        header_line = next(f, None)
        if header_line is None:
            return data
        data_header = [x.lower() for x in header_line.strip().split(',')]
        for line in f:
            fields = line.strip().split(',')
            if not any(fields):
                # Blank or comma-only padding lines carry no sample.
                continue
            # zip stops at the shorter sequence, so extra fields are ignored
            # and short rows keep only the columns they provide.
            data.append(dict(zip(data_header, fields)))
    return data
def main(args):
    """Parse every section of a sample sheet and print the result as JSON.

    Args:
        args: Parsed command-line arguments; ``args.sample_sheet`` is the
            path of the sample sheet file.
    """
    path = args.sample_sheet
    # Each parser opens and scans the file independently.
    sample_sheet = {
        'header': parse_header_section(path),
        'settings': parse_settings_section(path),
        'reads': parse_reads_section(path),
        'data': parse_data_section(path),
    }
    print(json.dumps(sample_sheet))
if __name__ == '__main__':
    # Command-line entry point: the single positional argument is the path
    # of the sample sheet to convert.
    cli = argparse.ArgumentParser()
    cli.add_argument('sample_sheet')
    main(cli.parse_args())