-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclean_csv.py
88 lines (63 loc) · 2.32 KB
/
clean_csv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import os
def _clean_client_row(row, max_columns=7):
    """Return *row* with every comma past the column limit replaced by ';'.

    The first ``max_columns - 1`` commas are kept as field separators; any
    further commas would create extra columns, so they are rewritten as
    semicolons inside the final field. Rows with fewer commas are returned
    unchanged.
    """
    fields = row.split(',', max_columns - 1)
    if len(fields) == max_columns:
        fields[-1] = fields[-1].replace(',', ';')
    return ','.join(fields)


def _write_nonblank_lines(path, lines):
    """Write each non-empty string in *lines* to *path*, newline-terminated."""
    with open(path, 'w') as out:
        out.writelines("%s\n" % line for line in lines if line != '')


def parse_ap_client_data(csv_file):
    """Split a combined CSV file into separate AP and client CSV files.

    The input is read from ``<basename>.csv``, where ``<basename>`` is
    *csv_file* with its extension removed. Blank lines act as section
    separators:

    * two or more blank lines: the AP section runs from the first blank
      line to the second, and the client section from the second to the
      third (or to the end of the file when there is no third);
    * exactly one blank line: everything before it is AP data and
      everything after it is client data;
    * no blank line: the whole file is treated as AP data and the client
      output is empty.

    Non-blank lines (stripped of surrounding whitespace) are written to
    ``<basename>_ap.csv`` and ``<basename>_client.csv``. Client rows are
    limited to 7 columns: commas beyond the sixth are replaced with ';'.

    Args:
        csv_file: Path of the CSV file to split (extension is ignored).

    Returns:
        None. The results are the two files written next to the input.
    """
    basename = os.path.splitext(csv_file)[0]
    print("Preparing to parse CSV file %s.csv" % basename)

    with open(basename + '.csv', 'r') as f:
        stuff = [line.strip() for line in f]

    blank_lines = [i for i, line in enumerate(stuff) if line == '']
    total = len(stuff)

    # Work out the section boundaries without assuming that all three
    # separators (leading, middle, trailing) are present; indexing
    # blank_lines blindly raised IndexError on files missing any of them.
    if len(blank_lines) > 1:
        ap_start, split_at = blank_lines[0], blank_lines[1]
        client_end = blank_lines[2] if len(blank_lines) > 2 else total
    elif blank_lines:
        ap_start, split_at, client_end = 0, blank_lines[0], total
    else:
        ap_start, split_at, client_end = 0, total, total

    # -----
    _write_nonblank_lines(basename + '_ap.csv', stuff[ap_start:split_at])
    print("Finished writing AP data to file %s_ap.csv" % basename)

    # -----
    client_rows = [_clean_client_row(row) for row in stuff[split_at:client_end]]
    _write_nonblank_lines(basename + '_client.csv', client_rows)
    print("Finished writing client data to file %s_client.csv" % basename)
if __name__ == "__main__":
    # Split every source CSV found in the current directory.
    for filename in os.listdir('.'):
        if not filename.endswith('.csv'):
            continue
        # Skip files produced by an earlier run. Match the generated
        # suffixes exactly: a plain substring test for '_ap' / '_client'
        # would also skip unrelated inputs such as 'site_apex.csv'.
        if filename.endswith(('_ap.csv', '_client.csv')):
            continue
        print(filename)
        parse_ap_client_data(filename)

    print("\n\n\nAll done!\n\n\n\n")