|
3 | 3 | #author@shibin
|
4 | 4 | #2015.10.09
|
5 | 5 |
|
6 |
| -import StringIO |
7 | 6 | import xlwt
|
8 | 7 | import json
|
9 | 8 | import sys
|
10 | 9 |
|
11 | 10 |
|
12 |
| -def filter_none(one_list): |
13 |
| - for i in range(len(one_list)): |
14 |
| - if one_list[i] == None: |
15 |
| - one_list[i] = "" |
16 |
| - return one_list |
def patch_none(row):
    """Replace every ``None`` cell in *row* with an empty string.

    The list is modified in place and is also returned, so the function can
    be used for its side effect or as a mapping function.

    :param row: mutable list of cell values.
    :return: the same list object, with ``None`` entries replaced by ``""``.
    """
    for i, ele in enumerate(row):
        # Identity test: ``== None`` would go through a custom ``__eq__``
        # and could treat a non-None object as missing.
        if ele is None:
            row[i] = ""
    return row
17 | 16 |
|
18 | 17 |
|
19 |
| -each_encode = lambda x: [i.encode('utf-8') for i in x] |
def patch_encode(row):
    """Return a new list holding each element of *row* encoded as UTF-8.

    Every element must provide an ``encode`` method; the input list itself
    is left untouched.

    :param row: iterable of string cells.
    :return: list of the UTF-8 encoded cells, in the same order.
    """
    return [ele.encode('utf-8') for ele in row]
20 | 19 |
|
21 | 20 |
|
22 |
| -def make_xls_file(header_list, data_list): |
23 |
| - headers = each_encode(filter_none(header_list)) |
24 |
| - datas = [each_encode(filter_none(row)) for row in data_list] |
def patch_str(row):
    """Convert every non-text cell in *row* to its ``str()`` form.

    Cells that are already text (``unicode`` on Python 2, ``str`` on
    Python 3) are kept unchanged. The list is modified in place and is also
    returned.

    :param row: mutable list of cell values.
    :return: the same list object, with non-text entries stringified.
    """
    # ``unicode`` only exists on Python 2; fall back to ``str`` so the
    # function does not raise NameError on Python 3.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    for i, ele in enumerate(row):
        # isinstance also accepts subclasses of the text type, which the
        # exact ``type(...) !=`` comparison would have needlessly re-stringified.
        if not isinstance(ele, text_type):
            row[i] = str(ele)
    return row
25 | 26 |
|
26 |
| - mem_file = StringIO.StringIO() |
| 27 | + |
def patch_datas(datas):
    """Normalise every row of *datas* for writing.

    Each row is passed through ``patch_none``, ``patch_str`` and
    ``patch_encode``, in that order.

    :param datas: iterable of rows, each a mutable list of cell values.
    :return: a list with one processed row per input row.
    """
    # A single comprehension returns a concrete list on every Python
    # version; stacked ``map`` calls yield a lazy iterator on Python 3.
    return [patch_encode(patch_str(patch_none(row))) for row in datas]
| 34 | + |
| 35 | + |
def load_files(fin):
    """Read JSON-lines input and turn it into a header list plus table rows.

    Every non-blank line of *fin* is parsed with ``json.loads`` and is
    expected to hold one JSON object. The header list is the union of all
    keys, in the order they are first encountered, so the column layout is
    stable from run to run.

    :param fin: iterable of text lines (for example an open file).
    :return: ``(headers, datas)``; ``datas`` has one row per parsed object,
        aligned with ``headers``, using ``''`` for keys an object lacks.
    :raises ValueError: if a non-blank line is not valid JSON.
    """
    headers = []
    seen = set()  # O(1) membership test alongside the ordered list
    objs = []
    for line in fin:
        # Skip blank lines instead of letting json.loads fail on them.
        if not line.strip():
            continue
        obj = json.loads(line)
        for key in obj:
            if key not in seen:
                seen.add(key)
                headers.append(key)
        objs.append(obj)

    datas = [[obj.get(head, '') for head in headers] for obj in objs]

    return (headers, datas)
| 54 | + |
| 55 | + |
def make_xls(headers, datas):
    """Build an xlwt workbook with a single sheet holding the table.

    :param headers: iterable of column titles, written to the first row.
    :param datas: iterable of rows; each row is an iterable of cell values
        written to consecutive columns of the following rows.
    :return: the populated ``xlwt.Workbook`` (not yet saved).
    """
    book = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = book.add_sheet('Sheet1')

    # Row 0 carries the column titles.
    for col_idx, head in enumerate(headers):
        sheet.write(0, col_idx, head)

    # Data rows follow, starting at row 1.
    for row_idx, row in enumerate(datas, 1):
        for col_idx, ele in enumerate(row):
            sheet.write(row_idx, col_idx, ele)

    return book
45 | 74 |
|
46 |
| -def chg_doc(doc): |
47 |
| - newdoc = {} |
48 |
| - for k in doc: |
49 |
| - newdoc[k] = doc[k] |
50 | 75 |
|
51 |
| - for k in newdoc: |
52 |
| - v = newdoc[k] |
53 |
| - if v == None: |
54 |
| - newdoc[k] = "" |
55 |
| - elif type(v) != unicode: |
56 |
| - newdoc[k] = str(v) |
57 |
| - return newdoc |
def main(fin, fout):
    """Convert the JSON lines read from *fin* into a workbook saved to *fout*.

    :param fin: iterable of JSON text lines, passed to ``load_files``.
    :param fout: output stream the workbook is saved to; it is flushed
        afterwards but not closed here.
    """
    titles, raw_rows = load_files(fin)
    workbook = make_xls(titles, patch_datas(raw_rows))

    workbook.save(fout)
    fout.flush()
59 | 83 |
|
60 |
| -def readfile(fin): |
61 |
| - header_list = [] |
62 |
| - data_list = [] |
63 |
| - for line in fin: |
64 |
| - doc = json.loads(line) |
65 |
| - doc = chg_doc(doc) |
66 |
| - if header_list == []: |
67 |
| - header_list = doc.keys() |
68 |
| - data = [] |
69 |
| - for k in header_list: |
70 |
| - data.append(doc[k]) |
71 |
| - data_list.append(data) |
72 |
| - return header_list, data_list |
73 |
| - |
74 |
| - |
75 |
| -def writefile(mem_file, fout): |
76 |
| - fout.write(mem_file.getvalue()) |
77 |
| - fout.close() |
78 |
| - |
79 |
| - |
80 |
| -def main(fin, fout): |
81 |
| - header_list, data_list = readfile(fin) |
82 |
| - mem_file = make_xls_file(header_list, data_list) |
83 |
| - writefile(mem_file, fout) |
84 | 84 |
|
85 | 85 |
|
86 | 86 | if __name__ == '__main__':
|
|
0 commit comments