forked from chinese-poetry/chinese-poetry
-
Notifications
You must be signed in to change notification settings - Fork 23
/
Copy pathchange_others.py
51 lines (36 loc) · 1.37 KB
/
change_others.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from collections import defaultdict
import opencc
import json
import os
from tqdm import tqdm
# Merge the poetry JSON files found under ``base`` into one JSON document,
# running every kept text field through OpenCC's 't2s' configuration
# (Traditional Chinese -> Simplified Chinese).
cc = opencc.OpenCC('t2s')
base = 'wudai'
output_path = '五代的词/data.json'
new_json = []

# Source keys that are deliberately dropped from the merged output.
skipped_keys = {
    'volume', 'tags', 'notes', 'no#', 'biography', 'desc', 'name', 'rhythmic',
}

# os.listdir() returns entries in arbitrary order; sorting keeps the
# generated data.json identical from one run/machine to the next.
for file in tqdm(sorted(os.listdir(base))):
    # Ignore any directory entry whose name contains 'db', 'md' or 'error'.
    if 'db' in file or 'md' in file or 'error' in file:
        continue
    with open(os.path.join(base, file), encoding='utf8') as f:
        # Each file is expected to hold a JSON array of objects: the loop
        # below iterates the loaded value and calls .items() on each element.
        all_data = json.load(f)
    for data in all_data:
        new_dict = {}
        for k, v in data.items():
            if k in skipped_keys:
                continue
            if k == 'title':
                new_dict['title'] = cc.convert(v)
            elif k == 'paragraphs':
                # 'paragraphs' is renamed to 'content'; every line is
                # converted individually.
                new_dict['content'] = [cc.convert(text) for text in v]
            else:
                # Any other key is kept under its own name.
                # NOTE(review): this assumes the value is a plain string;
                # confirm against the input data.
                new_dict[k] = cc.convert(v)
        # Entries that consisted only of skipped keys produce no output.
        if not new_dict:
            continue
        new_json.append(new_dict)

content = json.dumps(new_json, ensure_ascii=False, indent=2)
# Create the output directory on first run so that open() below does not
# fail with FileNotFoundError.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, 'w', encoding='utf8') as f:
    f.write(content)

# Convert and print one sample line as a quick visual check of the converter.
s = cc.convert('採菊東籬下,悠然見南山')
print(s)