-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
71 lines (57 loc) · 1.88 KB
/
utils.py
File metadata and controls
71 lines (57 loc) · 1.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import json
import os
# Read the next chunk of entries from a partial index file
def read_chunk(file, chunk_size):
    """Parse the next batch of JSON lines from ``file`` into a dict.

    ``chunk_size`` is forwarded to ``file.readlines`` as its size hint.
    Every line read is decoded with ``json.loads``; element 0 of the
    decoded value becomes the key and element 1 the value, so a later
    duplicate key overwrites an earlier one.

    Returns the resulting dict, or ``None`` when no entries were read.
    """
    raw_lines = file.readlines(chunk_size)
    chunk = {
        entry[0]: entry[1]
        for entry in map(json.loads, raw_lines)
    }
    # An empty dict is falsy, so this yields None when nothing was read.
    return chunk or None
def initialize_directory(root_dir):
    """Ensure ``root_dir`` and its ``partial_indexes`` subdirectory exist.

    Missing parent directories of ``root_dir`` are created as well, and
    directories that already exist are left untouched, so the call can be
    repeated safely.

    Raises ``OSError`` (e.g. ``NotADirectoryError``) if a path component
    exists but is not a directory.
    """
    # makedirs(exist_ok=True) replaces the exists()-then-mkdir() sequence:
    # it has no check-then-create race and also handles a nested root_dir.
    os.makedirs(os.path.join(root_dir, "partial_indexes"), exist_ok=True)
def dump_json(obj, path):
    """Write ``obj`` to ``path`` as JSON indented by four spaces.

    The object is serialized before the file is opened, so when ``obj``
    cannot be serialized the error propagates without truncating a file
    that already exists at ``path``.

    Raises ``TypeError``/``ValueError`` from ``json.dumps`` and ``OSError``
    from opening or writing the file.
    """
    serialized = json.dumps(obj, indent=4)
    with open(path, "w") as f:
        f.write(serialized)
def load_json(path):
    """Read and return the JSON document stored at ``path``.

    When ``path`` does not exist, a message is printed and ``None`` is
    returned instead.
    """
    if os.path.exists(path):
        with open(path, "r") as source:
            return json.load(source)
    print(path, "does not exist")
    return None
def dump_jsonl(dict_obj, path):
    """Write ``dict_obj`` to ``path`` in JSON Lines form.

    Each ``(key, value)`` item is JSON-encoded onto its own line, in the
    dict's iteration order.

    Raises ``TypeError`` when ``dict_obj`` is not a ``dict`` instance.
    """
    if not isinstance(dict_obj, dict):
        raise TypeError("dict_obj is not of Dictionary type")
    with open(path, "w") as sink:
        sink.writelines(
            json.dumps(pair) + "\n" for pair in dict_obj.items()
        )
# Dump a dict to jsonl, sorted by the integer value of each key
def dump_jsonl_sorted(dict_obj, path):
    """Write ``dict_obj`` to ``path`` as JSON Lines ordered by ``int(key)``.

    Each line is the JSON encoding of ``[key, value]``. Keys are compared
    by ``int(key)``, so numeric strings sort numerically ("2" before "10").

    Any ``dict`` instance is accepted, including subclasses such as
    ``collections.defaultdict``, matching the check used by ``dump_jsonl``.

    Raises ``TypeError`` when ``dict_obj`` is not a ``dict`` instance, and
    ``ValueError``/``TypeError`` when a key cannot be converted by ``int``.
    """
    if not isinstance(dict_obj, dict):
        raise TypeError("dict_obj is not of Dictionary type")
    # Sort before opening so a key that int() rejects does not leave the
    # target file truncated.
    ordered_items = sorted(dict_obj.items(), key=lambda item: int(item[0]))
    with open(path, "w") as outfile:
        for key, value in ordered_items:
            outfile.write(json.dumps([key, value]))
            outfile.write("\n")
def load_jsonl(path):
    """Load a JSON Lines file from ``path`` into a dict.

    Every line is decoded with ``json.loads``; element 0 of the decoded
    value becomes the key and element 1 the value, so a later duplicate
    key overwrites an earlier one. An empty file yields an empty dict.

    When ``path`` does not exist, a message is printed and ``None`` is
    returned instead.
    """
    if not os.path.exists(path):
        print(path, "does not exist")
        return None
    with open(path, "r") as source:
        return {
            entry[0]: entry[1]
            for entry in map(json.loads, source)
        }