-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprepare_topic_files.py
73 lines (44 loc) · 1.48 KB
/
prepare_topic_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import h5py
import json
# Directory holding the LDA topic file and directory holding the caption files.
LDA_dir = "./topic/"
prepared_files = "./prepared_files/"

##### load lda_topics #####
# The file is only read here, so open it read-only ("r") rather than "r+",
# which needs write permission. The `with` block guarantees the HDF5 handle
# is closed even if one of the datasets is missing.
with h5py.File(LDA_dir + "lda_topics.h5", "r") as lda:
    # list() walks each dataset along its first axis while the file is still
    # open, so the entries are in memory before the handle is closed.
    LDA_test_gt = list(lda["test_gt"])
    print(len(LDA_test_gt))
    LDA_val_gt = list(lda["val_gt"])
    print(len(LDA_val_gt))
    LDA_train_gt = list(lda["train_gt"])
    print(len(LDA_train_gt))
##### load image IDs #####
def _load_image_ids(split):
    """Return the "image_ids" entry of ``captions_<split>.json`` as a list.

    Args:
        split: Name of the split; one of "test", "train" or "val".

    Returns:
        A list built from the "image_ids" value of the JSON document.

    Raises:
        OSError: If the captions file cannot be opened.
        KeyError: If the document has no "image_ids" key.
    """
    captions_path = prepared_files + "captions_" + split + ".json"
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(captions_path, encoding="utf-8") as captions_file:
        return list(json.load(captions_file)["image_ids"])


# NOTE(review): the previous version carried the same "# 1000" size note on
# all three splits -- presumably copy-pasted; confirm the real split sizes.
test_img_list = _load_image_ids("test")
train_img_list = _load_image_ids("train")
val_img_list = _load_image_ids("val")
# Build one dictionary per split: key = position of the sample inside the
# split's image-ID list, value = the LDA entry at that position converted with
# .tolist() so that json can serialise it.
# NOTE(review): keys are positional indices, not the image IDs themselves --
# confirm that this is what the consumers of topics.json expect.
TEST_lda_dict = {
    pos: LDA_test_gt[pos].tolist() for pos in range(len(test_img_list))
}
TRAIN_lda_dict = {
    pos: LDA_train_gt[pos].tolist() for pos in range(len(train_img_list))
}
VAL_lda_dict = {
    pos: LDA_val_gt[pos].tolist() for pos in range(len(val_img_list))
}

# Bundle the three splits under their names, in the order train, test, val.
coco_dict = dict(
    train=TRAIN_lda_dict,
    test=TEST_lda_dict,
    val=VAL_lda_dict,
)
# Serialise the per-split dictionaries and store them as a single JSON document.
with open('./topic/topics.json', 'w') as topics_file:
    topics_file.write(json.dumps(coco_dict))