-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathdata_provider.py
208 lines (147 loc) · 7.93 KB
/
data_provider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
#-*- coding: utf-8 -*-
"""
Data provider: to provide train/val/test data to the model
"""
import random
import numpy as np
import os
import h5py
from collections import OrderedDict
import json
from opt import *
import random
import math
class DataProvision:
    """Load proposal annotations and C3D features and serve them in batches.

    The constructor reads, for each of the 'train', 'val' and 'test' splits,
    a JSON annotation file, then loads every video's 'c3d_features' dataset
    from one HDF5 file into memory, and finally prepares per-anchor class
    weights. `iterate_batch` yields padded feature tensors together with
    binary anchor labels.

    Keys read from `options`: 'c3d_resolution', 'num_anchors',
    'proposal_data_path', 'feature_data_path', 'use_weight',
    'anchor_weight_path' (only when 'use_weight' is true) and 'sample_len'
    (only when iterating over the 'train' split).
    """

    def __init__(self, options):
        """Read annotations, features and anchor weights described by `options`.

        Args:
            options: dict-like configuration; see the class docstring for the
                keys that are read.
        """
        self._options = options
        # Iterate in a fixed order so the log output is deterministic; the
        # attribute itself stays a set.
        split_names = ('train', 'val', 'test')
        self._splits = set(split_names)
        self._ids = {}  # video ids: a dictionary containing train/val/test id lists
        self._sizes = {}  # size of train/val/test split data
        self._localization = {}  # time stamp data

        # proposal anchors (in frame number): resolution, 2*resolution, ...,
        # num_anchors*resolution
        resolution = self._options['c3d_resolution']
        num_anchors = self._options['num_anchors']
        self._anchors = list(range(resolution, (num_anchors + 1) * resolution, resolution))

        print('Data Size:')
        for split in split_names:
            proposal_path = os.path.join(
                self._options['proposal_data_path'],
                'thumos14_temporal_proposal_%s.json'%split)
            # Use a context manager so the file handle is closed promptly.
            with open(proposal_path, 'r') as proposal_file:
                proposal_data = json.load(proposal_file)
            self._ids[split] = list(proposal_data.keys())
            self._sizes[split] = len(self._ids[split])
            self._localization[split] = proposal_data
            print('%s-split: %d videos.'%(split, self._sizes[split]))

        # feature dictionary
        print('Loading c3d features ...')
        with h5py.File(self._options['feature_data_path'], 'r') as features:
            # Materialise the keys: the keys view is tied to the open file.
            self._feature_ids = list(features.keys())
            # `dataset[()]` reads the whole dataset into memory; the former
            # `.value` accessor was removed in h5py 3.0.
            self._features = {
                video_id: features[video_id]['c3d_features'][()]
                for video_id in self._feature_ids
            }

        # load label weight data
        print('Loading anchor weight data ...')
        if self._options['use_weight']:
            with open(self._options['anchor_weight_path'], 'r') as weight_file:
                # presumably a list of [w0, w1] pairs, one per anchor -- verify
                # against the file that produces it
                self._proposal_weight = json.load(weight_file)
        else:
            self._proposal_weight = np.ones(shape=(num_anchors, 2)) / 2.0

        # when using tensorflow built-in function: tf.nn.weighted_cross_entropy_with_logits()
        # Rescale every pair so that the second entry is 1 and the first one
        # holds the ratio. float() keeps integer weights from being truncated
        # under Python 2.
        for weight in self._proposal_weight:
            weight[0] = weight[0] / float(weight[1])
            weight[1] = 1.

        print('Done loading.')

    def get_size(self, split):
        """Return the number of videos in `split`."""
        return self._sizes[split]

    def get_ids(self, split):
        """Return the list of video ids in `split`."""
        return self._ids[split]

    def get_localization(self):
        """Return the per-split annotation dictionaries loaded from JSON."""
        return self._localization

    def get_anchors(self):
        """Return the list of anchor lengths (in frames)."""
        return self._anchors

    def get_iou(self, pred, gt):
        """Return the temporal intersection-over-union of two intervals.

        Args:
            pred: (start, end) pair.
            gt: (start, end) pair.

        Returns:
            A float; a small epsilon in the denominator avoids division by
            zero for degenerate intervals.
        """
        start_pred, end_pred = pred
        start, end = gt
        intersection = max(0, min(end, end_pred) - max(start, start_pred))
        # For overlapping intervals the union is the covered span; for
        # disjoint ones it is the sum of both lengths.
        union = min(max(end, end_pred) - min(start, start_pred),
                    end - start + end_pred - start_pred)
        return float(intersection) / (union + 1e-8)

    def get_intersection(self, region1, region2):
        """Return (max of starts, min of ends) of two (start, end) regions.

        The result is not validated: when the regions do not overlap the
        returned start is greater than the returned end.
        """
        start1, end1 = region1
        start2, end2 = region2
        return (max(start1, start2), min(end1, end2))

    # process batch data: to generate formatted tensor and necessary mask
    def process_batch_data(self, batch_data):
        """Zero-pad a list of 2-D arrays to a common length and build a mask.

        Args:
            batch_data: non-empty list of arrays indexed as (length, dim);
                `dim` is taken from the first array.

        Returns:
            (padded, mask): `padded` is float32 with shape
            (batch, max_length, dim); `mask` is int32 with shape
            (batch, max_length) and is 1 on valid positions, 0 on padding.
        """
        lengths = [data.shape[0] for data in batch_data]
        max_length = max(lengths)
        dim = batch_data[0].shape[1]
        out_batch_data = np.zeros(shape=(len(batch_data), max_length, dim), dtype='float32')
        out_batch_data_mask = np.zeros(shape=(len(batch_data), max_length), dtype='int32')
        for i, (data, effective_len) in enumerate(zip(batch_data, lengths)):
            out_batch_data[i, :effective_len, :] = data
            out_batch_data_mask[i, :effective_len] = 1
        return out_batch_data, out_batch_data_mask

    def _build_gt_proposal(self, framestamps, start_feat_id, end_feat_id):
        """Build the binary anchor labels for one sampled feature window.

        Args:
            framestamps: iterable of stamps; stamp[0] and stamp[1] are used as
                start and end frame (the end is treated as inclusive, since
                `end + 1` is used as the exclusive bound).
            start_feat_id: index of the first sampled feature in the video.
            end_feat_id: one past the index of the last sampled feature.

        Returns:
            int16 array of shape (end_feat_id - start_feat_id, num_anchors);
            entry [t, k] is 1 when the proposal ending at feature t with
            anchor k has tIoU > 0.5 with some ground-truth stamp.
        """
        c3d_resolution = self._options['c3d_resolution']
        n_anchors = self._options['num_anchors']
        # Integer arithmetic throughout: true division would produce floats
        # that `range` rejects under Python 3.
        half_resolution = c3d_resolution // 2
        start_frame_id = start_feat_id * c3d_resolution + half_resolution
        end_frame_id = (end_feat_id - 1) * c3d_resolution + half_resolution

        # Size the labels to the window that was actually sampled so they
        # always line up with the corresponding feature slice.
        gt_proposal = np.zeros(shape=(end_feat_id - start_feat_id, n_anchors), dtype=np.int16)

        for stamp in framestamps:
            start = stamp[0]
            end = stamp[1]
            # only need to check whether proposals that have end point at region of
            # (frame_check_start, frame_check_end) are "correct" proposals
            start_point = max((start + end) // 2, 0)
            end_point = end + (end - start + 1)
            frame_check_start, frame_check_end = self.get_intersection(
                (start_point, end_point + 1), (start_frame_id, end_frame_id + 1))
            feat_check_start = int(frame_check_start // c3d_resolution)
            feat_check_end = int(frame_check_end // c3d_resolution)
            # Clamp to the sampled window so the row index can never fall
            # outside gt_proposal (or silently wrap around when negative).
            first_feat = max(feat_check_start, start_feat_id)
            last_feat = min(feat_check_end, end_feat_id - 1)
            for feat_id in range(first_feat, last_feat + 1):
                frame_id = feat_id * c3d_resolution + half_resolution
                for anchor_id, anchor in enumerate(self._anchors):
                    pred = (frame_id + 1 - anchor, frame_id + 1)
                    tiou = self.get_iou(pred, (start, end + 1))
                    if tiou > 0.5:
                        gt_proposal[feat_id - start_feat_id, anchor_id] = 1
        return gt_proposal

    # generate train/val/test data
    def iterate_batch(self, split, batch_size):
        """Yield batches of padded features and anchor labels for `split`.

        For 'train' the ids are shuffled, a random window of at most
        options['sample_len'] features is cut from every video, and the
        generator never terminates (ids are reshuffled after each pass).
        For any other split whole videos are used and the generator stops
        after one pass.

        Only full batches are produced: a trailing group of fewer than
        `batch_size` videos is dropped, and a `batch_size` larger than the
        split raises IndexError.

        Args:
            split: split name, a key of the loaded id/annotation dictionaries.
            batch_size: number of videos per batch.

        Yields:
            dict with keys 'video_feat' (batch, max_len, dim) float32,
            'proposal' (batch, max_len, num_anchors) float32,
            'video_feat_mask' (batch, max_len) int32 and
            'proposal_weight' (array built from the anchor weights).
        """
        ids = list(self._ids[split])
        is_train = split == 'train'
        if is_train:
            print('Randomly shuffle training data ...')
            random.shuffle(ids)

        current = 0
        while True:
            batch_feature = []
            batch_proposal = []

            for sample_id in range(batch_size):
                vid = ids[sample_id + current]
                feature = self._features[vid]
                feature_len = feature.shape[0]

                # sampling
                if is_train:
                    sample_len = self._options['sample_len']
                else:
                    sample_len = feature_len

                # starting feature id relative to original video
                start_feat_id = random.randint(0, max(feature_len - sample_len, 0))
                end_feat_id = min(start_feat_id + sample_len, feature_len)
                batch_feature.append(feature[start_feat_id:end_feat_id])

                # the ground truth proposal should be changed according to the sampled stream
                framestamps = self._localization[split][vid]['framestamps']
                batch_proposal.append(
                    self._build_gt_proposal(framestamps, start_feat_id, end_feat_id))

            # generate tensor numpy data
            batch_feature, batch_feature_mask = self.process_batch_data(batch_feature)
            batch_proposal, _ = self.process_batch_data(batch_proposal)

            # serve as a dictionary
            batch_data = {
                'video_feat': batch_feature,
                'proposal': batch_proposal,
                'video_feat_mask': batch_feature_mask,
                'proposal_weight': np.array(self._proposal_weight),
            }
            yield batch_data

            current = current + batch_size
            if current + batch_size > self.get_size(split):
                current = 0
                # at the end of list, shuffle it
                if is_train:
                    print('Randomly shuffle training data ...')
                    random.shuffle(ids)
                    print('The new shuffled ids are:')
                    if len(ids) >= 3:
                        print('%s, %s, %s, ..., %s'%(ids[0], ids[1], ids[2], ids[-1]))
                    else:
                        # Too few ids for the abbreviated form; list them all.
                        print(', '.join(str(vid) for vid in ids))
                else:
                    break