from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.tools import inspect_checkpoint as chkp
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes

import numpy as np
import os
from os import listdir
from os.path import isfile, join, isdir
from random import shuffle, choice
from PIL import Image
import sys
import json
import collections


input_width = 224
input_height = 224
num_channels = 3
slim = tf.contrib.slim
n_hidden1 = 4096
n_hidden2 = 4096
feature_size = 4096
learnError = 0
n_epochs = 1
batch_size = 2
min_steps = batch_size

lr = 1e-8


def loadData(jsonData, inPath):
    batchPaths = []
    for vid in jsonData.keys():
        # VIRAT format
        dirName = '_'.join(vid.split('.')[0].split('_')[2:])

        # Other dataset file name format
        # dirName = '_'.join(vid.split('.')[0])

        vidPath = join(inPath, dirName)
        batchPaths = batchPaths + sorted([str(join(vidPath, f) + '/') for f in listdir(vidPath) if isdir(join(vidPath, f))])
    return batchPaths

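# For illustration (assuming VIRAT-style names): a JSON key such as
# 'VIRAT_S_000200_00_000100_000171.mp4' maps to the frame directory
# '<inPath>/000200_00_000100_000171/', and each subdirectory of that
# directory contributes one entry to the returned list of batch paths.
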
def loadMiniBatch(vidFilePath):
    vidName = vidFilePath.split('/')[-3]
    frameList = sorted([join(vidFilePath, f) for f in listdir(vidFilePath) if isfile(join(vidFilePath, f)) and f.endswith('.png')])
    frameList = sorted(frameList, key=lambda x: int(x.split('/')[-1].split('.')[0]))
    # Pair each frame with its successor: segments = [(f0, f1), (f1, f2), ...]
    its = [iter(frameList), iter(frameList[1:])]
    segments = zip(*its)
    minibatch = []
    for segment in segments:
        im = []
        numFrames = 0
        for j, imFile in enumerate(segment):
            img = Image.open(imFile)
            img = img.resize((input_width, input_height), Image.ANTIALIAS)
            im.append(np.array(img))
            numFrames += 1
        minibatch.append(np.stack(im))
    return vidName, minibatch

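# Example of the pairing above (assuming numeric frame names): for frames
# ['0.png', '1.png', '2.png'], segments yields ('0.png', '1.png') and
# ('1.png', '2.png'), so each minibatch entry is an array of shape
# (2, input_height, input_width, num_channels) holding two consecutive frames.
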
def broadcast(tensor, shape):
    # Adding a zero tensor of the target shape broadcasts `tensor` up to `shape`.
    return tensor + tf.zeros(shape, dtype=tensor.dtype)

def RNNCell(W, B, inputs, state):
    """Most basic RNN: output = new_state = sigmoid(W * [input, state] + B)."""
    sigmoid = math_ops.sigmoid

    gate_inputs = math_ops.matmul(array_ops.concat([inputs, state], 1), W)
    gate_inputs = nn_ops.bias_add(gate_inputs, B)
    output = sigmoid(gate_inputs)
    return output, output

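# Note: multiplying the concatenation [inputs, state] by a single matrix W is
# equivalent to W_x * input + W_h * state with W = [W_x; W_h] stacked row-wise,
# which is how this cell realizes the usual RNN update with one matmul.
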
def lstm_cell(W, b, forget_bias, inputs, state):
    one = constant_op.constant(1, dtype=dtypes.int32)
    add = math_ops.add
    multiply = math_ops.multiply
    sigmoid = math_ops.sigmoid
    activation = math_ops.sigmoid
    # activation = math_ops.tanh

    c, h = array_ops.split(value=state, num_or_size_splits=2, axis=one)

    gate_inputs = math_ops.matmul(array_ops.concat([inputs, h], 1), W)
    gate_inputs = nn_ops.bias_add(gate_inputs, b)
    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(value=gate_inputs, num_or_size_splits=4, axis=one)

    forget_bias_tensor = constant_op.constant(forget_bias, dtype=f.dtype)

    new_c = add(multiply(c, sigmoid(add(f, forget_bias_tensor))), multiply(sigmoid(i), activation(j)))
    new_h = multiply(activation(new_c), sigmoid(o))
    new_state = array_ops.concat([new_c, new_h], 1)

    return new_h, new_state

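# For reference, the update above is the standard LSTM recurrence, with
# `activation` set to sigmoid instead of the usual tanh:
#   c_t = c_{t-1} * sigmoid(f + forget_bias) + sigmoid(i) * activation(j)
#   h_t = activation(c_t) * sigmoid(o)
# The state is carried between steps as the concatenation [c_t, h_t] (axis 1).
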
jsonData = json.load(open(sys.argv[1]))
vidPath = sys.argv[2]
modelPath = sys.argv[3]
activeLearningInput = sys.argv[4]

activeLearning = (activeLearningInput == "1")

batch = loadData(jsonData, vidPath)

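# Assumed invocation (argument order from the sys.argv reads above; the script
# name is illustrative):
#   python train_rnn.py annotations.json /path/to/frames /path/to/model_prefix 1
# The final flag enables the learning-rate gating ("active learning") below.
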
inputs = tf.placeholder(tf.float32, (None, 224, 224, 3), name='inputs')
learning_rate = tf.placeholder(tf.float32, [])
is_training = tf.placeholder(tf.bool)

# Setup RNN
init_state1 = tf.placeholder(tf.float32, [1, n_hidden1])
W_RNN1 = vs.get_variable("W1", shape=[feature_size + n_hidden1, n_hidden1])
b_RNN1 = vs.get_variable("b1", shape=[n_hidden1], initializer=init_ops.zeros_initializer(dtype=tf.float32))
curr_state1 = init_state1

# Setup LSTM (alternative to the RNN cell above)
# init_state1 = tf.placeholder(tf.float32, [1, 2*n_hidden1])
# W_lstm1 = vs.get_variable("W1", shape=[feature_size + n_hidden1, 4*n_hidden1])
# b_lstm1 = vs.get_variable("b1", shape=[4*n_hidden1], initializer=init_ops.zeros_initializer(dtype=tf.float32))
# curr_state1 = broadcast(init_state1, [tf.shape(xs)[0], 2*n_hidden1])


W_fc1 = tf.Variable(tf.truncated_normal([n_hidden1, feature_size], stddev=0.1))
b_fc1 = tf.Variable(tf.constant(0.1, shape=[feature_size]))

scope = 'vgg_16'
fc_conv_padding = 'VALID'
dropout_keep_prob = 0.8

# Caffe-style VGG preprocessing: scale RGB inputs to [0, 255], reorder to BGR,
# and subtract the per-channel VGG means.
r, g, b = tf.split(axis=3, num_or_size_splits=3, value=inputs * 255.0)
VGG_MEAN = [103.939, 116.779, 123.68]
VGG_inputs = tf.concat(values=[b - VGG_MEAN[0], g - VGG_MEAN[1], r - VGG_MEAN[2]], axis=3)

with tf.variable_scope(scope, 'vgg_16', [VGG_inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
        net = slim.repeat(VGG_inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        net = slim.max_pool2d(net, [2, 2], scope='pool5')

        # Use conv2d instead of fully_connected layers.
        net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout6')
        net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
        vgg16_Features = tf.reshape(net, (-1, 4096))
        # Convert end_points_collection into an end_point dict.
        end_points = slim.utils.convert_collection_to_dict(end_points_collection)

RNN_inputs = tf.reshape(vgg16_Features[0, :], (-1, feature_size))

h_1, curr_state1 = RNNCell(W_RNN1, b_RNN1, RNN_inputs, curr_state1)

fc1 = tf.matmul(h_1, W_fc1) + b_fc1
print(fc1[0, :].shape, vgg16_Features[1, :].shape)
sseLoss1 = tf.square(tf.subtract(fc1[0, :], vgg16_Features[1, :]))
mask = tf.greater(sseLoss1, learnError * tf.ones_like(sseLoss1))
sseLoss1 = tf.multiply(sseLoss1, tf.cast(mask, tf.float32))
sseLoss = tf.reduce_mean(sseLoss1)

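# The objective is predictive: from the VGG features of the first frame in each
# pair, the RNN predicts the features of the second frame, and sseLoss is the
# mean squared prediction error. The mask zeroes per-element errors at or below
# the learnError threshold (a no-op here, since learnError = 0).
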
# Optimization
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(sseLoss)


#####################
### Training loop ###
#####################

init = tf.global_variables_initializer()

# This first Saver restores only the pretrained VGG-16 variables.
saver = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="vgg_16"))
with tf.Session() as sess:
    # Initialize parameters
    sess.run(init)
    saver.restore(sess, "./vgg_16.ckpt")
    # Re-create the Saver over all variables so full models can be checkpointed.
    saver = tf.train.Saver(max_to_keep=0)
    avgPredError = 1.0

    ### In case of interruption, load parameters from the last iteration (e.g. 29):
    # saver.restore(sess, './model_stacked_lstm_29')
    ### And update the loop to account for the previous iterations:
    # for i in range(29, n_epochs):
    step = 0
    new_state = np.random.uniform(-0.5, high=0.5, size=(1, n_hidden1))

    for i in range(n_epochs):
        # Run 1 epoch
        loss = []
        shuffle(batch)

        for miniBatchPath in batch:
            # Reset the recurrent state at the start of each video.
            new_state = np.random.uniform(-0.5, high=0.5, size=(1, n_hidden1))
            avgPredError = 0
            vidName, minibatches = loadMiniBatch(miniBatchPath)
            segCount = 0
            predError = collections.deque(maxlen=30)
            print('Video:', vidName)
            for x_train in minibatches:
                segCount += 1
                ret = sess.run([train_op, sseLoss, sseLoss1, curr_state1, fc1],
                               feed_dict={inputs: x_train, is_training: True,
                                          init_state1: new_state, learning_rate: lr})
                new_state = ret[3]

                if activeLearning:
                    # Gate the learning rate on the ratio of the current
                    # prediction error to its running average over the last
                    # 30 segments. (On the first segment avgPredError is 0,
                    # so the ratio is inf and the high rate is kept.)
                    if ret[1] / avgPredError > 1.5:
                        lr = 1e-8
                        # print('Gating n_steps=', segCount, avgPredError, ret[1])
                    else:
                        # print('NOT Gating n_steps=', segCount, avgPredError, ret[1])
                        lr = 1e-10
                predError.append(ret[1])
                avgPredError = np.mean(predError)

        path = modelPath + str(i + 1)
        save_path = saver.save(sess, path)
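        # Checkpoints are written once per epoch as modelPath + str(epoch index),
        # matching the resume example above (e.g. './model_stacked_lstm_29' for
        # epoch 29).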