Commit 357d091

Commit message: update
1 parent d3151af commit 357d091

4 files changed (+1046, -0 lines)

Zacks_VGG_RNN.py (+242 lines)
@@ -0,0 +1,242 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.python.tools import inspect_checkpoint as chkp
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes

import numpy as np
import os
from os import listdir
from os.path import isfile, join, isdir
from random import shuffle, choice
from PIL import Image
import sys
import json
import collections

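# NOTE: this script targets TensorFlow 1.x; tf.contrib was removed in
# TensorFlow 2.x, where placeholders and sessions live under tf.compat.v1.
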
input_width = 224
input_height = 224
num_channels = 3
slim = tf.contrib.slim
n_hidden1 = 4096
n_hidden2 = 4096
feature_size = 4096
learnError = 0
n_epochs = 1
batch_size = 2
min_steps = batch_size

lr = 1e-8

def loadData(jsonData, inPath):
    batchPaths = []
    for vid in jsonData.keys():
        # VIRAT format
        dirName = '_'.join(vid.split('.')[0].split('_')[2:])

        # Other dataset file name format
        # dirName = '_'.join(vid.split('.')[0])

        vidPath = join(inPath, dirName)
        batchPaths = batchPaths + sorted([str(join(vidPath, f) + '/') for f in listdir(vidPath) if isdir(join(vidPath, f))])
    return batchPaths

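# For reference, the VIRAT-style parsing above on a hypothetical file name
# (not taken from the commit):
#   'VIRAT_S_000001.mp4'.split('.')[0].split('_')[2:]  ->  ['000001']
# so dirName == '000001' and segment directories are expected under
# inPath/000001/.
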
def loadMiniBatch(vidFilePath):
    vidName = vidFilePath.split('/')[-3]
    frameList = sorted([join(vidFilePath, f) for f in listdir(vidFilePath) if isfile(join(vidFilePath, f)) and f.endswith('.png')])
    frameList = sorted(frameList, key=lambda x: int(x.split('/')[-1].split('.')[0]))
    its = [iter(frameList), iter(frameList[1:])]
    segments = zip(*its)
    minibatch = []
    for segment in segments:
        im = []
        numFrames = 0
        for j, imFile in enumerate(segment):
            img = Image.open(imFile)
            img = img.resize((input_width, input_height), Image.ANTIALIAS)
            im.append(np.array(img))
            numFrames += 1
        minibatch.append(np.stack(im))
    return vidName, minibatch

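# The two staggered iterators above form a sliding pairwise window; a minimal
# sketch with hypothetical frame names of what zip(*its) yields:
#   frameList = ['0.png', '1.png', '2.png']
#   segments  = [('0.png', '1.png'), ('1.png', '2.png')]
# so each minibatch entry is a (2, height, width, channels) stack of
# consecutive frames.
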
def broadcast(tensor, shape):
    return tensor + tf.zeros(shape, dtype=tensor.dtype)

def RNNCell(W, B, inputs, state):
    """Most basic RNN: output = new_state = sigmoid(W * [inputs, state] + B)."""
    sigmoid = math_ops.sigmoid

    gate_inputs = math_ops.matmul(array_ops.concat([inputs, state], 1), W)
    gate_inputs = nn_ops.bias_add(gate_inputs, B)
    output = sigmoid(gate_inputs)
    return output, output

def lstm_cell(W, b, forget_bias, inputs, state):
    """Basic LSTM cell; `state` packs [c, h] along axis 1."""
    one = constant_op.constant(1, dtype=dtypes.int32)
    add = math_ops.add
    multiply = math_ops.multiply
    sigmoid = math_ops.sigmoid
    activation = math_ops.sigmoid
    # activation = math_ops.tanh

    c, h = array_ops.split(value=state, num_or_size_splits=2, axis=one)

    gate_inputs = math_ops.matmul(array_ops.concat([inputs, h], 1), W)
    gate_inputs = nn_ops.bias_add(gate_inputs, b)
    # i = input_gate, j = new_input, f = forget_gate, o = output_gate
    i, j, f, o = array_ops.split(value=gate_inputs, num_or_size_splits=4, axis=one)

    forget_bias_tensor = constant_op.constant(forget_bias, dtype=f.dtype)

    new_c = add(multiply(c, sigmoid(add(f, forget_bias_tensor))), multiply(sigmoid(i), activation(j)))
    new_h = multiply(activation(new_c), sigmoid(o))
    new_state = array_ops.concat([new_c, new_h], 1)

    return new_h, new_state

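# A minimal shape sanity check for the two cells above (hypothetical helper,
# not part of the original commit). It assumes lstm_cell's weight layout:
# W is [features + hidden, 4*hidden] and the state packs [c, h] along axis 1.
def _check_cell_shapes(hidden=8, feat=16):
    x = tf.zeros([1, feat])
    out, st = RNNCell(tf.zeros([feat + hidden, hidden]), tf.zeros([hidden]),
                      x, tf.zeros([1, hidden]))
    h, s = lstm_cell(tf.zeros([feat + hidden, 4 * hidden]), tf.zeros([4 * hidden]),
                     1.0, x, tf.zeros([1, 2 * hidden]))
    # Expected: (1, hidden), (1, hidden), (1, hidden), (1, 2*hidden)
    return out.shape, st.shape, h.shape, s.shape
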
jsonData = json.load(open(sys.argv[1]))
vidPath = sys.argv[2]
modelPath = sys.argv[3]
activeLearningInput = sys.argv[4]

if activeLearningInput == "1":
    activeLearning = True
else:
    activeLearning = False

batch = loadData(jsonData, vidPath)


inputs = tf.placeholder(tf.float32, (None, 224, 224, 3), name='inputs')
learning_rate = tf.placeholder(tf.float32, [])
is_training = tf.placeholder(tf.bool)

# Setup RNN
init_state1 = tf.placeholder(tf.float32, [1, n_hidden1])
W_RNN1 = vs.get_variable("W1", shape=[feature_size+n_hidden1, n_hidden1])
b_RNN1 = vs.get_variable("b1", shape=[n_hidden1], initializer=init_ops.zeros_initializer(dtype=tf.float32))
curr_state1 = init_state1

# Setup LSTM (alternative to the RNN above; note that the commented line for
# curr_state1 refers to an `xs` tensor that is not defined in this script)
#init_state1 = tf.placeholder(tf.float32, [1, 2*n_hidden1])
#W_lstm1 = vs.get_variable("W1", shape=[feature_size + n_hidden1, 4*n_hidden1])
#b_lstm1 = vs.get_variable("b1", shape=[4*n_hidden1], initializer=init_ops.zeros_initializer(dtype=tf.float32))
#curr_state1 = broadcast(init_state1, [tf.shape(xs)[0], 2*n_hidden1])

W_fc1 = tf.Variable(tf.truncated_normal([n_hidden1, feature_size], stddev=0.1))
b_fc1 = tf.Variable(tf.constant(0.1, shape=[feature_size]))

scope = 'vgg_16'
fc_conv_padding = 'VALID'
dropout_keep_prob = 0.8

r, g, b = tf.split(axis=3, num_or_size_splits=3, value=inputs * 255.0)
VGG_MEAN = [103.939, 116.779, 123.68]
VGG_inputs = tf.concat(values=[b - VGG_MEAN[0], g - VGG_MEAN[1], r - VGG_MEAN[2]], axis=3)

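# The split/concat above converts the RGB input (scaled back to [0, 255]) to
# mean-subtracted BGR, the preprocessing that Caffe-derived VGG-16 checkpoints
# expect (VGG_MEAN is ordered B, G, R).
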
with tf.variable_scope(scope, 'vgg_16', [VGG_inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
        net = slim.repeat(VGG_inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        net = slim.max_pool2d(net, [2, 2], scope='pool5')

        # Use conv2d instead of fully_connected layers.
        net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout6')
        net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
        vgg16_Features = tf.reshape(net, (-1, 4096))
        # Convert end_points_collection into an end_point dict.
        end_points = slim.utils.convert_collection_to_dict(end_points_collection)

RNN_inputs = tf.reshape(vgg16_Features[0, :], (-1, feature_size))

h_1, curr_state1 = RNNCell(W_RNN1, b_RNN1, RNN_inputs, curr_state1)

fc1 = tf.matmul(h_1, W_fc1) + b_fc1
print(fc1[0, :].shape, vgg16_Features[1, :].shape)
# Squared prediction error between the predicted and actual next-frame
# features, with errors at or below learnError masked out.
sseLoss1 = tf.square(tf.subtract(fc1[0, :], vgg16_Features[1, :]))
mask = tf.greater(sseLoss1, learnError * tf.ones_like(sseLoss1))
sseLoss1 = tf.multiply(sseLoss1, tf.cast(mask, tf.float32))
sseLoss = tf.reduce_mean(sseLoss1)

# Optimization
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(sseLoss)

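# A minimal sketch of the masking above, with hypothetical values: for
# learnError = 0.1,
#   err  = [0.0, 0.3, 0.05]
#   mask = err > learnError    ->  [False, True, False]
#   loss = mean(err * mask)    ->  only errors above the threshold train the model
# With learnError = 0, as configured here, every positive error passes through.
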
#####################
### Training loop ###
#####################

init = tf.global_variables_initializer()

saver = tf.train.Saver(tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="vgg_16"))
with tf.Session() as sess:
    # Initialize parameters
    sess.run(init)
    saver.restore(sess, "./vgg_16.ckpt")
    saver = tf.train.Saver(max_to_keep=0)
    avgPredError = 1.0

    ### In case of interruption, load parameters from the last iteration (e.g. 29)
    #saver.restore(sess, './model_stacked_lstm_29')
    ### and update the loop to account for the previous iterations
    #for i in range(29, n_epochs):
    step = 0
    new_state = np.random.uniform(-0.5, high=0.5, size=(1, n_hidden1))

    for i in range(n_epochs):
        # Run 1 epoch
        loss = []
        shuffle(batch)

        for miniBatchPath in batch:
            new_state = np.random.uniform(-0.5, high=0.5, size=(1, n_hidden1))
            avgPredError = 1.0  # reset per video; nonzero so the gating ratio below is defined
            vidName, minibatches = loadMiniBatch(miniBatchPath)
            segCount = 0
            predError = collections.deque(maxlen=30)
            print('Video:', vidName)
            for x_train in minibatches:
                segCount += 1
                ret = sess.run([train_op, sseLoss, sseLoss1, curr_state1, fc1], feed_dict={inputs: x_train, is_training: True, init_state1: new_state, learning_rate: lr})
                new_state = ret[3]

                if activeLearning:
                    if ret[1] / avgPredError > 1.5:
                        lr = 1e-8
                        #print('Gating n_steps=', segCount, avgPredError, ret[1])
                    else:
                        #print('NOT Gating n_steps=', segCount, avgPredError, ret[1])
                        lr = 1e-10
                predError.append(ret[1])
                avgPredError = np.mean(predError)

        path = modelPath + str(i+1)
        save_path = saver.save(sess, path)
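
# Example invocation (paths and file names are placeholders, not from the
# commit; the last argument enables the active-learning rate gating when "1"):
#   python Zacks_VGG_RNN.py annotations.json /path/to/frames ./model_ 1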
