
Commit 142cadf

Concatenate weight matrices for one big
1 parent 257e8aa commit 142cadf

File tree

1 file changed: +65 -54 lines changed

1 file changed

+65
-54
lines changed

lstm.py (+65 -54)
@@ -5,7 +5,8 @@
 Description: A minimal LSTM layer for use in TensorFlow networks.
 """
 import tensorflow as tf
-
+import numpy as np
+from pprint import pprint

 def weight_variable(shape):
     """
@@ -43,57 +44,40 @@ class LayerLSTM(object):
     Returns:
         None, but LayerLSTM.h and LayerLSTM.c are useful to pull out.
     """
-    def __init__(
-        self,
-        x, # Input to LSTM. Might be a placeholder, might not.
-        init_c, # Initial C state of LSTM. Definite placeholder.
-        init_h,
-        hidden_dim
-    ):
+    def __init__(self, xs, init_c, init_h):
         """
         Function: __init__(self, args)
         Args:
             All the args passed through to instantiate LayerLSTM.
         Returns:
-            None, but it does step the LSTM forward one, defining self.h and self.c
+            None
         """
-        # Pass in input and initial LSTM states.
-        self.x = x
+        self.xs = xs
         # Be sure not to assign init_c and init_h any values
-        # They may very well be placeholders.
+
         self.init_c = init_c
         self.init_h = init_h
         # Common parameters
-        self.input_dim = int(x.get_shape()[-1])
-        self.hidden_dim = hidden_dim
-        # Define counters.
-        # self.current_step = 0
-        self.step()
+        # input_dim = tf.shape(x)[-1]
+        # hidden_dim = tf.shape(init_c)[-1]
+        self.input_dim = xs.get_shape().as_list()[-1]
+        n_steps = xs.get_shape().as_list()[0]
+        self.hidden_dim = init_c.get_shape().as_list()[-1]
+        self.counter = 0
+        self.hs = []
+        self.cs = []
+        for k in range(n_steps):
+            self.step(tf.expand_dims(xs[k,:], 0))
+            self.hs.append(self.h)
+            self.cs.append(self.c)
+        self.H = tf.concat(0, self.hs)
+        self.C = tf.concat(0, self.cs)

-    def step(self):
-        """
-        Function: step(self)
-        Args:
-            self: The args attached to self are enough to step network forward
-                through the LSTM layer.
-        Returns:
-            None: Just grab self.h and self.c from instance of LayerLSTM.
-        """
-        ##############################################
-        ###           Detatch Paramters            ###
-        ##############################################
+    def step(self, x):
         input_dim = self.input_dim
         hidden_dim = self.hidden_dim
-        x = self.x
-        init_h = self.init_h
-        init_c = self.init_c


-        ##############################################
-        ###            Define Weights              ###
-        ##############################################
-        # Define weight matrices and bias vectors.
-
         # Input gate.
         W_i = weight_variable([input_dim, hidden_dim])
         U_i = weight_variable([hidden_dim, hidden_dim])
@@ -120,24 +104,51 @@ def step(self):
         # We have to define expressions self.h and self.c
         # The initial h and c states are possibly placeholders.

+        W = tf.concat(1, [W_i, W_f, W_c, W_o])
+
+        U = tf.concat(1, [U_i, U_f, U_c, U_o])
+
+        B = tf.concat(0, [b_i, b_f, b_c, b_o])
+        if self.counter < 1:
+            H = tf.matmul(x, W) + tf.matmul(self.init_h, U) + B
+        else:
+            H = tf.matmul(x, W) + tf.matmul(self.h, U) + B
+
+
+        i, f, c, o = tf.split(1, 4, H)
+
         # Input gate activation.
-        ingate = tf.nn.sigmoid(
-            tf.matmul(x, W_i) + tf.matmul(init_h, U_i) + b_i
-        )
-        # Candidate gate activation.
-        cgate = tf.nn.tanh(
-            tf.matmul(x, W_c) + tf.matmul(init_h, U_c) + b_c
-        )
-        # Forget gate activation.
-        fgate = tf.nn.sigmoid(
-            tf.matmul(x, W_f) + tf.matmul(init_h, U_f) + b_f
-        )
-        # We make a new candidate state and attach to self.
-        self.c = tf.mul(ingate, cgate) + tf.mul(fgate, init_c)
-
-        # Use the new c state to compute output gate activation.
-        ogate = tf.nn.sigmoid(tf.matmul(x, W_o) + tf.matmul(init_h, U_o) + \
-            tf.matmul(self.c, V_o) + b_o)
+        igate = tf.nn.sigmoid(i)
+        fgate = tf.nn.sigmoid(f)
+        cgate = tf.nn.tanh(c)
+        if self.counter < 1:
+            self.c = tf.mul(igate, cgate) + tf.mul(fgate, self.init_c)
+        else:
+            self.c = tf.mul(igate, cgate) + tf.mul(fgate, self.c)

+        ogate = tf.nn.sigmoid(o + tf.matmul(self.c, V_o))
         # Compute a new value of h to expose to class.
         self.h = tf.mul(ogate, tf.nn.tanh(self.c))
+        self.counter += 1
+
+
+def test_LayerLSTM():
+    n_in = 400
+    n_hid = 40
+    n_steps = 25
+    xs = tf.placeholder(tf.float32, shape=[n_steps, n_in])
+    init_c = tf.placeholder(tf.float32, shape=[1, n_hid])
+    init_h = tf.placeholder(tf.float32, shape=[1, n_hid])
+    lstm = LayerLSTM(xs, init_c, init_h)
+    sess = tf.Session()
+    sess.run(tf.global_variables_initializer())
+    feed_dict = {lstm.init_c: np.random.rand(1, n_hid),
+                 lstm.init_h: np.random.rand(1, n_hid),
+                 lstm.xs: np.random.rand(n_steps, n_in)}
+    H, C = sess.run([lstm.H, lstm.C], feed_dict=feed_dict)
+    print(C)
+    print(H)
+    print(H.shape)
+
+
+if __name__ == "__main__":
+    test_LayerLSTM()
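
For reference, the pattern this commit adopts: instead of eight small matmuls per timestep (four against the input, four against the previous hidden state), the gate pre-activations are computed with two large matmuls and then split four ways. Below is a minimal NumPy sketch of the same fused-gate step; names like lstm_step and the random initialization are illustrative, not from the repo, and the V_o peephole term on the output gate is omitted for brevity.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

input_dim, hidden_dim = 400, 40
rng = np.random.RandomState(0)
# Column-stacked gate weights: [W_i | W_f | W_c | W_o], likewise for U and B.
W = rng.randn(input_dim, 4 * hidden_dim) * 0.01
U = rng.randn(hidden_dim, 4 * hidden_dim) * 0.01
B = np.zeros(4 * hidden_dim)

def lstm_step(x, h_prev, c_prev):
    # Two fused matmuls compute all four gate pre-activations at once.
    z = x @ W + h_prev @ U + B                   # shape (1, 4 * hidden_dim)
    i, f, c_hat, o = np.split(z, 4, axis=1)
    c = sigmoid(i) * np.tanh(c_hat) + sigmoid(f) * c_prev  # new cell state
    h = sigmoid(o) * np.tanh(c)                            # new hidden state
    return h, c

h = np.zeros((1, hidden_dim))
c = np.zeros((1, hidden_dim))
for x_t in np.random.rand(25, input_dim):
    h, c = lstm_step(x_t[None, :], h, c)
print(h.shape)  # (1, 40)

One (input_dim, 4 * hidden_dim) product generally keeps BLAS/GPU kernels busier than four (input_dim, hidden_dim) ones, which is the point of the change. One caveat: because step() above calls weight_variable on every invocation, each timestep in the diff gets its own fresh parameters, whereas a conventional LSTM shares a single W, U, and B across all steps, as the sketch does.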
