import tensorflow as tf
from model import Model
import numpy as np
from preproc import *
import gensim
from sklearn.model_selection import train_test_split

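# Build (context window, next word) training pairs from the tweet corpus,
# embed them with the word2vec model, and hold out 20% as a test split.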
def get_data(vocabsize, ninput, model):
    tweets = get_tweets()[1:]
    full_set = create_training_set(tweets, n_input=ninput)
    vectors = convertSamplesToVectors(full_set, model)
    x_set, y_set = createTraining(vectors, ninput - 1, 1, vocabsize)
    data_train, data_test, labels_train, labels_test = train_test_split(
        x_set, y_set, test_size=0.2, random_state=42)
    labels_train = np.reshape(labels_train, (len(labels_train), vocabsize))
    labels_test = np.reshape(labels_test, (len(labels_test), vocabsize))
    return data_train, labels_train, data_test, labels_test

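# Shuffle features and labels in unison: restoring the RNG state before the
# second shuffle applies the same permutation to both arrays, so each sample
# stays aligned with its label.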
def shuffle(a, b, rand_state):
    state = rand_state.get_state()
    rand_state.shuffle(a)
    rand_state.set_state(state)
    rand_state.shuffle(b)

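# Hyperparameters: inputs and outputs are 300-d word2vec embeddings, and the
# model sees a context of 4 words per prediction.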
def config1():
    conf = dict()
    conf['n_inputs'] = 300
    conf['n_classes'] = 300
    conf['n_timesteps'] = 4
    conf['hidden_size'] = 512
    conf['hidden_size_small'] = 300
    return conf

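# Slice one contiguous mini-batch starting at `current`.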
def get_batch(data_x, data_y, current, batch_size):
    batchX = data_x[current:current + batch_size]
    batchY = data_y[current:current + batch_size]
    return batchX, batchY

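# Look up a graph variable by name prefix (used below to attach TensorBoard
# histograms to individual LSTM and dense-layer variables).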
def get_var(all_vars, name):
    for var in all_vars:
        if var.name.startswith(name):
            return var
    return None


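# Train the LSTM next-word model, logging losses, weights, and sample
# predictions as training progresses.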
def main():
    run_var = '2_input_d_3_dense_d_50_drop_40_data_b32'
    batch_size = 32

    # load config and hyper params
    config = config1()

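    # Placeholders: `data` holds a batch of n_timesteps word vectors, `labels`
    # the embedding of the word to predict; the two drop placeholders feed the
    # model's dropout settings.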
    data = tf.placeholder("float", [None, config['n_timesteps'], config['n_inputs']])
    labels = tf.placeholder("float", [None, config['n_classes']])
    drop_prob = tf.placeholder(tf.float32, name='drop_prob')
    drop_prob2 = tf.placeholder(tf.float32, name='drop_prob2')

    # create model
    model = Model(data, labels, drop_prob, drop_prob2, config)

    # load word embedding
    word_2_vec_model = gensim.models.Word2Vec.load('word_model_all_300_50_donald_larger_dataset_better')
    trainX, trainY, testX, testY = get_data(config['n_inputs'], config['n_timesteps'] + 1, word_2_vec_model)

    # RNG that drives the per-epoch reshuffle of the training data
    rand_state = np.random.RandomState(42)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        writer = tf.summary.FileWriter('%s/%s' % ("./real", run_var), graph=sess.graph)
        tf.summary.scalar("loss", model.loss)

        # TensorBoard histograms for the learned variables
        all_vars = tf.global_variables()

        lstm_0b = get_var(all_vars, 'prediction/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/bias')
        lstm_1b = get_var(all_vars, 'prediction/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/bias')

        lstm_0k = get_var(all_vars, 'prediction/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel')
        lstm_1k = get_var(all_vars, 'prediction/rnn/multi_rnn_cell/cell_1/basic_lstm_cell/kernel')

        tf.summary.histogram("prediction/cell_0/basic_lstm_cell/kernel", lstm_0k)
        tf.summary.histogram("prediction/cell_1/basic_lstm_cell/kernel", lstm_1k)

        tf.summary.histogram("prediction/cell_0/basic_lstm_cell/bias", lstm_0b)
        tf.summary.histogram("prediction/cell_1/basic_lstm_cell/bias", lstm_1b)

        fc0_w = get_var(all_vars, 'prediction/fc0/weights')
        fc1_w = get_var(all_vars, 'prediction/fc1/weights')
        fc2_w = get_var(all_vars, 'prediction/fc2/weights')

        fc0_b = get_var(all_vars, 'prediction/fc0/biases')
        fc1_b = get_var(all_vars, 'prediction/fc1/biases')
        fc2_b = get_var(all_vars, 'prediction/fc2/biases')

        tf.summary.histogram("prediction/fc0/weights", fc0_w)
        tf.summary.histogram("prediction/fc1/weights", fc1_w)
        tf.summary.histogram("prediction/fc2/weights", fc2_w)

        tf.summary.histogram("prediction/fc0/biases", fc0_b)
        tf.summary.histogram("prediction/fc1/biases", fc1_b)
        tf.summary.histogram("prediction/fc2/biases", fc2_b)

        summary_op = tf.summary.merge_all()

        validation_summary = tf.summary.scalar("validation_loss", model.loss)

        saver = tf.train.Saver()
        print('starting')
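
        # Train for 50 epochs; after every epoch, report the held-out loss
        # and print a few sample predictions to eyeball progress.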
        for epoch in range(50):
            step = 0
            while step < len(trainX) - batch_size:
                batch_x, batch_y = get_batch(data_x=trainX, data_y=trainY, current=step, batch_size=batch_size)
                batch_x = batch_x.reshape((batch_size, config['n_timesteps'], config['n_inputs']))
                # dropout keep probabilities for training (1.0 disables dropout at eval)
                summary, _ = sess.run([summary_op, model.optimize],
                                      feed_dict={data: batch_x, labels: batch_y, drop_prob: 0.6, drop_prob2: 0.5})
                writer.add_summary(summary, (epoch * len(trainX)) + step)
                step = step + batch_size

            los_test, validation_summ = sess.run([model.loss, validation_summary],
                                                 feed_dict={data: testX, labels: testY, drop_prob: 1.0, drop_prob2: 1.0})
            writer.add_summary(validation_summ, (epoch * len(trainX)))
            print("epoch " + str(epoch))
            print("Testing Loss:", los_test)
            # print out a few predicted words so we can see improvements
            prediction_number = 15
            for i in range(prediction_number):
                pred, prediction_loss = sess.run([model.prediction, model.loss],
                                                 feed_dict={data: trainX[i:i + 1], labels: trainY[i:i + 1], drop_prob: 1.0, drop_prob2: 1.0})

                x_in = np.reshape(trainX[i], (config['n_timesteps'], 300))
                pred1 = np.reshape(pred[0], (1, 300))
                c_word = np.reshape(trainY[i], (1, 300))

                # map the vectors back to words for a readable comparison
                sentence = convertSamplesToVectors([x_in], word_2_vec_model, True)
                word_predicted = convertSamplesToVectors([pred1], word_2_vec_model, True)
                correct_word = convertSamplesToVectors([c_word], word_2_vec_model, True)
                print(" ".join(sentence[0]) + " " + word_predicted[0][0])
                print(" ".join(sentence[0]) + " " + correct_word[0][0])
                print("similarity score: " + str(word_2_vec_model.similarity(word_predicted[0][0], correct_word[0][0])))
                print('prediction_loss: ' + str(prediction_loss))
                print(" ")
                print("__________________")
                print(" ")
            if epoch % 2 == 0:
                file_name = './trained_models2/_saved_' + str(epoch) + '_' + str(run_var)
                saver.save(sess, file_name, global_step=epoch * len(trainX))
                print('saved')
                print(file_name)
                print(" ")
            # reshuffle the training data before the next epoch
            shuffle(trainX, trainY, rand_state)
        file_name = './trained_models2/_saved_final' + str(run_var)
        saver.save(sess, file_name)


if __name__ == '__main__':
    main()
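
# To inspect the logged summaries while (or after) training:
#   tensorboard --logdir ./real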