-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexperiment_builder.py
463 lines (384 loc) · 22.3 KB
/
experiment_builder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
from comet_ml import OfflineExperiment # needed at top for Comet plugin
from collections import defaultdict, OrderedDict
import torch
import torch.nn as nn
import tqdm
import time
from sklearn.metrics import f1_score, precision_score, recall_score
import torch.nn.functional as F
from utils import *
import configparser
# Class index -> label name. The per-class metric keys written by
# ExperimentBuilder (e.g. 'valid_f_score_hateful') are built from these names.
LABEL_MAPPING = dict(enumerate(('hateful', 'abusive', 'normal', 'spam')))
# When True, epoch statistics are printed to the terminal and nothing is sent
# to the Comet experiment.
DEBUG = False
# Settings parsed from the config.ini that sits directly under ROOT_DIR.
config = configparser.ConfigParser()
config.read(os.path.join(ROOT_DIR, 'config.ini'))
class ExperimentBuilder(nn.Module):
    """Train, validate and test a network model on pre-batched tweet data.

    ``experiment_flag`` selects which inputs are assembled for every sample
    (see ``extract_sample_data``):

    * 1 - embedded tweet only
    * 2 - embedded tweet with retweet/favorite counts appended, plus the
      embedded context tweet as a second input
    * 3 - embedded tweet with retweet/favorite counts appended
    * 4 - embedded tweet plus embedded topic words as a second input
    * 5 - embedded tweet plus the embedded user timeline as a second input
    """

    def __init__(self, network_model, device, hyper_params, data_map,
                 train_data, valid_data, test_data, experiment_flag,
                 data_provider, experiment):
        """
        Initializes an ExperimentBuilder object. Such an object takes care of running training and evaluation of a
        deep net on a given dataset. It also saves a model per epoch and tracks the best validation epoch, which is
        the one used to produce the test set metrics.

        NOTE: all three data splits are preprocessed eagerly here (via ``preprocess_data``), which also builds the
        model layers from the first training batch.

        :param network_model: Model exposing ``reset_parameters``, ``build_layers``, ``build_fc_layer``, ``forward``
            and ``layer_dict``.
        :param device: torch device the model and the batches are moved to.
        :param hyper_params: Mapping with keys 'experiment_name', 'seed', 'num_epochs', 'learning_rate' and
            'results_dir'.
        :param data_map: Mapping from sample id to a dict of per-sample fields ('embedded_tweet', 'tweet', 'label'
            and, depending on the flag, the extra feature fields).
        :param train_data: Iterable of (sample_ids, targets) batches used for training.
        :param valid_data: Iterable of (sample_ids, targets) batches used for validation.
        :param test_data: Iterable of (sample_ids, targets) batches used for testing.
        :param experiment_flag: Integer in 1..5 selecting the input configuration (see class docstring).
        :param data_provider: Stored on the instance; not used by this class itself.
        :param experiment: Comet experiment object used for ``log_metric``.
        """
        super(ExperimentBuilder, self).__init__()

        self.experiment = experiment  # comet experiment
        self.experiment_flag = experiment_flag
        self.experiment_name = hyper_params['experiment_name']
        self.model = network_model
        self.model.reset_parameters()  # re-initialize network parameters
        self.device = device
        self.seed = hyper_params['seed']
        self.num_epochs = hyper_params['num_epochs']
        self.starting_epoch = 0
        self.state = dict()
        self.data_provider = data_provider

        self.data_map = data_map
        self.train_data_raw = train_data
        self.valid_data_raw = valid_data
        self.test_data_raw = test_data
        self.train_data = []
        self.valid_data = []
        self.test_data = []
        self.train_data_tweets = None
        self.valid_data_tweets = None
        self.test_data_tweets = None

        # rows are true classes, columns are predicted classes
        num_classes = len(LABEL_MAPPING)
        self.confusion_matrix = torch.zeros(num_classes, num_classes)

        # converts the raw batches to tensors and builds the remaining model layers
        self.preprocess_data()

        self.criterion = nn.CrossEntropyLoss().to(self.device)  # send the loss computation to the GPU
        self.optimizer = torch.optim.Adam(self.model.parameters(), weight_decay=1e-4,
                                          lr=hyper_params["learning_rate"])
        # No learning-rate scheduler by default; run_experiment steps one per epoch if it is set.
        self.scheduler = None

        # Generate the directory names
        self.experiment_folder = hyper_params['results_dir']
        self.experiment_saved_models = os.path.abspath(os.path.join(self.experiment_folder, "saved_models"))

        # Set best models to be at 0 since we are just starting
        self.best_val_model_idx = 0
        self.best_val_model_criteria = 0.

        # creates the experiment directory as well, since it is a parent of saved_models
        os.makedirs(self.experiment_folder, exist_ok=True)
        os.makedirs(self.experiment_saved_models, exist_ok=True)

    def get_num_parameters(self):
        """
        Counts the scalar parameters registered on this module (including the wrapped model).
        :return: total number of parameter elements
        """
        return sum(param.numel() for param in self.parameters())

    def forward_pass_helper(self, x):
        """
        Runs the model on one preprocessed batch and applies the final fully connected layer.
        :param x: Pair (tweet_input, feature_input) as produced by ``extract_sample_data``.
        :return: Output of ``self.model.layer_dict['fc_layer']`` for the batch.
        """
        tweet_input, feature_input = x[0], x[1]
        if self.experiment_flag == 2 or self.experiment_flag == 4:
            # tweet level processing
            feature_input = feature_input.to(self.device)
            tweet_input = tweet_input.to(self.device)
            feature_out = self.model.forward(feature_input, layer_key='feature', flatten_flag=True)
            tweet_out = self.model.forward(tweet_input, layer_key='tweet', flatten_flag=True)
            # both outputs already live on self.device, so concatenate in place
            out = torch.cat((tweet_out, feature_out), 1)
        elif self.experiment_flag == 5:
            # tweet level processing
            tweet_input = tweet_input.to(self.device)
            tweet_out = self.model.forward(tweet_input, layer_key='tweet', flatten_flag=True)
            timeline_outs = [self.model.forward(item.to(self.device), layer_key='user-timeline')
                             for item in feature_input]
            # Element-wise sum over the timeline outputs. numpy cannot reduce tensors that
            # require grad, so the reduction has to stay in torch.
            # NOTE(review): assumes every timeline output has the same shape and that the sum
            # is 2-D with the same batch size as tweet_out - confirm against the model.
            feature_out = torch.stack(timeline_outs).sum(dim=0)
            out = torch.cat((tweet_out, feature_out), 1)
        else:  # experiments 1 & 3
            tweet_input = tweet_input.to(self.device)
            out = self.model.forward(tweet_input, flatten_flag=True, layer_key='tweet')
        return self.model.layer_dict['fc_layer'](out)

    def _record_batch_stats(self, stats, experiment_key, loss, y, predicted):
        """Appends accuracy, loss and the F-metrics of one batch to ``stats`` under ``experiment_key``."""
        accuracy = predicted.eq(y).float().mean().item()
        stats['{}_acc'.format(experiment_key)].append(accuracy)
        stats['{}_loss'.format(experiment_key)].append(loss.item())
        self.compute_f_metrics(stats, y, predicted, experiment_key)

    def run_train_iter(self, x, y, stats, experiment_key='train'):
        """
        Receives the inputs and targets for the model and runs a training iteration. The batch metrics are appended
        to ``stats``; nothing is returned.
        :param x: The inputs to the model, a pair as produced by ``extract_sample_data``.
        :param y: The targets for the model, a tensor of class indices.
        :param stats: Mapping of metric name to list (e.g. ``defaultdict(list)``) that receives the batch metrics.
        :param experiment_key: Prefix used for the metric names.
        """
        # sets model to training mode
        # (in case batch normalization or other methods have different procedures for training and evaluation)
        self.train()
        self.optimizer.zero_grad()  # set all weight grads from previous training iters to 0
        y = y.to(self.device)
        out = self.forward_pass_helper(x)  # forward the data in the model
        loss = self.criterion(out, y)
        loss.backward()  # backpropagate to compute gradients for current iter loss
        self.optimizer.step()  # update network parameters
        _, predicted = torch.max(out.detach(), 1)  # get argmax of predictions
        self._record_batch_stats(stats, experiment_key, loss, y, predicted)

    def run_evaluation_iter(self, x, y, stats, experiment_key='valid'):
        """
        Receives the inputs and targets for the model and runs an evaluation iteration. The batch metrics are
        appended to ``stats``; with ``experiment_key == 'test'`` the confusion matrix is updated as well.
        :param x: The inputs to the model, a pair as produced by ``extract_sample_data``.
        :param y: The targets for the model, a tensor of class indices.
        :param stats: Mapping of metric name to list that receives the batch metrics.
        :param experiment_key: Prefix used for the metric names.
        :return: tensor with the predicted class index of every sample in the batch
        """
        self.eval()  # sets the system to validation mode
        y = y.to(self.device)
        # no parameter update happens here, so skip building the autograd graph
        with torch.no_grad():
            out = self.forward_pass_helper(x)
            loss = self.criterion(out, y)
        _, predicted = torch.max(out, 1)  # get argmax of predictions
        if experiment_key == 'test':
            for true_idx, pred_idx in zip(y.view(-1).tolist(), predicted.view(-1).tolist()):
                self.confusion_matrix[true_idx, pred_idx] += 1
        self._record_batch_stats(stats, experiment_key, loss, y, predicted)
        return predicted

    @staticmethod
    def _checkpoint_path(model_save_dir, model_save_name, model_idx):
        """Returns the path '<model_save_dir>/<model_save_name>_<model_idx>.pt'."""
        return os.path.join(model_save_dir, "{}_{}.pt".format(model_save_name, str(model_idx)))

    def save_model(self, model_save_dir, model_save_name, model_idx):
        """
        Saves this module's ``state_dict`` to '<model_save_dir>/<model_save_name>_<model_idx>.pt'.
        :param model_save_dir: The directory to store the state at.
        :param model_save_name: Name to use to save model without the epoch index
        :param model_idx: The index to save the model with.
        """
        path = self._checkpoint_path(model_save_dir, model_save_name, model_idx)
        torch.save(self.state_dict(), f=path)

    def load_model(self, model_save_dir, model_save_name, model_idx):
        """
        Loads the ``state_dict`` previously written by ``save_model`` into this module.
        :param model_save_dir: The directory the state was stored at.
        :param model_save_name: Name the model was saved with, without the epoch index
        :param model_idx: The index the model was saved with.
        """
        path = self._checkpoint_path(model_save_dir, model_save_name, model_idx)
        # map_location keeps loading possible when the checkpoint was written on another device
        self.load_state_dict(torch.load(f=path, map_location=self.device))

    def remove_excess_models(self):
        """
        Deletes every '<name>_<epoch>.pt' checkpoint in the 'saved_models' directory of the experiment folder
        whose epoch differs from ``self.best_val_model_idx``. Files that do not match that pattern are left alone.
        """
        saved_models_dir = os.path.abspath(os.path.join(self.experiment_folder, 'saved_models'))
        if not os.path.isdir(saved_models_dir):
            return
        for file_name in os.listdir(saved_models_dir):
            file_path = os.path.join(saved_models_dir, file_name)
            stem, extension = os.path.splitext(file_name)
            epoch = stem.split('_')[-1]
            if extension != '.pt' or not epoch.isdigit() or not os.path.isfile(file_path):
                continue  # not a per-epoch checkpoint
            if int(epoch) != self.best_val_model_idx:
                os.remove(file_path)

    @staticmethod
    def compute_f_metrics(stats, y_true, predicted, type_key):
        """
        Appends weighted and per-class F1 / precision / recall of one batch to ``stats``.

        Per-class scores are only recorded for the classes that occur in the batch (as a target or as a
        prediction), and each score is stored under the name of the class it actually belongs to.
        :param stats: Mapping of metric name to list that receives the scores.
        :param y_true: Tensor of target class indices.
        :param predicted: Tensor of predicted class indices.
        :param type_key: Prefix used for the metric names ('train', 'valid' or 'test').
        """
        y_true_np = y_true.detach().cpu().numpy()
        predicted_np = predicted.detach().cpu().numpy()
        metric_fns = (('f_score', f1_score), ('precision', precision_score), ('recall', recall_score))

        for metric_name, metric_fn in metric_fns:
            overall = metric_fn(y_true_np, predicted_np, average='weighted')
            stats[type_key + '_' + metric_name].append(overall)

        # With average=None the scorers return one value per label in sorted order. Pin that label
        # list explicitly so position i can be mapped back to the right class even when some class
        # is missing from the batch.
        present_labels = np.unique(np.concatenate((y_true_np.reshape(-1), predicted_np.reshape(-1))))
        per_class = {metric_name: metric_fn(y_true_np, predicted_np, labels=present_labels, average=None)
                     for metric_name, metric_fn in metric_fns}
        for position, class_idx in enumerate(present_labels):
            label_name = LABEL_MAPPING[int(class_idx)]
            for metric_name, _ in metric_fns:
                stats[type_key + '_' + metric_name + '_' + label_name].append(per_class[metric_name][position])

    def save_best_performing_model(self, epoch_stats, epoch_idx):
        """
        Remembers ``epoch_idx`` as the best epoch if its validation F1 on the 'hateful' class beats the best so far.
        :param epoch_stats: Mapping holding the epoch mean under 'valid_f_score_hateful'.
        :param epoch_idx: Index of the epoch the stats belong to.
        """
        criteria = epoch_stats['valid_f_score_hateful']
        if criteria > self.best_val_model_criteria:
            self.best_val_model_criteria = criteria  # new best validation score
            self.best_val_model_idx = epoch_idx  # epoch that produced it

    @staticmethod
    def iter_logs(stats, start_time, index):
        """
        Prints the stats of one epoch together with the time elapsed since ``start_time``.
        :param stats: Mapping of metric name to (numeric) value; the 'epoch' entry is skipped.
        :param start_time: ``time.time()`` value taken at the start of the epoch.
        :param index: Epoch index shown in the header.
        """
        # Log results to terminal
        out_string = "".join("{}: {:0.4f}\n".format(key, value)
                             for key, value in stats.items() if key != 'epoch')
        epoch_elapsed_time = (time.time() - start_time) / 60  # calculate time taken for epoch
        epoch_elapsed_time = "{:.4f}".format(epoch_elapsed_time)
        print("\n===Epoch {}===\n{}===Elapsed time: {} mins===".format(index, out_string, epoch_elapsed_time))

    @staticmethod
    def _to_float_tensor(values):
        """Converts a list of equally shaped arrays to one float32 tensor through a single numpy array."""
        return torch.as_tensor(np.asarray(values), dtype=torch.float32)

    def extract_sample_data(self, sample_ids):
        """
        Assembles the model inputs for one batch of sample ids according to ``self.experiment_flag``.
        :param sample_ids: Iterable of keys into ``self.data_map``.
        :return: Pair (tweets, features). ``tweets`` is a float tensor of the stacked tweet embeddings. ``features``
            is the stacked context tweets (flag 2), the stacked topic words (flag 4), a list of timeline tweet
            tensors (flag 5) or the tweets tensor itself (flags 1 and 3, where it is not used downstream).
        :raises ValueError: if ``self.experiment_flag`` is not one of 1..5.
        """
        if self.experiment_flag not in (1, 2, 3, 4, 5):
            raise ValueError("Unsupported experiment_flag: {}".format(self.experiment_flag))

        embedded_tweets = []
        embedded_context_tweets = []
        embedded_topic_words = []
        embedded_timeline_list = []
        for _id in sample_ids:
            sample = self.data_map[_id]
            embedded_tweet = sample['embedded_tweet']
            if self.experiment_flag == 2 or self.experiment_flag == 3:
                # concatenates retweet/favorite to every row of the tweet embedding
                counts = np.array([sample['retweet_count'], sample['favorite_count']], dtype=np.float32)
                num_rows = np.array(embedded_tweet).shape[0]
                features = np.tile(counts, (num_rows, 1))
                embedded_tweet = np.concatenate((embedded_tweet, features), -1)
            if self.experiment_flag == 2:
                # adds context tweet
                embedded_context_tweets.append(sample['embedded_context_tweet'])
            elif self.experiment_flag == 4:
                embedded_topic_words.append(sample['embedded_topic_words'])
            elif self.experiment_flag == 5:
                # NOTE(review): this is re-assigned for every sample, so only the timeline of the
                # last sample in the batch is returned - confirm this is intended.
                embedded_timeline_list = [torch.Tensor(tweet).float()
                                          for tweet in sample['embedded_user_timeline']]
            # append main tweet
            embedded_tweets.append(embedded_tweet)

        tweets = self._to_float_tensor(embedded_tweets)
        if self.experiment_flag == 4:
            return tweets, self._to_float_tensor(embedded_topic_words)
        if self.experiment_flag == 2:
            return tweets, self._to_float_tensor(embedded_context_tweets)
        if self.experiment_flag == 5:
            return tweets, embedded_timeline_list
        # experiments 1 and 3: the second slot only keeps the (tweets, features) pair shape
        return tweets, tweets

    @staticmethod
    def flatten_embedding(out):
        """
        Max-pools over the whole last dimension and swaps the last two dimensions.
        :param out: 3-D tensor of shape (d0, d1, d2)
        :return: tensor of shape (d0, 1, d1) holding the maximum over the last dimension
        """
        out = F.max_pool1d(out, out.shape[-1])
        out = out.permute([0, 2, 1])
        return out

    def build_model(self, data_sample):
        """
        Builds the model layers from the shapes of one preprocessed batch and moves the model to ``self.device``.
        :param data_sample: Pair (embedded_tweet, features_tweet) as returned by ``extract_sample_data``.
        :raises ValueError: if ``self.experiment_flag`` is not one of 1..5.
        """
        embedded_tweet, features_tweet = data_sample
        # zero tensors of the right shape are pushed through to discover the layer output shapes
        self.model.build_layers(embedded_tweet.shape, 'tweet')
        embedded_tweet_out = self.model.forward(torch.zeros(embedded_tweet.shape), layer_key='tweet')
        if self.experiment_flag == 1 or self.experiment_flag == 3:
            out = embedded_tweet_out
        elif self.experiment_flag == 2 or self.experiment_flag == 4:
            self.model.build_layers(features_tweet.shape, 'feature')
            features_tweet_out = self.model.forward(torch.zeros(features_tweet.shape), layer_key='feature')
            out = torch.cat((embedded_tweet_out, features_tweet_out), 1)
        elif self.experiment_flag == 5:
            self.model.build_layers(embedded_tweet.shape, 'user-timeline')
            user_timeline_tweet_out = self.model.forward(torch.zeros(embedded_tweet.shape),
                                                         layer_key='user-timeline')
            out = torch.cat((embedded_tweet_out, user_timeline_tweet_out), 1)
        else:
            raise ValueError("Unsupported experiment_flag: {}".format(self.experiment_flag))
        self.model.build_fc_layer(out.shape)

        # Send the model to the device. The model was previously wrapped in nn.DataParallel and
        # unwrapped again right away when several GPUs were visible, which only moved it to the
        # device as well.
        self.model.to(self.device)

    def extract_tweets(self, ids):
        """
        Looks up the raw tweet and label of every sample id.
        :param ids: Iterable of keys into ``self.data_map``.
        :return: list of (tweet, label) tuples in the order of ``ids``
        """
        return [(self.data_map[_id]['tweet'], self.data_map[_id]['label']) for _id in ids]

    def _preprocess_split(self, raw_split):
        """Walks a raw split once and returns (list of ((tweets, features), targets), list of raw tweet lists)."""
        data, tweets = [], []
        for sample_ids, targets in raw_split:
            data.append((self.extract_sample_data(sample_ids), targets))
            tweets.append(self.extract_tweets(sample_ids))
        return data, tweets

    def preprocess_data(self):
        """
        Converts the raw train / valid / test batches into tensors, keeps the matching raw tweets and builds the
        model from the first training batch.

        Every split is iterated exactly once, so the tensors and the raw tweets stay aligned even when the
        underlying iterable yields a different order on each pass.
        :raises ValueError: if the training split yields no batches.
        """
        print("Preprocessing train data")
        start = time.time()
        self.train_data, self.train_data_tweets = self._preprocess_split(self.train_data_raw)
        if not self.train_data:
            raise ValueError("train_data is empty; at least one batch is needed to build the model")
        self.build_model(self.train_data[0][0])
        print("Preprocessing train data finished in {:.2f} minutes".format((time.time() - start) / 60))

        print("Preprocessing valid data")
        self.valid_data, self.valid_data_tweets = self._preprocess_split(self.valid_data_raw)

        print("Preprocessing test data")
        self.test_data, self.test_data_tweets = self._preprocess_split(self.test_data_raw)

    @staticmethod
    def _mean_or_nan(values):
        """Returns ``np.mean(values)``, or NaN (without a numpy warning) when there is nothing to average."""
        return np.mean(values) if np.size(values) else np.nan

    def _average_and_log(self, stats, step=None):
        """Replaces every entry of ``stats`` by its mean and, unless DEBUG is set, logs it to the experiment."""
        for key in list(stats):
            stats[key] = self._mean_or_nan(stats[key])
            if not DEBUG:
                if step is None:
                    self.experiment.log_metric(name=key, value=stats[key])
                else:
                    self.experiment.log_metric(name=key, value=stats[key], step=step)

    def run_experiment(self):
        """
        Runs the training and validation iterations for every epoch, saving the model after each epoch and tracking
        the best validation epoch. Afterwards the best model is reloaded, evaluated on the test set, the statistics
        are written to 'train_statistics_<seed>.csv' and 'results.csv' in the experiment folder, and all checkpoints
        except the best one are removed.
        :return: (train_stats, test_stats) - an OrderedDict of per-epoch mean stats keyed 'epoch_<idx>' and a
            mapping of mean test stats.
        """
        progress_template = "{} Epoch {}: f-score-hateful: {:.4f}, accuracy: {:.4f}"
        train_stats = OrderedDict()
        for epoch_idx in range(self.num_epochs):
            epoch_start_time = time.time()
            epoch_stats = defaultdict(list)

            with tqdm.tqdm(total=len(self.train_data)) as pbar_train:  # create a progress bar for training
                for x, y in self.train_data:  # get data batches
                    self.run_train_iter(x=x, y=y, stats=epoch_stats)  # take a training iter step
                    pbar_train.update(1)
                    pbar_train.set_description(progress_template.format(
                        'Train', epoch_idx,
                        self._mean_or_nan(epoch_stats['train_f_score_hateful']),
                        self._mean_or_nan(epoch_stats['train_acc'])))

            with tqdm.tqdm(total=len(self.valid_data)) as pbar_val:  # create a progress bar for validation
                for x, y in self.valid_data:  # get data batches
                    self.run_evaluation_iter(x=x, y=y, stats=epoch_stats)  # run a validation iter
                    pbar_val.update(1)  # add 1 step to the progress bar
                    pbar_val.set_description(progress_template.format(
                        'Valid', epoch_idx,
                        self._mean_or_nan(epoch_stats['valid_f_score_hateful']),
                        self._mean_or_nan(epoch_stats['valid_acc'])))

            # learning rate
            if self.scheduler is not None:
                self.scheduler.step()
            epoch_stats['learning_rate'] = self.optimizer.param_groups[0]['lr']

            # save to train stats
            self._average_and_log(epoch_stats, step=epoch_idx)
            epoch_stats['epoch'] = epoch_idx
            train_stats["epoch_{}".format(epoch_idx)] = epoch_stats

            if DEBUG:
                self.iter_logs(epoch_stats, epoch_start_time, epoch_idx)

            self.save_model(model_save_dir=self.experiment_saved_models,
                            model_save_name="train_model",
                            model_idx=epoch_idx)
            self.save_best_performing_model(epoch_stats=epoch_stats, epoch_idx=epoch_idx)

        ### EXPERIMENTS END ###
        # save train statistics
        prepare_output_file(
            filename=os.path.join(self.experiment_folder, "train_statistics_{}.csv".format(self.seed)),
            output=list(train_stats.values()))

        print("Generating test set evaluation metrics with best model index {}".format(self.best_val_model_idx))
        self.load_model(model_save_dir=self.experiment_saved_models,
                        model_idx=self.best_val_model_idx,
                        model_save_name="train_model")

        # start the test pass from an empty confusion matrix so repeated runs do not accumulate
        self.confusion_matrix.zero_()
        test_stats = defaultdict(list)
        with tqdm.tqdm(total=len(self.test_data)) as pbar_test:  # ini a progress bar
            for i, (x, y) in enumerate(self.test_data):  # sample batch
                preds = self.run_evaluation_iter(x=x, y=y, stats=test_stats, experiment_key='test')
                pbar_test.update(1)  # update progress bar status
                pbar_test.set_description("loss: {:.4f}, accuracy: {:.4f}".format(
                    self._mean_or_nan(test_stats['test_loss']),
                    self._mean_or_nan(test_stats['test_acc'])))
                # print every prediction next to its label and raw tweet
                for j, pred in enumerate(preds):
                    tweet, label = self.test_data_tweets[i][j]
                    print("Pred: {} Label: {}\n{}\n".format(pred, label, tweet))
        print(self.confusion_matrix)

        # save to test stats
        self._average_and_log(test_stats)

        # test stats first, then the stats of the best validation epoch
        merge_dict = dict(test_stats)
        merge_dict.update(train_stats["epoch_{}".format(self.best_val_model_idx)])
        merge_dict['epoch'] = self.best_val_model_idx
        merge_dict['seed'] = self.seed
        merge_dict['title'] = self.experiment_name
        merge_dict['num_epochs'] = self.num_epochs
        merge_dict = {key: np.around(value, 4) if isinstance(value, float) else value
                      for key, value in merge_dict.items()}
        print(merge_dict)
        prepare_output_file(filename=os.path.join(self.experiment_folder, "results.csv"),
                            output=[merge_dict])
        self.remove_excess_models()
        return train_stats, test_stats