-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelper_functions.py
369 lines (216 loc) · 10.8 KB
/
helper_functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
# Imports
import os
import json
import torch
import argparse
from torch import nn
from torchvision import datasets, transforms, models
from collections import OrderedDict
# Select the compute device once, when this module is imported: CUDA if torch
# reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def load_data_dir(data_dir='flowers'):
    """Build the paths of the train/valid/test image folders.

    Args:
        data_dir: Root directory that holds the ``train``, ``valid`` and
            ``test`` sub-directories. Defaults to ``'flowers'``.

    Returns:
        A dictionary mapping ``'train'``, ``'valid'`` and ``'test'`` to the
        matching directory path, i.e. ``data_dir`` joined to the split name
        with a forward slash.
    """
    return {split: f"{data_dir}/{split}" for split in ('train', 'valid', 'test')}
########## Data Transformations
def _normalize():
    """Return a fresh Normalize step with the per-channel mean/std used by every pipeline."""
    return transforms.Normalize([0.485, 0.456, 0.406],
                                [0.229, 0.224, 0.225])


def _evaluation_pipeline():
    """Return the deterministic resize -> center-crop -> tensor -> normalize pipeline."""
    steps = [
        transforms.Resize(224),
        transforms.CenterCrop(200),
        transforms.ToTensor(),
        _normalize(),
    ]
    return transforms.Compose(steps)


# Training pipeline: random augmentations first, then crop/resize, tensor
# conversion and normalization.
_train_steps = [
    transforms.RandomRotation(20),
    transforms.RandomResizedCrop(200),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(brightness=0.4, contrast=0.3, saturation=0.2, hue=0.1),
    transforms.CenterCrop(200),
    transforms.Resize(224),
    transforms.ToTensor(),
    _normalize(),
]
train_transforms = transforms.Compose(_train_steps)

# Validation and test use the same steps but are kept as separate objects.
valid_transforms = _evaluation_pipeline()
test_transforms = _evaluation_pipeline()

# Lookup table: split name -> transform pipeline.
data_transforms = dict(train=train_transforms, valid=valid_transforms, test=test_transforms)
def load_datasets():
    """Create one ``ImageFolder`` dataset per split.

    Returns:
        A dictionary mapping ``'train'``, ``'valid'`` and ``'test'`` to a
        ``datasets.ImageFolder`` built from the directory returned by
        ``load_data_dir()`` and the matching entry of ``data_transforms``.
    """
    split_dirs = load_data_dir()
    return {
        split: datasets.ImageFolder(split_dirs[split], transform=data_transforms[split])
        for split in ('train', 'valid', 'test')
    }
def load_dataloaders():
    """Wrap every dataset from ``load_datasets()`` in a ``DataLoader``.

    Returns:
        A dictionary mapping ``'train'``, ``'valid'`` and ``'test'`` to a
        ``torch.utils.data.DataLoader`` with a batch size of 32. Only the
        training loader is created with ``shuffle=True``.
    """
    split_datasets = load_datasets()
    make_loader = torch.utils.data.DataLoader
    loaders = {}
    for split in ('train', 'valid', 'test'):
        if split == 'train':
            loaders[split] = make_loader(split_datasets[split], batch_size=32, shuffle=True)
        else:
            loaders[split] = make_loader(split_datasets[split], batch_size=32)
    return loaders
# These functions map the class numbers from model.class_to_idx to their proper names from the JSON file
def get_category_names(file_path='cat_to_name.json'):
    """Load the category-to-name mapping from a JSON file.

    Args:
        file_path: Path of the JSON file to read. Defaults to
            ``'cat_to_name.json'`` in the current working directory.

    Returns:
        The decoded JSON content (whatever ``json.load`` produces for the file).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Explicit encoding so the result does not depend on the platform default.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)
def load_labels():
    """Map each flower category name to its class index.

    The class keys of the training dataset's ``class_to_idx`` are looked up in
    the mapping returned by ``get_category_names()``. Keys that have no entry
    in that mapping are skipped.

    Returns:
        A dictionary ``{category name: class index}``.
    """
    class_to_idx = load_datasets()['train'].class_to_idx
    cat_to_name = get_category_names()
    # Direct dictionary lookup instead of scanning every name for every class.
    return {
        cat_to_name[class_key]: index
        for class_key, index in class_to_idx.items()
        if class_key in cat_to_name
    }
def get_input():
    """Parse the command-line options of the training script.

    Supported options:
        --arch         one of 'densenet', 'alexnet', 'vgg' (default 'densenet')
        --lr           learning rate, float (default 0.001)
        --h_units      number of hidden units, int (default 1024)
        --epochs       number of epochs, int, must be > 0 (default 3)
        --disable_gpu  flag, False unless given

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.

    Raises:
        SystemExit: Raised by argparse on invalid arguments, including a
            non-positive ``--epochs`` value.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--arch', type=str, default='densenet',
                        choices=['densenet', 'alexnet', 'vgg'],
                        help='Model Architectures')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='Learning Rate')
    parser.add_argument('--h_units', type=int, default=1024,
                        help='Number of hidden units')
    parser.add_argument('--epochs', type=int, default=3,
                        help='Number of Epochs')
    parser.add_argument('--disable_gpu', action='store_true',
                        help='Disabling GPU')
    in_args = parser.parse_args()
    # An assert would be stripped under `python -O`; report the problem through
    # argparse so the check always runs and the user sees the usage message.
    if in_args.epochs <= 0:
        parser.error("invalid value for number of epochs")
    return in_args
# NOTE: evaluated when this module is imported, so importing it calls
# get_input() and therefore parses the command line. Holds the lower-cased
# value of the --arch option.
in_model=get_input().arch.lower()
def classifier(in_model='densenet', dropout=0.5, hidden_units=1024, n_classes=102):
    """Build a pretrained torchvision model with a new classifier head.

    All parameters of the pretrained network are frozen
    (``requires_grad = False``) and its ``classifier`` attribute is replaced by
    a fresh ``Linear -> ReLU -> Dropout -> Linear -> LogSoftmax`` stack.

    Args:
        in_model: Architecture name: ``'densenet'`` (densenet121),
            ``'alexnet'`` or ``'vgg'`` (vgg19). Defaults to ``'densenet'``.
        dropout: Dropout probability used in the new head.
        hidden_units: Width of the hidden layer of the new head.
        n_classes: Number of output classes. Defaults to 102.

    Returns:
        The model with the replaced classifier.

    Raises:
        ValueError: If ``in_model`` is not one of the supported names.
    """
    # architecture name -> (factory name in torchvision.models,
    #                       number of features fed into the classifier)
    architectures = {
        'densenet': ('densenet121', 1024),
        'alexnet': ('alexnet', 9216),
        'vgg': ('vgg19', 25088),
    }
    if in_model not in architectures:
        # Previously this fell through and failed later with an unbound local.
        raise ValueError(
            f"Unsupported architecture {in_model!r}; expected one of {sorted(architectures)}"
        )
    factory_name, in_features = architectures[in_model]
    model = getattr(models, factory_name)(pretrained=True)

    # Freeze the pretrained weights; only the new head below stays trainable.
    for param in model.parameters():
        param.requires_grad = False

    model.classifier = nn.Sequential(OrderedDict([
        ("fc1", nn.Linear(in_features, hidden_units)),
        ("relu", nn.ReLU()),
        ("dropout", nn.Dropout(dropout)),
        # The output width is the same for every architecture.
        ("fc2", nn.Linear(hidden_units, n_classes)),
        ("output", nn.LogSoftmax(dim=1)),
    ]))
    return model
def _validation_metrics(model, valid_dataloader, criterion, device):
    """Return (mean batch loss, mean batch accuracy in percent) on the validation loader."""
    n_batches = len(valid_dataloader)
    if n_batches == 0:
        # Nothing to average; report NaN instead of dividing by zero.
        return float('nan'), float('nan')
    total_loss = 0.0
    total_accuracy = 0.0
    model.eval()
    with torch.no_grad():
        for X, y in valid_dataloader:
            X = X.to(device)
            y = y.to(device)
            y_pred = model(X)
            total_loss += criterion(y_pred, y).item()
            total_accuracy += torch.eq(y_pred.argmax(dim=1), y).float().mean().item()
    return total_loss / n_batches, total_accuracy / n_batches * 100


def training_loop(model, train_dataloader, valid_dataloader, criterion, optimizer,
                  device=device, print_every=26):
    """Run one pass over the training data, reporting metrics periodically.

    Every ``print_every`` steps the loss and accuracy averaged over those steps
    are printed together with the metrics of a full validation pass. The
    running training metrics are then reset and the model is put back into
    training mode.

    Args:
        model: Module to train; it is moved to ``device``.
        train_dataloader: Iterable of ``(inputs, targets)`` training batches.
        valid_dataloader: Sized iterable of ``(inputs, targets)`` validation batches.
        criterion: Loss callable invoked as ``criterion(predictions, targets)``.
        optimizer: Optimizer providing ``zero_grad()`` and ``step()``.
        device: Device the model and the batches are moved to.
        print_every: Number of training steps between two reports (default 26).

    Returns:
        None. Results are printed.

    Raises:
        ValueError: If ``print_every`` is smaller than 1.
    """
    if print_every < 1:
        raise ValueError("print_every must be a positive integer")

    model.to(device)
    model.train()
    running_loss = 0.0       # summed over the current reporting window
    running_accuracy = 0.0   # summed per-batch accuracy (fraction) for the window

    for step, (X, y) in enumerate(train_dataloader, start=1):
        # Move the batch to the target device
        X = X.to(device)
        y = y.to(device)

        # Forward pass and loss
        y_pred = model(X)
        loss = criterion(y_pred, y)
        running_loss += loss.item()
        running_accuracy += torch.eq(y_pred.argmax(dim=1), y).float().mean().item()

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % print_every != 0:
            continue

        # Average over the window that was actually accumulated, then reset
        # so that the next report is not polluted by older batches.
        train_loss = running_loss / print_every
        train_accuracy = running_accuracy / print_every * 100
        running_loss = 0.0
        running_accuracy = 0.0

        v_loss, v_accuracy = _validation_metrics(model, valid_dataloader, criterion, device)
        # Validation switched the model to eval mode; re-enable training
        # behavior (e.g. dropout) for the remaining batches.
        model.train()

        print(f"Train Loss : {train_loss : .5f} | Train Accuracy: { train_accuracy : .2f}% | Validation Loss : {v_loss : .5f} | Valiadtion Accuracy: {v_accuracy : .2f}%\n")
def testing_model(model, data_loader, criterion, device=device):
    """Evaluate ``model`` on ``data_loader`` and return the averaged results.

    Args:
        model: Module to evaluate; it is moved to ``device`` and set to eval mode.
        data_loader: Sized iterable of ``(inputs, targets)`` batches.
        criterion: Loss callable invoked as ``criterion(predictions, targets)``.
        device: Device the model and the batches are moved to.

    Returns:
        A dictionary with
        ``"model_name"``: class name of ``model``,
        ``"model_loss"``: mean per-batch loss as a Python float,
        ``"model_accuracy"``: mean per-batch accuracy in percent (a tensor).

    Raises:
        ValueError: If ``data_loader`` contains no batches.
    """
    n_batches = len(data_loader)
    if n_batches == 0:
        raise ValueError("data_loader contains no batches; nothing to evaluate")

    # Keep the running total separate from the per-batch loss; reusing a single
    # name made every batch overwrite the total.
    total_loss = 0.0
    accuracy = 0
    model.to(device)
    model.eval()
    with torch.inference_mode():
        for X, y in data_loader:
            # Move the batch to the target device
            X = X.to(device)
            y = y.to(device)
            # Make predictions
            y_pred = model(X)
            # Accumulate loss and accuracy of this batch
            total_loss += criterion(y_pred, y).item()
            accuracy += torch.mean(torch.eq(y_pred.argmax(dim=1), y).type(torch.FloatTensor))
        # Average over the batches and express accuracy as a percentage.
        accuracy = accuracy / n_batches * 100
    mean_loss = total_loss / n_batches

    # Return the results as a dictionary
    return {"model_name": type(model).__name__,  # class name of the model object
            "model_loss": mean_loss,
            "model_accuracy": accuracy}
# Helper function to validate user input
def vaildating_input(answer):
    """Check a yes/no reply and echo the outcome.

    The reply is lower-cased first. If it is ``'y'`` or ``'n'`` a confirmation
    is printed and the lower-cased reply is returned. Otherwise a mismatch
    message is printed and ``None`` is returned.
    """
    normalized = answer.lower()
    if normalized not in ('y', 'n'):
        print(f"Your choice is {normalized} and does not match the provided options\n")
        return None
    print(f" Your choice is {normalized}, and working on it\n")
    return normalized
# loads a checkpoint and rebuilds the model
def load_checkpoint(file_path):
    """Load a checkpoint file and rebuild the model it describes.

    The checkpoint is expected to be a mapping with the keys ``'model_name'``
    (``'DenseNet'``, ``'VGG'`` or ``'AlexNet'``), ``'classifier'``,
    ``'class_to_idx'`` and ``'state_dict'``.

    Args:
        file_path: Path of the checkpoint file to load.

    Returns:
        The pretrained torchvision model with frozen parameters, the stored
        classifier attached, ``class_to_idx`` set and the stored state loaded.

    Raises:
        KeyError: If a required key is missing from the checkpoint.
        ValueError: If ``'model_name'`` is not a supported architecture.
    """
    # SECURITY NOTE: torch.load unpickles the file; only load checkpoints that
    # come from a trusted source.
    # NOTE(review): recent PyTorch releases may default torch.load to
    # weights_only=True, which could reject the pickled classifier module;
    # confirm against the installed version.
    # Without CUDA, remap stored tensors to the CPU so checkpoints saved on a
    # GPU machine can still be loaded.
    map_location = None if torch.cuda.is_available() else "cpu"
    checkpoint = torch.load(file_path, map_location=map_location)

    model_name = checkpoint['model_name']
    # stored class name -> factory name in torchvision.models
    factories = {
        'DenseNet': 'densenet121',
        'VGG': 'vgg19',
        'AlexNet': 'alexnet',
    }
    if model_name not in factories:
        # Previously this fell through and failed later with an unbound local.
        raise ValueError(
            f"Unsupported model name {model_name!r} in checkpoint; expected one of {sorted(factories)}"
        )
    model = getattr(models, factories[model_name])(pretrained=True)

    for param in model.parameters():
        param.requires_grad = False

    # The classifier must be attached before the state dict is loaded so that
    # its parameters exist on the model.
    model.classifier = checkpoint['classifier']
    model.class_to_idx = checkpoint['class_to_idx']
    model.load_state_dict(checkpoint['state_dict'])
    return model