# 2-Assignment.py
#%%
# starting with imports
import torch
#%%
# here is some iteration -- lowering the number
# of hidden parameters until we can no longer get the
# gradients to vanish
# this is a bit of dynamic model generation, which is
# a kind of meta-learning
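#%%
# a minimal sketch of that dynamic-generation idea on its own, assuming all we
# want to see is that layer sizes can come from an ordinary Python variable --
# the `hidden` name and the tiny sizes here are just for illustration
for hidden in (4, 2):
    sketch = torch.nn.Sequential(
        torch.nn.Linear(8, hidden),
        torch.nn.ReLU(),
        torch.nn.Linear(hidden, 2),
    )
    print(hidden, sketch(torch.rand(1, 8)).shape)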
#%%
# inputs and outputs, just random values -- we'll work with real
# data in subsequent videos
inputs = torch.rand(1, 1, 64, 64)
outputs = torch.rand(1, 2)
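# a quick shape check, just as a sketch of what the model below has to map:
# a (1, 1, 64, 64) input tensor down to a (1, 2) output matching these targets
print(inputs.shape, outputs.shape)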
#%%
# keep track of how many learning steps it took
# at each number of parameters
learning_steps = []
#%%
for number_of_parameters in range(256, 1, -1):
    class Model(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.layer_one = torch.nn.Linear(64, number_of_parameters)
            self.activation_one = torch.nn.ReLU()
            self.layer_two = torch.nn.Linear(number_of_parameters, number_of_parameters)
            self.activation_two = torch.nn.ReLU()
            # this is a pretty big number -- because we are flattening,
            # which turns the 64 * number_of_parameters activations
            # into a flat, array-like tensor
            self.shape_outputs = torch.nn.Linear(number_of_parameters * 64, 2)

        def forward(self, inputs):
            buffer = self.layer_one(inputs)
            buffer = self.activation_one(buffer)
            buffer = self.layer_two(buffer)
            buffer = self.activation_two(buffer)
            buffer = buffer.flatten(start_dim=1)
            return self.shape_outputs(buffer)

    model = Model()
    loss_function = torch.nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    # a limit on how much learning we do
    for i in range(10000):
        # the optimizer reaches into the model and will zero out the gradients
        optimizer.zero_grad()
        results = model(inputs)
        loss = loss_function(results, outputs)
        loss.backward()
        optimizer.step()

        # now -- look for vanishing gradients by summing up the gradient
        # values across every parameter in the model
        gradients = 0.0
        for parameter in model.parameters():
            gradients += parameter.grad.data.sum()
        if abs(gradients) <= 0.0001:
            learning_steps.append((number_of_parameters, i, results))
            break
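#%%
# as an aside, and only a sketch: the check above sums signed gradient values,
# so positive and negative entries can cancel; looking at the total gradient
# norm of the last model trained gives a cancellation-free view of how small
# the gradients really are
sum(parameter.grad.norm().item() for parameter in model.parameters())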
#%%
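# each entry is (number_of_parameters, steps_until_the_gradients_vanished, results),
# so the middle value is what gets charted below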
learning_steps
#%%
# looks like it still learns -- but it gets a lot harder as the number
# of hidden parameters approaches the number of outputs
import matplotlib.pyplot as plt
plt.style.use('ggplot')
learning_steps = [step[1] for step in learning_steps]
plt.plot(learning_steps)
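# purely presentational, as a sketch: label the axes so the chart reads as
# steps-to-vanishing-gradients per model, with models ordered from the largest
# hidden size (256) down toward the smallest
plt.xlabel('model (hidden parameters decreasing from 256)')
plt.ylabel('steps until gradients vanished')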