@@ -880,7 +880,7 @@ def visualize_grid2(imgs, label, normalize=True):
# https://jaan.io/what-is-variational-autoencoder-vae-tutorial/
# https://www.youtube.com/watch?v=uaaqyVS9-rM
# http://blog.shakirm.com/2015/10/machine-learning-trick-of-the-day-4-reparameterisation-tricks/
-
+# https://www.reddit.com/r/MLQuestions/comments/dl7mya/a_few_more_questions_about_vaes/

# we'll also have an example concerning words (in the NLP domain) and see how we can
# leverage VAEs in that domain as well. For now, let's see how we can implement this
@@ -895,7 +895,20 @@ def visualize_grid2(imgs, label, normalize=True):
# will help you grasp one aspect very well!
#
# now let's define our VAE model.
+
+
class VAE(nn.Module):
+
+
+    def conv(self, in_dim, out_dim, k_size=3, stride=2, padding=1, batch_norm=True, bias=False):
+        return nn.Sequential(nn.Conv2d(in_dim, out_dim, k_size, stride, padding, bias=bias),
+                             nn.BatchNorm2d(out_dim) if batch_norm else nn.Identity(),
+                             nn.ReLU())
+
+    def deconv(self, in_dim, out_dim, k_size=3, stride=2, padding=1, batch_norm=True, bias=False):
+        return nn.Sequential(nn.ConvTranspose2d(in_dim, out_dim, k_size, stride, padding, bias=bias),
+                             nn.BatchNorm2d(out_dim) if batch_norm else nn.Identity(),
+                             nn.ReLU())
    def __init__(self, embedding_size=100):
        super().__init__()

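The deconv helper above is used later with kernel size 4, stride 2, and the default padding of 1, which exactly doubles the spatial size (out = (in - 1) * stride - 2 * padding + kernel). A tiny illustrative check of that arithmetic, independent of this commit:

# Illustrative shape check (not part of the commit): ConvTranspose2d with
# kernel_size=4, stride=2, padding=1 doubles the spatial resolution,
# since out = (in - 1) * stride - 2 * padding + kernel_size.
import torch
import torch.nn as nn

x = torch.randn(1, 8, 1, 1)
up = nn.ConvTranspose2d(8, 16, kernel_size=4, stride=2, padding=1)
print(up(x).shape)  # torch.Size([1, 16, 2, 2])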
@@ -913,11 +926,26 @@ def __init__(self, embedding_size=100):
        # density.
        # We can sample from this distribution to get noisy values of the
        # representations z.
-
+
        self.fc1 = nn.Linear(28*28, 512)
-        self.fc1_mu = nn.Linear(512, self.embedding_size) # mean
+        self.encoder = nn.Sequential(self.conv(3, 768, stride=1),   # stride=1: only the pooling layers downsample (32->16->8->4->2->1)
+                                     self.conv(768, 512, stride=1),
+                                     self.conv(512, 256, stride=1),
+                                     nn.MaxPool2d(2, 2),  # 16
+                                     self.conv(256, 128, stride=1),
+                                     self.conv(128, 64, stride=1),
+                                     nn.MaxPool2d(2, 2),  # 8
+                                     self.conv(64, 32, stride=1),
+                                     nn.MaxPool2d(2, 2),  # 4
+                                     self.conv(32, 16, stride=1),
+                                     nn.MaxPool2d(2, 2),  # 2x2
+                                     self.conv(16, 8, stride=1),
+                                     nn.MaxPool2d(2, 2),  # 1x1
+                                     nn.Flatten())        # (N, 8, 1, 1) -> (N, 8) for the linear heads
+
+        self.fc1_mu = nn.Linear(8, self.embedding_size)  # mean
        # we use log since we want to prevent getting negative variance
-        self.fc1_std = nn.Linear(512, self.embedding_size) #logvariance
+        self.fc1_std = nn.Linear(8, self.embedding_size)  # log-variance

        # our decoder will accept a randomly sampled vector using
        # our mu and std.
@@ -939,14 +967,23 @@ def __init__(self, embedding_size=100):
        # log-likelihood log p_ϕ(x|z), whose units are nats. This measure tells us how
        # effectively the decoder has learned to reconstruct an input image x given
        # its latent representation z.
-        self.decoder = nn.Sequential( nn.Linear(self.embedding_size, 512),
-                                      nn.ReLU(),
-                                      nn.Linear(512, 28*28),
-                                      # in normal situations we wouldnt use sigmoid
-                                      # but since we want our values to be in [0,1]
-                                      # we use sigmoid. for loss we will then have
-                                      # to use, plain BCE (and specifically not BCEWithLogits)
-                                      nn.Sigmoid())
+        self.decoder = nn.Sequential(nn.Linear(self.embedding_size, 8*1*1), nn.Unflatten(1, (8, 1, 1)),  # reshape to (N, 8, 1, 1) before the transposed convs
+                                     self.deconv(8, 768, k_size=4, stride=2),
+                                     self.deconv(768, 512, k_size=4, stride=2),
+                                     self.deconv(512, 256, k_size=4, stride=2),
+                                     self.deconv(256, 128, k_size=4, stride=2),
+                                     nn.ConvTranspose2d(128, 3, 4, 2, 1),  # final layer kept plain: BN/ReLU here would restrict the Sigmoid's input to >= 0
+                                     # self.deconv(64, 32, k_size=4, stride=2),
+                                     # self.deconv(32, 3, k_size=4, stride=2),
+                                     nn.Sigmoid())
+        # self.decoder = nn.Sequential( nn.Linear(self.embedding_size, 512),
+        #                               nn.ReLU(),
+        #                               nn.Linear(512, 28*28),
+        #                               # in normal situations we wouldnt use sigmoid
+        #                               # but since we want our values to be in [0,1]
+        #                               # we use sigmoid. for loss we will then have
+        #                               # to use, plain BCE (and specifically not BCEWithLogits)
+        #                               nn.Sigmoid())



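The comments in this hunk describe reconstruction quality as a log-likelihood measured in nats, and the Sigmoid output is meant to pair with a plain BCE reconstruction term. To make that concrete, here is a minimal sketch of the standard VAE objective (reconstruction BCE plus the closed-form KL term); it is illustrative only and not part of this commit:

# Illustrative sketch of the usual VAE objective: negative reconstruction
# log-likelihood (plain BCE, valid because the decoder ends in Sigmoid) plus the
# closed-form KL divergence between q(z|x) = N(mu, sigma^2) and the prior N(0, I).
import torch
import torch.nn.functional as F

def vae_objective_sketch(recon, x, mu, logvar):
    recon_nll = F.binary_cross_entropy(recon, x, reduction='sum')   # in nats
    kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())   # KL(q || N(0, I))
    return recon_nll + kld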
@@ -1029,8 +1066,9 @@ def reparamtrization_trick(self, mu, logvar):
        return mu + eps * std
    #
    def encode(self, input):
-        input = input.view(input.size(0), -1)
-        output = F.relu(self.fc1(input))
+        # input = input.view(input.size(0), -1)
+        # output = F.relu(self.fc1(input))
+        output = self.encoder(input)
        # we don't use activations here
        mu = self.fc1_mu(output)
        log_var = self.fc1_std(output)
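Since the encoder, the reparameterisation trick, and the decoder are now all convolution-based, a quick shape check is a cheap way to confirm they line up for CIFAR10-sized inputs. The snippet below is a hedged sketch (not part of the commit) that only exercises the attributes visible in this diff:

# Hedged sanity check: push a fake CIFAR10 batch through the pieces defined above
# and print the intermediate shapes. Assumes the VAE class from this file is in scope.
import torch

model = VAE(embedding_size=2)
x = torch.randn(4, 3, 32, 32)                    # fake batch of 32x32 RGB images
feats = model.encoder(x)                         # expected (4, 8) after the final Flatten
mu, logvar = model.fc1_mu(feats), model.fc1_std(feats)
z = model.reparamtrization_trick(mu, logvar)     # z = mu + eps * std, expected (4, 2)
recon = model.decoder(z)                         # expected (4, 3, 32, 32)
print(feats.shape, z.shape, recon.shape)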
@@ -1186,6 +1224,11 @@ def loss_function(outputs, inputs, mu, logvar, reduction='mean', use_mse=False):
#%%
# now let's train:
epochs = 50
+dataset_train = datasets.CIFAR10('cifar10', train=True, download=True, transform=transforms.ToTensor())
+dataset_test = datasets.CIFAR10('cifar10', train=False, download=True, transform=transforms.ToTensor())
+
+dataloader_train = torch.utils.data.DataLoader(dataset_train, batch_size=128, shuffle=True)
+dataloader_test = torch.utils.data.DataLoader(dataset_test, batch_size=128, shuffle=False)

embeddingsize = 2
interval = 2000
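With the CIFAR10 datasets and loaders added above, a minimal training loop could look like the sketch below. It is illustrative only: it assumes the model's forward() returns (outputs, mu, logvar) in the order expected by the loss_function(outputs, inputs, mu, logvar, ...) signature shown in this hunk, and it picks Adam with lr=1e-3 as an arbitrary optimiser choice.

# Hedged training-loop sketch (not part of this commit). Assumes forward() returns
# (outputs, mu, logvar) matching the loss_function signature above.
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = VAE(embedding_size=embeddingsize).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(epochs):
    model.train()
    running = 0.0
    for imgs, _ in dataloader_train:          # class labels are unused by the VAE
        imgs = imgs.to(device)
        outputs, mu, logvar = model(imgs)
        loss = loss_function(outputs, imgs, mu, logvar, reduction='mean')
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running += loss.item()
    print(f'epoch {epoch}: train loss {running / len(dataloader_train):.4f}')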