Commit 4710c50

voc training notebook

holli committed Oct 2, 2018
1 parent f0b9940 commit 4710c50

Showing 7 changed files with 818 additions and 40 deletions.
6 changes: 5 additions & 1 deletion README.md
@@ -19,9 +19,13 @@ Pretrained weights are available at: **http://www.ollihuotari.com/data/yolov3_py**
 - **https://github.com/holli/yolov3_pytorch/blob/master/notebooks/eval_coco_map.ipynb**
   - mAP metric on the COCO evaluation data set, just to make sure that this implementation is close enough to the original implementation.

+# Support / Commits
+
+Submit suggestions or feature requests as a GitHub Issue or Pull Request. Preferably create a test to show what's happening and what should happen.
+
 # Other Implementations
 
-There are some good pytorch implementations previously but they were using original cfg files to create the model. This works well but it's harder to modify and test other approaches. Some of them didn't include yolov3-tiny model or didn't work with using images of different sizes (e.g. 608 pixel sizes instead of default 416). Some better ones include:
+There are some good PyTorch implementations from before, but many of them use the original cfg files to create the model. This works well, but it's harder to modify and test other approaches. Some of them didn't include the yolov3-tiny model or didn't work with images of different sizes (e.g. 608 pixels instead of the default 416). Some nicer ones include:
 
 - https://github.com/marvis/pytorch-yolo3
 - https://github.com/andy-yun/pytorch-0.4-yolov3
595 changes: 595 additions & 0 deletions notebooks/train_voc.ipynb

Large diffs are not rendered by default.
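
A rough sketch of the flow the training notebook presumably follows (illustrative only; the class names, file paths and the training step are hypothetical, see the notebook itself for the real code):

    md = get_voc_md(['/data/voc/voc_train.txt', '/data/voc/2007_test.txt'], sz=416)
    model = Yolov3Tiny(num_classes=20).cuda()
    model.load_backbone('data/models/yolov3_tiny_coco.h5')  # start from pretrained weights
    # ... train with a YOLO loss over md.trn_dl ...
    create_detection_files(model, md.val_ds, sz=416)        # then evaluate mAP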

172 changes: 172 additions & 0 deletions notebooks/voc_utils.py
@@ -0,0 +1,172 @@
import torch.nn as nn
from fastai.imports import *
from fastai.dataset import *
from matplotlib import patches, patheffects
# image2torch and nms are needed by create_detection_files below; assuming they
# come from this repository's utils module:
from yolov3_pytorch.utils import image2torch, nms


def get_voc_md(data_filenames, sz=416, data_max_lines=False, tfms_trn=None, tfms_val=None):
    if not tfms_trn:
        # tfms_trn = [RandomRotate(10, tfm_y=TfmType.COORD), RandomLighting(0.20, 0.20), RandomBlur()]
        # tfms_trn = [RandomLighting(0.20, 0.20), RandomBlur()]
        tfms_trn = [RandomBlur()]
        # tfms_trn = image_gen(normalizer=None, denorm=None, sz=sz, crop_type=CropType.RANDOM,
        tfms_trn = image_gen(normalizer=None, denorm=None, sz=sz, crop_type=CropType.NO,
                             max_zoom=1.2, tfm_y=TfmType.COORD, tfms=tfms_trn)
    if not tfms_val:
        # tfms_val = image_gen(normalizer=None, denorm=None, sz=sz, crop_type=CropType.CENTER,
        tfms_val = image_gen(normalizer=None, denorm=None, sz=sz, crop_type=CropType.NO,
                             max_zoom=1, tfm_y=TfmType.COORD, tfms=[])

    # Each list file holds one image path per line (train list first, then valid list).
    data_lines = []
    for f in data_filenames:
        with open(f, 'r') as file:
            arr = file.readlines()
            arr = [s.rstrip('\n') for s in arr]
        data_lines.append(arr)

    # Optionally truncate the train/valid lists, e.g. for quick experiments.
    if data_max_lines:
        if type(data_max_lines) == int:
            data_max_lines = [data_max_lines, data_max_lines]
        for i in range(len(data_lines)):
            data_lines[i] = data_lines[i][:data_max_lines[i]]

    datasets = [
        VocDataset(data_lines[0], transform=tfms_trn, path='', sz=sz),  # train
        VocDataset(data_lines[1], transform=tfms_val, path='', sz=sz),  # valid
        VocDataset(data_lines[0], transform=tfms_val, path='', sz=sz),  # fix
        VocDataset(data_lines[1], transform=tfms_trn, path='', sz=sz),  # aug
        None, None  # test datasets
    ]

    md = ImageData(path="/tmp", datasets=datasets, bs=32, num_workers=2, classes=VocDataset.CLASS_NAMES)
    md.trn_dl.pre_pad = md.val_dl.pre_pad = md.fix_dl.pre_pad = md.aug_dl.pre_pad = False

    return md


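# Usage sketch (illustrative; the list-file paths are hypothetical examples of
# the image lists produced by voc_label.py):
#
#   md = get_voc_md(['/data/voc/voc_train.txt', '/data/voc/2007_test.txt'], sz=416)
#   x, y = next(iter(md.trn_dl))
#   # x: image batch, e.g. torch.Size([32, 3, 416, 416])
#   # y: padded label rows, e.g. torch.Size([32, 250]) = 50 * [class, cx, cy, w, h]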
# To be used for example with https://github.com/rafaelpadilla/Object-Detection-Metrics
# python pascalvoc.py --gtfolder /tmp/ai_mAP_1/ground --detfolder /tmp/ai_mAP_1/pred -gtcoords rel -detcoords rel -imgsize 416,416 --noplot
def create_detection_files(model, validation_ds, sz=416, tmp_dir='/tmp/ai_mAP_1', remove_old=True):
    for p in ['pred', 'ground']:
        p = os.path.join(tmp_dir, p)
        if os.path.exists(p):
            if remove_old:
                for f in glob.glob(os.path.join(p, "*.txt")):
                    os.remove(f)
        else:
            os.makedirs(p)

    for i in range(len(validation_ds.fnames)):
        imgfile = validation_ds.fnames[i]
        img_org = Image.open(imgfile).convert('RGB')
        img_resized = img_org.resize((sz, sz))
        img_torch = image2torch(img_resized).cuda()
        all_boxes = model.predict_img(img_torch)[0]
        boxes = nms(all_boxes, 0.4)

        fname = os.path.split(imgfile)[-1]
        fname = fname.replace('.png', '.txt').replace('.jpg', '.txt')
        det_fname = os.path.join(tmp_dir, 'pred', fname)
        with open(det_fname, 'w') as f:
            for box in boxes:
                box = np.array([b.item() for b in box])
                box[:2] -= box[2:4] / 2  # center x,y -> top-left corner
                arr = [int(box[-1]), box[-2]] + list(box[0:4])  # class, conf, x, y, w, h
                s = ' '.join([str(a) for a in arr]) + '\n'
                _ = f.write(s)

        g_fname = os.path.join(tmp_dir, 'ground', fname)
        with open(g_fname, 'w') as f:
            for box in validation_ds.get_y(i):
                box = np.array(box)
                box[1:3] -= box[-2:] / 2  # center x,y -> top-left corner
                arr = [int(box[0])] + list(box[1:5])  # class, x, y, w, h
                s = ' '.join([str(a) for a in arr]) + '\n'
                _ = f.write(s)

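# Usage sketch (illustrative; the model class and file paths are hypothetical
# examples):
#
#   model = Yolov3Tiny(num_classes=20).cuda().eval()
#   md = get_voc_md(['/data/voc/voc_train.txt', '/data/voc/2007_test.txt'])
#   create_detection_files(model, md.val_ds, sz=416)
#
# and then from an Object-Detection-Metrics checkout:
#
#   python pascalvoc.py --gtfolder /tmp/ai_mAP_1/ground --detfolder /tmp/ai_mAP_1/pred \
#       -gtcoords rel -detcoords rel -imgsize 416,416 --noplot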


# Expects data in the layout produced by https://pjreddie.com/media/files/voc_label.py, e.g.:
#   /home/ohu/koodi/data/voc/VOCdevkit/VOC2007/JPEGImages/000012.jpg
#   /home/ohu/koodi/data/voc/VOCdevkit/VOC2007/labels/000012.txt
class VocDataset(FilesDataset):
    CLASS_NAMES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
                   'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa',
                   'train', 'tvmonitor')

    def __init__(self, fnames, transform, path, sz):
        super().__init__(fnames, transform, path)
        self.sz = sz

    # Label rows are class, center_x, center_y, width, height in relative coordinates, e.g.
    # VocDataset.read_labels('/home/ohu/koodi/data/voc/VOCdevkit/VOC2007/labels/000009.txt', 0.03)
    @staticmethod
    def read_labels(lab_path, min_box_scale=0.03):
        if os.path.exists(lab_path) and os.path.getsize(lab_path):
            all_truths = np.loadtxt(lab_path)
            all_truths = all_truths.reshape(all_truths.size // 5, 5)  # to avoid single truth problem
        else:
            all_truths = np.array([])

        truths = []
        for t in all_truths:
            if t[3] < min_box_scale or t[4] < min_box_scale:
                continue  # skip boxes that are too small
            truths.append(t)
        return np.array(truths)

    def get_y(self, i):
        path = os.path.join(self.path, self.fnames[i])
        path = path.replace('images', 'labels').replace('JPEGImages', 'labels').replace('.jpg', '.txt').replace('.png', '.txt')
        return self.read_labels(path, 0.03)

    def get_c(self):
        return 20  # number of VOC classes, i.e. len(self.CLASS_NAMES)

    def get(self, tfm, x, y):
        # Override so that the transform is applied to the box coordinates
        # (as corner points in pixels) while the class ids are kept aside.
        w, h = x.shape[0], x.shape[1]

        y1 = y[:, 0:1]  # class ids
        y2 = y[:, 1:]   # boxes as relative center_x, center_y, w, h
        y2[:, :2] -= y2[:, 2:] / 2  # -> x1, y1, w, h
        y2[:, 2:] += y2[:, :2]      # -> x1, y1, x2, y2
        y2[:, :] *= [h, w, h, w]    # relative -> pixels

        # swap x,y to the row,col order the transforms expect
        y2[:, 0], y2[:, 1] = y2[:, 1].copy(), y2[:, 0].copy()
        y2[:, 2], y2[:, 3] = y2[:, 3].copy(), y2[:, 2].copy()

        y2 = y2.reshape(-1)
        x, y2 = tfm(x, y2)
        y2 = y2.reshape(-1, 4)

        y2[:, 2:] -= y2[:, :2]      # -> x1, y1, w, h
        y2[:, :2] += y2[:, 2:] / 2  # -> center_x, center_y, w, h
        y2 /= self.sz               # pixels -> relative

        # swap row,col back to x,y
        y2[:, 1], y2[:, 0] = y2[:, 0].copy(), y2[:, 1].copy()
        y2[:, 3], y2[:, 2] = y2[:, 2].copy(), y2[:, 3].copy()

        y = np.concatenate((y1, y2), axis=1)[:50]  # max 50 items
        y = y[(y[:, 3] > 0.001) & (y[:, 4] > 0.001)]  # drop boxes squashed to nothing

        if y.shape[0] < 50:
            y_pad = np.zeros((50 - y.shape[0], 5))
            y = np.concatenate((y, y_pad), 0)

        y = y.reshape(-1)
        return x, y


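# Usage sketch (illustrative; the label row below is a hypothetical example):
# a label file has one "class cx cy w h" row per object, in coordinates relative
# to the image size, so a file like labels/000012.txt might contain
#
#   6 0.546 0.481 0.136 0.130
#
# and VocDataset.read_labels('.../labels/000012.txt') would return it as a
# (1, 5) numpy array with class id 6 ('car' in CLASS_NAMES).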

11 changes: 5 additions & 6 deletions yolov3_pytorch/utils.py
@@ -163,20 +163,19 @@ def plot_img_detections(img, result_boxes, **kwargs):
     return plot_img_boxes(img, boxes, classes, extras=extras, **kwargs)
 
 
-def plot_img_data(batch, rows=2, figsize=(12, 8), **kwargs):
-    # print(batch[0].shape, batch[1].shape)
+def plot_img_data(x, y, rows=2, figsize=(12, 8), **kwargs):
     _, axes = plt.subplots(rows, 3, figsize=figsize)
 
     for j, ax in enumerate(axes.flat):
-        if j >= len(batch[1]):
+        if j >= len(y):
             break
-        targets = batch[1][j]
-        if isinstance(batch[0], torch.Tensor):
+        targets = y[j]
+        if isinstance(targets, torch.Tensor):
             targets = targets.clone().reshape(-1,5)
             classes = targets[:, 0].cpu().numpy().astype(int)
         else:
             classes = targets[:, 0].astype(int)
-        plot_img_boxes(batch[0][j], targets[:, 1:], classes, plt_ax=ax, **kwargs)
+        plot_img_boxes(x[j], targets[:, 1:], classes, plt_ax=ax, **kwargs)
 
     plt.tight_layout()

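# Usage sketch for the new signature (illustrative):
#
#   x, y = next(iter(md.trn_dl))   # a batch from the VOC ModelData above
#   plot_img_data(x, y, rows=2)    # the old call style was plot_img_data(batch)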
2 changes: 1 addition & 1 deletion yolov3_pytorch/yolov3.py
@@ -37,7 +37,7 @@ def forward_yolo(self, xb):
         x = torch.cat([x, xb[-3]], 1)
         x, y2 = self.yolo_2_prep(x)
 
-        return y0, y1, y2
+        return [y0, y1, y2]


###################################################################
70 changes: 39 additions & 31 deletions yolov3_pytorch/yolov3_base.py
@@ -42,37 +42,45 @@ def predict_img(self, imgs, conf_thresh=0.25):
         outputs = self.forward(imgs)
         return self.boxes_from_output(outputs, conf_thresh)
 
-    # def freeze_backbone(self, requires_grad=False):
-    #     for _, p in self.backbone.named_parameters():
-    #         p.requires_grad = requires_grad
-    # def unfreeze(self):
-    #     for _, p in self.named_parameters():
-    #         p.requires_grad = True
-    # def freeze_info(self, print_all=False):
-    #     d = defaultdict(set)
-    #     for name, param in self.named_parameters():
-    #         if print_all:
-    #             print(f"{name}: {param.requires_grad}")
-    #         else:
-    #             d[name.split('.')[0]].add(param.requires_grad)
-    #     if not print_all:
-    #         for k,v in d.items():
-    #             print(k, ': ', v)
-
-    # def load_only_backbone(self, h5_path):
-    #     state_dict = torch.load(h5_path)
-
-    #     for k in list(state_dict.keys()):
-    #         if k.startswith(('yolo_0_pre.15', 'yolo_1_pre.20')):
-    #             del state_dict[k]
-
-    #     # Renaming some keys if needed for compatibility
-    #     # state_dict = type(state_dict_org)()
-    #     # for k_old in list(state_dict.keys()):
-    #     #     k_new = k_old.replace('backend', 'backbone')
-    #     #     state_dict[k_new] = state_dict_org[k_old]
-
-    #     return self.load_state_dict(state_dict, strict=False)
+    def freeze_backbone(self, requires_grad=False):
+        for _, p in self.backbone.named_parameters():
+            p.requires_grad = requires_grad
+    def unfreeze(self):
+        for _, p in self.named_parameters():
+            p.requires_grad = True
+    def freeze_info(self, print_all=False):
+        d = defaultdict(set)
+        print("Layer: param.requires_grad")
+        for name, param in self.named_parameters():
+            if print_all:
+                print(f"{name}: {param.requires_grad}")
+            else:
+                d[name.split('.')[0]].add(param.requires_grad)
+        if not print_all:
+            for k, v in d.items():
+                print(k, ': ', v)
+
+    def load_backbone(self, h5_path):
+        state_old = self.state_dict()
+        state_new = torch.load(h5_path)
+
+        skipped_layers = []
+        for k in list(state_new.keys()):
+            if state_old[k].shape != state_new[k].shape:
+                skipped_layers.append(k)
+                del state_new[k]
+
+        # for k in list(state_dict.keys()):
+        #     if k.startswith(('yolo_0_pre.15', 'yolo_1_pre.20')):
+        #         del state_dict[k]
+
+        # Renaming some keys if needed for compatibility:
+        # state_dict = type(state_dict_org)()
+        # for k_old in list(state_dict.keys()):
+        #     k_new = k_old.replace('backend', 'backbone')
+        #     state_dict[k_new] = state_dict_org[k_old]
+
+        return self.load_state_dict(state_new, strict=False), skipped_layers

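# Usage sketch (illustrative; the class name and weight path are hypothetical
# examples): load COCO-trained weights into a 20-class model, skipping the
# detection-head convolutions whose shapes no longer match, then train the new
# heads before fine-tuning everything.
#
#   model = Yolov3Tiny(num_classes=20)
#   _, skipped = model.load_backbone('data/models/yolov3_tiny_coco.h5')
#   print(skipped)            # the mismatched layers that were left out
#   model.freeze_backbone()   # train only the heads first
#   model.unfreeze()          # later, fine-tune the whole network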

###################################################################
2 changes: 1 addition & 1 deletion yolov3_pytorch/yolov3_tiny.py
@@ -54,7 +54,7 @@ def forward_yolo(self, xb):
         x_up = torch.cat((x_up, x_b_0), 1)
         y1 = self.yolo_1_pre(x_up)
 
-        return y0, y1
+        return [y0, y1]


###################################################################
