merge conflicts fix

kprokofi · kprokofi · commit dda89d186635 · 2021-04-19T15:54:53.000+03:00
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -14,7 +14,10 @@ jobs:
         python-version: [3.7]
 
     steps:
-    - uses: actions/checkout@v2
+    - name: Checkout repository and submodules
+      uses: actions/checkout@v2
+      with:
+        submodules: recursive
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v2
       with:
@@ -33,6 +36,12 @@ jobs:
       run: |
         python -m pip install --upgrade pip pytest pylint
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+    - name: Install torchdet3d
+      run: |
+        python setup.py develop
+    - name: Testing with pytest
+      run: |
+        python -m pytest . -s
     - name: Linting with pylint
       run: |
         python tests/run_pylint.py
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@ Training includes the following stages:
 - Training a 2d bounding box detection model
 - Training a 3d bounding box regression model
 
-Trained models can be deployed on CPU using [OpenVINO](https://docs.openvinotoolkit.org) framework and then run in [live demo]().
+Trained models can be deployed on CPU using [OpenVINO](https://docs.openvinotoolkit.org) framework and then run in [live demo](demo/demo.py).
 
 ## Installation guide
 ```bash
diff --git a/annotation_converters/objectron_2_coco.py b/annotation_converters/objectron_2_coco.py
@@ -87,9 +87,9 @@ def save_2_coco(output_root, subset_name, data_info, obj_classes, fps_divisor,
     ann_folder = osp.join(output_root, 'annotations')
     img_folder = osp.join(output_root, 'images')
     if not osp.isdir(ann_folder):
-        os.mkdir(ann_folder)
+        os.makedirs(ann_folder, exist_ok=True)
     if not osp.isdir(img_folder):
-        os.mkdir(img_folder)
+        os.makedirs(img_folder, exist_ok=True)
 
     img_id = 0
     ann_id = 0
@@ -142,8 +142,9 @@ def save_2_coco(output_root, subset_name, data_info, obj_classes, fps_divisor,
                 frames[frame_idx] = cv.resize(frames[frame_idx], (w, h))
                 for kp_pixel in keypoints[0]:
                     cv.circle(frames[frame_idx], (kp_pixel[0], kp_pixel[1]), 5, (255, 0, 0), -1)
-                for kp_pixel in keypoints[1]:
-                    cv.circle(frames[frame_idx], (kp_pixel[0], kp_pixel[1]), 5, (0, 0, 255), -1)
+                if len(keypoints) > 1:
+                    for kp_pixel in keypoints[1]:
+                        cv.circle(frames[frame_idx], (kp_pixel[0], kp_pixel[1]), 5, (0, 0, 255), -1)
                 for bbox in bboxes:
                     if bbox is not None:
                         cv.rectangle(frames[frame_idx], (bbox[0], bbox[1]),
diff --git a/annotation_converters/objectron_helpers.py b/annotation_converters/objectron_helpers.py
@@ -91,7 +91,7 @@ def grab_frames(video_file, frame_ids, use_opencv=True):
                 '-pix_fmt', 'rgb24', '-vcodec', 'rawvideo', '-vsync', 'vfr', '-'
             ]
             pipe = subprocess.Popen(
-                command, stdout=subprocess.PIPE, bufsize=151 * frame_size)
+                command, stdout=subprocess.PIPE, bufsize=151 * frame_size, stderr=subprocess.DEVNULL)
             current_frame = np.frombuffer(
                 pipe.stdout.read(frame_size), dtype='uint8').reshape(height, width, 3)
             pipe.stdout.flush()
diff --git a/demo/demo.py b/demo/demo.py
@@ -177,7 +177,7 @@ def main():
                         help='MKLDNN (CPU)-targeted custom layers.Absolute path to a shared library with the kernels '
                              'impl.', type=str, default=None)
     parser.add_argument('--write_video', action='store_true',
-                        help='wether or not to record demo video')
+                        help='whether to save a demo video or not')
     args = parser.parse_args()
 
     if args.cam_id >= 0:
diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,5 @@
 albumentations
-attrdict
+addict
 opencv-python
 numpy
 sklearn
@@ -18,4 +18,5 @@ efficientnet_lite1_pytorch_model
 efficientnet_lite2_pytorch_model
 optuna
 pylint
-isort
+isort
+pytest
diff --git a/tests/__init__.py b/tests/__init__.py
diff --git a/tests/run_pylint.py b/tests/run_pylint.py
@@ -12,9 +12,9 @@
         'configs/detection',
         'torchdet3d/models',
         'build',
-        'deprecated'
-        '.history/',
-        'torchdet3d/models'
+        'deprecated',
+        '.history',
+        'torchdet3d/models',
     ]
 
     to_pylint = []
diff --git a/tests/test_geometry.py b/tests/test_geometry.py
@@ -0,0 +1,40 @@
+import numpy as np
+
+from torchdet3d.utils import (lift_2d, get_default_camera_matrix,
+                              convert_camera_matrix_2_ndc, project_3d_points,
+                              convert_2d_to_ndc)
+
+
+from objectron.dataset import iou
+from objectron.dataset import box
+
+
+class TestCasesGeometry:
+    """Sanity checks for the 2D -> 3D keypoint lifting helpers."""
+
+    # One set of nine normalized 2D keypoints (lift_2d asserts sets of length 9).
+    test_kps = np.array([
+        [0.47714591, 0.47491544],
+        [0.73884577, 0.39749265],
+        [0.18508956, 0.40002537],
+        [0.74114597, 0.48664019],
+        [0.18273196, 0.48833901],
+        [0.64639187, 0.46719882],
+        [0.32766378, 0.46827659],
+        [0.64726073, 0.51853681],
+        [0.32699507, 0.51933688],
+    ])
+    # Reprojection tolerance and minimal accepted IoU used by the checks below.
+    EPS = 1e-5
+    IOU_THR = 0.5
+
+    def test_reprojection_error(self):
+        """Lift the reference keypoints to 3D and project them back to NDC."""
+        lifted = lift_2d([self.test_kps], portrait=True)[0]
+        ndc_camera = convert_camera_matrix_2_ndc(get_default_camera_matrix())
+        reprojected = project_3d_points(lifted, ndc_camera)
+        expected = convert_2d_to_ndc(self.test_kps, portrait=True)
+        distances = np.linalg.norm(expected - reprojected, axis=1)
+        # NOTE(review): np.any passes as soon as a single keypoint reprojects
+        # within EPS -- confirm whether np.all was intended.
+        assert np.any(distances < self.EPS)
+
+    def test_3d_iou_stability(self):
+        """Slightly perturbed keypoints must lift to a box overlapping the original."""
+        # Fixed seed keeps the perturbation, and thus the test, deterministic.
+        np.random.seed(10)
+        jitter = 0.01*np.random.rand(*self.test_kps.shape)
+        perturbed = np.clip(self.test_kps + jitter, 0, 1)
+        clean_3d, noisy_3d = lift_2d([self.test_kps, perturbed], portrait=True)
+
+        clean_box = box.Box(vertices=clean_3d)
+        noisy_box = box.Box(vertices=noisy_3d)
+
+        overlap = iou.IoU(clean_box, noisy_box)
+        assert overlap.iou() > self.IOU_THR
diff --git a/torchdet3d/__init__.py b/torchdet3d/__init__.py
@@ -3,13 +3,13 @@
 import sys
 import os
 
-from .version import __version__
-
 module_path = os.path.abspath(os.path.join(os.path.dirname('__init__.py'), '3rdparty/Objectron'))
 if module_path not in sys.path:
     sys.path.append(module_path)
 
+#pylint: disable = wrong-import-position
 from torchdet3d import builders, evaluation, dataloaders, trainer, models, utils, losses
+from .version import __version__
 
 __author__ = 'Sovrasov Vladislav, Prokofiev Kirill'
 __description__ = 'A library for deep learning 3D object detection in PyTorch'
diff --git a/torchdet3d/evaluation/evaluate.py b/torchdet3d/evaluation/evaluate.py
@@ -7,7 +7,7 @@
 from tqdm import tqdm
 from copy import deepcopy
 
-from .metrics import compute_accuracy, compute_average_distance, compute_metrics_per_cls
+from .metrics import compute_accuracy, compute_average_distance, compute_metrics_per_cls, compute_2d_based_iou
 from torchdet3d.utils import (AverageMeter, mkdir_if_missing, draw_kp, OBJECTRON_CLASSES)
 from torchdet3d.builders import build_augmentations
 from torchdet3d.dataloaders import Objectron
@@ -75,15 +75,17 @@ def visual_test(self):
                     RGB=False,
                     normalized=False,
                     label=label)
-
+    @torch.no_grad()
     def val(self, epoch=None):
         ''' procedure launching main validation '''
         ADD_meter = AverageMeter()
         SADD_meter = AverageMeter()
         ACC_meter = AverageMeter()
+        IOU_meter = AverageMeter()
         ADD_cls_meter = [AverageMeter() for cl in range(self.num_classes)]
         SADD_cls_meter = [AverageMeter() for cl in range(self.num_classes)]
         acc_cls_meter = [AverageMeter() for cl in range(self.num_classes)]
+        IOU__cls_meter = [AverageMeter() for cl in range(self.num_classes)]
 
         # switch to eval mode
         self.model.eval()
@@ -95,16 +97,19 @@ def val(self, epoch=None):
             pred_kp, pred_cats = self.model(imgs, gt_cats)
             # measure metrics
             ADD, SADD = compute_average_distance(pred_kp, gt_kp)
+            IOU = compute_2d_based_iou(pred_kp, gt_kp)
+            acc = compute_accuracy(pred_cats, gt_cats)
+
             for cl, ADD_cls, SADD_cls, acc_cls in compute_metrics_per_cls(pred_kp, gt_kp, gt_cats, pred_cats):
                 ADD_cls_meter[cl].update(ADD_cls, imgs.size(0))
                 SADD_cls_meter[cl].update(SADD_cls, imgs.size(0))
                 acc_cls_meter[cl].update(acc_cls, imgs.size(0))
 
-            acc = compute_accuracy(pred_cats, gt_cats)
             # record loss
             ADD_meter.update(ADD, imgs.size(0))
             SADD_meter.update(SADD, imgs.size(0))
             ACC_meter.update(acc, imgs.size(0))
+            IOU_meter.update(IOU)
             if epoch is not None:
                 # update progress bar
                 loop.set_description(f'Val Epoch [{epoch}/{self.max_epoch}]')
@@ -136,6 +141,7 @@ def val(self, epoch=None):
               f"{ep_mess}"
               f"ADD overall ---> {ADD_meter.avg}\n"
               f"SADD overall ---> {SADD_meter.avg}\n"
+              f"IOU ---> {IOU_meter.avg}\n"
               f"classification accuracy overall ---> {ACC_meter.avg}\n"
               f"{per_class_metr_message}")
 
diff --git a/torchdet3d/evaluation/metrics.py b/torchdet3d/evaluation/metrics.py
@@ -1,4 +1,12 @@
 import torch
+import scipy
+import numpy as np
+
+from objectron.dataset import iou
+from objectron.dataset import box
+
+from torchdet3d.utils import lift_2d
+
 
 def compute_average_distance(pred_kp, gt_kp, num_keypoint=9, **kwargs):
     """Computes Average Distance (ADD) metric."""
@@ -41,3 +49,21 @@ def compute_metrics_per_cls(pred_kp, gt_kp, gt_cats, pred_cats, **kwargs):
         computed_metrics.append((cl, ADD, SADD, acc))
 
     return computed_metrics
+
+def compute_2d_based_iou(pred_kp: torch.Tensor, gt_kp: torch.Tensor):
+    """Compute the batch-mean 3D IoU of boxes lifted from 2D keypoints.
+
+    For every sample the predicted and ground-truth keypoints are lifted to 3D
+    with ``lift_2d`` (portrait orientation), wrapped into Objectron ``Box``
+    objects and compared with Objectron's ``IoU``.
+
+    Args:
+        pred_kp: predicted keypoints; must be a 3-dimensional tensor whose
+            first dimension is the batch.
+        gt_kp: ground-truth keypoints, indexed per sample like ``pred_kp``.
+
+    Returns:
+        Sum of the per-sample IoU values divided by the batch size. A sample
+        whose IoU computation raises ``QhullError`` or ``LinAlgError``
+        contributes 0. An empty batch yields 0.0.
+    """
+    # Function-scope import of the public name: a bare ``import scipy`` does not
+    # guarantee that ``scipy.spatial`` is loaded, and the ``scipy.spatial.qhull``
+    # namespace is deprecated.
+    from scipy.spatial import QhullError
+
+    assert len(pred_kp.shape) == 3
+    bs = pred_kp.shape[0]
+    if bs == 0:
+        # Nothing to average; avoid dividing by zero.
+        return 0.0
+
+    # detach() keeps the conversion valid even outside a no_grad context.
+    pred_kp_np = pred_kp.detach().cpu().numpy()
+    gt_kp_np = gt_kp.detach().cpu().numpy()
+    total_iou = 0
+    for i in range(bs):
+        kps_3d = lift_2d([pred_kp_np[i], gt_kp_np[i]], portrait=True)
+        b_pred = box.Box(vertices=kps_3d[0])
+        b_gt = box.Box(vertices=kps_3d[1])
+        try:
+            total_iou += iou.IoU(b_pred, b_gt).iou()
+        except (QhullError, np.linalg.LinAlgError):
+            # Deliberate best effort: a box pair the IoU code cannot handle
+            # is scored as zero overlap instead of aborting the evaluation.
+            continue
+    return total_iou / bs
diff --git a/torchdet3d/utils/__init__.py b/torchdet3d/utils/__init__.py
@@ -1,2 +1,3 @@
 from .utils import *
 from .transforms import *
+from .geometry import *
diff --git a/torchdet3d/utils/geometry.py b/torchdet3d/utils/geometry.py
@@ -0,0 +1,138 @@
+from typing import List, Tuple
+
+import numpy as np
+
+
+# Weights expressing each of the 8 box vertices (rows) as a linear combination
+# of 4 control points (columns). lift_2d uses them both to build its linear
+# system and to recover the vertices as ``__epnp_alpha__ @ control_matrix``.
+__epnp_alpha__ = np.array([[4, -1, -1, -1],
+                           [2, -1, -1,  1],
+                           [2, -1,  1, -1],
+                           [0, -1,  1,  1],
+                           [2,  1, -1, -1],
+                           [0,  1, -1,  1],
+                           [0,  1,  1, -1],
+                           [-2, 1,  1,  1]])
+
+
+def get_default_camera_matrix():
+    """Return the default 3x3 camera matrix.
+
+    The diagonal entries are 1 and the last column is (0.5, 0.5, 1).
+    """
+    focal = 1
+    center = 0.5
+    rows = [
+        [focal, 0, center],
+        [0, focal, center],
+        [0, 0, 1],
+    ]
+    return np.array(rows)
+
+
+def project_3d_points(points: np.array, camera_matrix: np.array):
+    """Project 3D points with ``camera_matrix`` and drop the last coordinate.
+
+    ``points`` must be a 2-dimensional array with one point per row. Every
+    projected row is divided by the negative of its third component, and all
+    columns except the last are returned.
+    """
+    assert len(points.shape) == 2
+    projected = (camera_matrix @ points.T).T
+    # Negated third column, shaped as a column vector for row-wise broadcasting.
+    neg_depth = -projected[:, 2].reshape(-1, 1)
+    projected /= neg_depth
+    return projected[:, :-1]
+
+
+def convert_camera_matrix_2_ndc(matrix: np.ndarray, img_shape: Tuple[int, int]=(1, 1)):
+    """Return a copy of a camera matrix rescaled by ``img_shape`` for NDC use.
+
+    Args:
+        matrix: camera matrix; entries ``[0, 0]``, ``[1, 1]``, ``[0, 2]`` and
+            ``[1, 2]`` are rewritten in the copy. The input is never modified.
+        img_shape: pair of divisors; element 0 scales row 0 and element 1
+            scales row 1.
+
+    Returns:
+        A new floating-point array. Integer input is promoted to float64 so the
+        scaled entries are not truncated when written back.
+    """
+    ndc_mat = np.array(matrix, copy=True)
+    if not np.issubdtype(ndc_mat.dtype, np.inexact):
+        # In an integer array, item assignment would silently truncate e.g. 0.5 to 0.
+        ndc_mat = ndc_mat.astype(np.float64)
+
+    ndc_mat[0, 0] *= 2.0 / img_shape[0]
+    ndc_mat[1, 1] *= 2.0 / img_shape[1]
+
+    ndc_mat[0, 2] = -ndc_mat[0, 2] * 2.0 / img_shape[0] + 1.0
+    ndc_mat[1, 2] = -ndc_mat[1, 2] * 2.0 / img_shape[1] + 1.0
+
+    return ndc_mat
+
+
+def convert_2d_to_ndc(points: np.array, portrait: bool=False):
+    """Map 2D points to NDC, returning a new array shaped like ``points``.
+
+    Landscape (default): column 0 becomes ``x * 2 - 1`` and column 1 becomes
+    ``1 - y * 2``. Portrait: the two input columns are swapped and both are
+    mapped with ``value * 2 - 1``.
+    """
+    ndc = np.zeros_like(points)
+    first, second = points[:, 0], points[:, 1]
+    if not portrait:
+        ndc[:, 0] = first * 2 - 1
+        ndc[:, 1] = 1 - second * 2
+        return ndc
+    ndc[:, 0] = second * 2 - 1
+    ndc[:, 1] = first * 2 - 1
+    return ndc
+
+
+def lift_2d(keypoint_sets: List[np.array],
+            camera_matrix: np.array=get_default_camera_matrix(),
+            portrait: bool=False) -> List[np.array]:
+    """
+    Function takes normalized 2d coordinates of 2d keypoints on the image plane,
+    camera matrix in normalized image space and outputs lifted 3d points in camera coordinates,
+    which are defined up to an unknown scale factor
+
+    Args:
+        keypoint_sets: list of keypoint sets; each set must hold exactly 9
+            keypoints. Keypoint 0 is not used for fitting, only keypoints 1..8.
+        camera_matrix: camera matrix, converted with
+            convert_camera_matrix_2_ndc before use. The default is created once,
+            when the function is defined.
+        portrait: when True the two keypoint coordinates are swapped before
+            being mapped to NDC.
+
+    Returns:
+        One (9, 3) array per input set: row 0 is the first control point,
+        rows 1..8 are the vertices rebuilt from the control points.
+    """
+    ndc_cam_mat = convert_camera_matrix_2_ndc(camera_matrix)
+    fx = ndc_cam_mat[0, 0]
+    fy = ndc_cam_mat[1, 1]
+    cx = ndc_cam_mat[0, 2]
+    cy = ndc_cam_mat[1, 2]
+
+    lifted_keypoint_sets = []
+
+    for kp_set in keypoint_sets:
+        # Linear system: 2 equations per vertex (8 vertices) over the
+        # 12 unknowns formed by 4 control points with 3 coordinates each.
+        m = np.zeros((16, 12))
+        assert len(kp_set) == 9
+
+        for i in range(8):
+            # Skip keypoint 0; rows of __epnp_alpha__ correspond to keypoints 1..8.
+            kp = kp_set[i + 1]
+            # Convert 2d point from normalized screen coordinates [0, 1] to NDC coordinates([-1, 1]).
+            if portrait:
+                u = kp[1] * 2 - 1
+                v = kp[0] * 2 - 1
+            else:
+                u = kp[0] * 2 - 1
+                v = 1 - kp[1] * 2
+
+            for j in range(4):
+                # For each of the 4 control points, formulate two rows of the
+                # m matrix (two equations).
+                control_alpha = __epnp_alpha__[i, j]
+                m[i * 2, j * 3] = fx * control_alpha
+                m[i * 2, j * 3 + 2] = (cx + u) * control_alpha
+                m[i * 2 + 1, j * 3 + 1] = fy * control_alpha
+                m[i * 2 + 1, j * 3 + 2] = (cy + v) * control_alpha
+
+        mt_m = np.matmul(m.T, m)
+        # eigh returns eigenvalues in ascending order, so column 0 of the
+        # eigenvector matrix belongs to the smallest eigenvalue of m^T m.
+        # Note: ``v`` is rebound here; it no longer holds the NDC coordinate.
+        w, v = np.linalg.eigh(mt_m)
+        assert w.shape[0] == 12
+        control_matrix = v[:, 0].reshape(4, 3)
+        # All 3d points should be in front of camera (z < 0).
+        if control_matrix[0, 2] > 0:
+            control_matrix = -control_matrix
+
+        lifted_kp_set = []
+        # The first control point becomes output row 0.
+        lifted_kp_set.append(control_matrix[0, :])
+        vertices = np.matmul(__epnp_alpha__, control_matrix)
+
+        for i in range(8):
+            lifted_kp_set.append(vertices[i, :])
+
+        lifted_kp_set = np.array(lifted_kp_set)
+        lifted_keypoint_sets.append(lifted_kp_set)
+
+    return lifted_keypoint_sets
+
+
+def draw_boxes(boxes=[], clips=[], colors=['r', 'b', 'g', 'k']):
+    """Draw a list of boxes and save the plot to ``3d_boxes.png``.
+
+    Args:
+        boxes: sequence of vertex arrays; columns 0, 1 and 2 of each array are
+            plotted as x, y and z. Edges are taken from Objectron's
+            ``box.EDGES``.
+        clips: optional sequence of 3D points drawn as large black markers.
+        colors: edge colors, cycled over the boxes.
+
+    The default arguments are only read, never mutated, so sharing them
+    between calls is harmless.
+    """
+    import matplotlib.pyplot as plt
+    from objectron.dataset import box
+
+    fig = plt.figure(figsize=(10, 10))
+    try:
+        ax = fig.add_subplot(111, projection='3d')
+        for i, b in enumerate(boxes):
+            x, y, z = b[:, 0], b[:, 1], b[:, 2]
+            ax.scatter(x, y, z, c='r')
+            for e in box.EDGES:
+                ax.plot(x[e], y[e], z[e], linewidth=2, c=colors[i % len(colors)])
+
+        # len() instead of truthiness: ``if clips`` raises for a multi-row ndarray.
+        if len(clips) > 0:
+            points = np.array(clips)
+            ax.scatter(points[:, 0], points[:, 1], points[:, 2], s=100, c='k')
+
+        ax.patch.set_facecolor('white')
+        # The w_xaxis/w_yaxis/w_zaxis aliases are deprecated; use the axes directly.
+        pane_color = (0.8, 0.8, 0.8, 1.0)
+        for axis in (ax.xaxis, ax.yaxis, ax.zaxis):
+            axis.set_pane_color(pane_color)
+
+        # rotate the axes and update
+        ax.view_init(30, 12)
+        plt.draw()
+        fig.savefig('3d_boxes.png')
+    finally:
+        # Release the figure so repeated calls do not accumulate open figures.
+        plt.close(fig)
diff --git a/torchdet3d/utils/utils.py b/torchdet3d/utils/utils.py

Original file line number	Diff line number	Diff line change
`@@ -91,7 +91,7 @@ def grab_frames(video_file, frame_ids, use_opencv=True):`
`91`	`91`	`'-pix_fmt', 'rgb24', '-vcodec', 'rawvideo', '-vsync', 'vfr', '-'`
`92`	`92`	`]`
`93`	`93`	`pipe = subprocess.Popen(`
`94`		`- command, stdout=subprocess.PIPE, bufsize=151 * frame_size)`
	`94`	`+ command, stdout=subprocess.PIPE, bufsize=151 * frame_size, stderr=subprocess.DEVNULL)`
`95`	`95`	`current_frame = np.frombuffer(`
`96`	`96`	`pipe.stdout.read(frame_size), dtype='uint8').reshape(height, width, 3)`
`97`	`97`	`pipe.stdout.flush()`
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,3 @@`
`1`	`1`	`from .utils import *`
`2`	`2`	`from .transforms import *`
	`3`	`+from .geometry import *`