|
| 1 | +import argparse |
| 2 | + |
| 3 | +import cv2 as cv |
| 4 | +import glog as log |
| 5 | +import numpy as np |
| 6 | +from openvino.inference_engine import IECore |
| 7 | + |
| 8 | +from demo_tools import load_ie_model |
| 9 | +from torchdet3d.utils import draw_kp |
| 10 | + |
| 11 | + |
# Class names predicted by the Objectron regression model, indexed by integer label id.
OBJECTRON_CLASSES = ('bike', 'book', 'bottle', 'cereal_box', 'camera', 'chair', 'cup', 'laptop', 'shoe')
| 13 | + |
class Detector:
    """Wrapper around an SSD-style object detection network.

    Runs inference through an IE model and converts the raw output into
    a list of ``((left, top, right, bottom), confidence, label)`` tuples
    sorted by descending confidence.
    """

    def __init__(self, ie, model_path, conf=.6, device='CPU', ext_path=''):
        self.net = load_ie_model(ie, model_path, device, None, ext_path)
        self.confidence = conf          # minimum score kept (strict >)
        self.expand_ratio = (1., 1.)    # optional box inflation factors (w, h)

    def get_detections(self, frame):
        """Returns all detections on frame"""
        raw_out = self.net.forward(frame)
        return self.__decode_detections(raw_out, frame.shape)

    def __decode_detections(self, out, frame_shape):
        """Decodes raw SSD output into pixel-space boxes."""
        height, width = frame_shape[0], frame_shape[1]
        decoded = []

        for det in out[0, 0]:
            cls_id, score = det[1], det[2]
            if score <= self.confidence:
                continue
            # Normalized coords are clamped at 0 and scaled to pixels.
            x0 = int(max(det[3], 0) * width)
            y0 = int(max(det[4], 0) * height)
            x1 = int(max(det[5], 0) * width)
            y1 = int(max(det[6], 0) * height)
            if self.expand_ratio != (1., 1.):
                # Grow the box symmetrically around its center.
                half_dw = (x1 - x0) * (self.expand_ratio[0] - 1.) / 2
                half_dh = (y1 - y0) * (self.expand_ratio[1] - 1.) / 2
                x0 = max(int(x0 - half_dw), 0)
                x1 = int(x1 + half_dw)
                y0 = max(int(y0 - half_dh), 0)
                y1 = int(y1 + half_dh)

            decoded.append(((x0, y0, x1, y1), score, cls_id))

        if len(decoded) > 1:
            decoded.sort(key=lambda item: item[1], reverse=True)
        return decoded
| 54 | + |
| 55 | + |
class Regressor:
    """Wrapper around the keypoint regression network.

    Crops each detected box out of the frame, runs the regression model on
    the crop, and maps the predicted keypoints back into frame coordinates.
    """

    def __init__(self, ie, model_path, device='CPU', ext_path=''):
        self.net = load_ie_model(ie, model_path, device, None, ext_path)

    def get_detections(self, frame, detections):
        """Returns a ``(keypoints, label)`` tuple per input detection."""
        results = []
        for det in detections:
            crop_img = self.crop(frame, det[0])
            raw = self.net.forward(crop_img)
            results.append(self.__decode_detections(raw, det))
        return results

    def __decode_detections(self, out, rect):
        """Decodes raw regression model output for one detection."""
        label = int(rect[2])
        # The model emits one keypoint set per class; pick the detected one.
        class_kp = out[label]
        kp = self.transform_kp(class_kp[0], rect[0])
        return (kp, label)

    @staticmethod
    def transform_kp(kp: np.array, crop_cords: tuple):
        """Map normalized crop-space keypoints to frame pixels (in place)."""
        x0, y0, x1, y1 = crop_cords
        kp[:, 0] = kp[:, 0] * (x1 - x0) + x0
        kp[:, 1] = kp[:, 1] * (y1 - y0) + y0
        return kp

    @staticmethod
    def crop(frame, rect):
        """Return the sub-image of ``frame`` inside ``rect`` (x0, y0, x1, y1)."""
        x0, y0, x1, y1 = rect
        return frame[y0:y1, x0:x1]
| 94 | + |
def draw_detections(frame, reg_detections, det_detections, reg_only=True):
    """Render keypoints (and optionally boxes + class labels) onto ``frame``.

    ``reg_detections`` and ``det_detections`` are iterated in lockstep; the
    i-th regression output is assumed to correspond to the i-th detection.
    """
    for det_item, reg_item in zip(det_detections, reg_detections):
        left, top, right, bottom = det_item[0]
        kp, class_idx = reg_item[0], reg_item[1]
        label = OBJECTRON_CLASSES[class_idx]

        if not reg_only:
            cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), thickness=2)

        frame = draw_kp(frame, kp, None, RGB=False, normalized=False)

        # White backdrop behind the class name so it stays readable.
        label_size, base_line = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 1, 1)
        top = max(top, label_size[1])
        cv.rectangle(frame, (left, top - label_size[1]), (left + label_size[0], top + base_line),
                     (255, 255, 255), cv.FILLED)
        cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0))

    return frame
| 113 | + |
def run(params, capture, detector, regressor, write_video=False, resolution=(1280, 720)):
    """Starts the 3D object detection demo loop.

    Reads frames from ``capture``, runs detection + keypoint regression,
    displays the annotated frames, and optionally records them. Exits on
    Esc or when the stream ends; all resources are released on exit.

    Args:
        params: parsed CLI arguments (unused here; kept for interface compat).
        capture: an opened ``cv.VideoCapture``.
        detector: ``Detector`` instance producing 2D boxes.
        regressor: ``Regressor`` instance producing 3D keypoints.
        write_video: record the annotated stream to 'output_video_demo.mp4'.
        resolution: (width, height) the frames are resized to.
    """
    writer_video = None
    if write_video:
        # Only create the writer (and its fourcc) when actually recording.
        fourcc = cv.VideoWriter_fourcc(*'MP4V')
        fps = 24
        writer_video = cv.VideoWriter('output_video_demo.mp4', fourcc, fps, resolution)
    win_name = '3D-object-detection'
    while cv.waitKey(1) != 27:  # 27 == Esc
        has_frame, frame = capture.read()
        # BUG FIX: check has_frame BEFORE resizing — at end of stream
        # frame is None and cv.resize would raise. Also break (not
        # return) so the releases below always run.
        if not has_frame:
            break
        frame = cv.resize(frame, resolution)
        detections = detector.get_detections(frame)
        outputs = regressor.get_detections(frame, detections)

        frame = draw_detections(frame, outputs, detections, reg_only=False)
        cv.imshow(win_name, frame)
        if writer_video is not None:
            writer_video.write(cv.resize(frame, resolution))
    # BUG FIX: release the writer only if it was created; the original
    # unconditional release raised NameError when write_video was False.
    if writer_video is not None:
        writer_video.release()
    capture.release()
    cv.destroyAllWindows()
| 136 | + |
def main():
    """Parses CLI arguments, opens the capture source, builds the models,
    and launches the 3D object detection demo.

    Raises:
        AssertionError: if no video input was given for file mode, or the
            capture source cannot be opened.
    """
    parser = argparse.ArgumentParser(description='3d object detection live demo script')
    parser.add_argument('--video', type=str, default=None, help='Input video')
    parser.add_argument('--cam_id', type=int, default=-1, help='Input cam')
    # BUG FIX: a default keeps `tuple(args.resolution)` below from raising
    # TypeError on None when the flag is omitted; matches run()'s default.
    parser.add_argument('--resolution', type=int, nargs='+', default=[1280, 720],
                        help='capture resolution')
    parser.add_argument('--config', type=str, default=None, required=False,
                        help='Configuration file')
    parser.add_argument('--od_model', type=str, required=True)
    parser.add_argument('--reg_model', type=str, required=True)
    parser.add_argument('--det_tresh', type=float, required=False, default=0.6)
    parser.add_argument('--device', type=str, default='CPU')
    parser.add_argument('-l', '--cpu_extension',
                        help='MKLDNN (CPU)-targeted custom layers.Absolute path to a shared library with the kernels '
                             'impl.', type=str, default=None)
    # BUG FIX: argparse `type=bool` turns ANY non-empty string (including
    # "False") into True; a store_true flag is the correct boolean switch.
    parser.add_argument('--write_video', action='store_true',
                        help='if set, the video of the demo will be recorded')
    args = parser.parse_args()

    if args.cam_id >= 0:
        log.info('Reading from cam {}'.format(args.cam_id))
        cap = cv.VideoCapture(args.cam_id)
        cap.set(cv.CAP_PROP_FRAME_WIDTH, args.resolution[0])
        cap.set(cv.CAP_PROP_FRAME_HEIGHT, args.resolution[1])
        cap.set(cv.CAP_PROP_FOURCC, cv.VideoWriter_fourcc(*'MJPG'))
    else:
        assert args.video, "No video input was given"
        log.info('Reading from {}'.format(args.video))
        cap = cv.VideoCapture(args.video)
        cap.set(cv.CAP_PROP_FOURCC, cv.VideoWriter_fourcc(*'MJPG'))
    assert cap.isOpened()
    ie = IECore()
    object_detector = Detector(ie, args.od_model, args.det_tresh, args.device, args.cpu_extension)
    regressor = Regressor(ie, args.reg_model, args.device, args.cpu_extension)
    # running demo
    run(args, cap, object_detector, regressor, args.write_video, tuple(args.resolution))

if __name__ == '__main__':
    main()
0 commit comments