
Commit 33823a5: Merge pull request #6 from sovrasov/kp/regressor (Kp/regressor)
2 parents e1c4560 + 2622188
25 files changed: +935 -268 lines

annotation_converters/objectron_2_coco.py

Lines changed: 3 additions & 3 deletions
@@ -212,9 +212,9 @@ def main():
     for k in data_info:
         print('Converting ' + k)
         stat = save_2_coco(args.output_folder, k, data_info[k], args.obj_classes,
-                           args.fps_divisor, args.res_divisor, not args.only_annotation, ['shoe'])
-        for s in stat:
-            print(f'{s}: {stat[s]}')
+                           args.fps_divisor, args.res_divisor, not args.only_annotation, ['shoe', 'bike'])
+        for c in stat:
+            print(f'{c}: {stat[c]}')
 
 
 if __name__ == '__main__':
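
The hunk above widens the hard-coded category whitelist passed to save_2_coco from ['shoe'] to ['shoe', 'bike'] and renames the loop variable used to print the per-class statistics dict. A minimal, self-contained sketch of that whitelist-and-report pattern, with made-up annotation data (the real save_2_coco in this repo performs the actual Objectron-to-COCO conversion):

# Minimal sketch of the whitelist + per-class stats pattern from the diff above.
# `annotations` and its contents are hypothetical illustration data.
whitelist = ['shoe', 'bike']
annotations = [{'class': 'shoe'}, {'class': 'cup'}, {'class': 'bike'}, {'class': 'shoe'}]

stat = {}
for ann in annotations:
    if ann['class'] in whitelist:          # keep only whitelisted classes
        stat[ann['class']] = stat.get(ann['class'], 0) + 1

for c in stat:                             # same reporting loop as in the diff
    print(f'{c}: {stat[c]}')               # shoe: 2, bike: 1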

configs/default_config.py

Lines changed: 14 additions & 12 deletions
@@ -1,26 +1,28 @@
 data = dict(
     root="./data",
-    resize=(224,128),
-    batch_size=128,
-    max_epochs=70,
+    resize=(224,224),
+    train_batch_size=64,
+    val_batch_size=32,
+    max_epochs=200,
     num_workers=4,
+    category_list='all',
     normalization=dict(mean=[0.5931, 0.4690, 0.4229],
                        std=[0.2471, 0.2214, 0.2157])
 )
 
-data_parallel = dict(use_parallel=True,
-                     parallel_params=dict(device_ids=[0,1], output_device=0))
+model = dict(name='mobilenetv3_large', pretrained=True, num_classes=1, load_weights='')
 
-model = dict(load_weights='')
+data_parallel = dict(use_parallel=True,
+                     parallel_params=dict(device_ids=[0], output_device=0))
 
-optim = dict(name='sgd', lr=0.01, momentum=0.9, wd=5e-4, betas=(0.9, 0.999), rho=0.9, alpha=0.99)
+optim = dict(name='adam', lr=0.001, momentum=0.9, wd=1e-4, betas=(0.9, 0.999), rho=0.9, alpha=0.99)
 
-scheduler = dict(name='cosine')
+scheduler = dict(name='exp', gamma=0.1, exp_gamma=0.975, steps=[50])
 
-loss=dict(names=['smoothl1', 'cross_entropy'])
+loss=dict(names=['mse', 'add_loss'], coeffs=([1., .1],[]), smoothl1_beta=0.2)
 
-output_dir = './output/exp_1'
+utils=dict(debug_mode = False, random_seeds=5, save_freq=10, print_freq=20, debug_steps=30)
 
-debug_mode = False
+output_dir = './output/exp_0'
 
-regime = 'training'
+regime = dict(type='training', vis_only=False)
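
default_config.py is a plain Python module of dicts; scripts/export.py (below) loads it with read_py_config and accesses it attribute-style (cfg.data.resize). A minimal sketch of how such a module-style config can be consumed, assuming the helper simply executes the file; the real read_py_config lives in torchdet3d.utils and evidently also provides attribute access:

# Sketch of loading a Python-module config like configs/default_config.py.
# Assumes the helper just executes the module; the repo's read_py_config
# may differ (it appears to support cfg.data.normalization.mean access).
import runpy

def read_py_config_sketch(path):
    module_globals = runpy.run_path(path)   # execute the config module
    return {k: v for k, v in module_globals.items() if not k.startswith('_')}

cfg = read_py_config_sketch('configs/default_config.py')
print(cfg['data']['resize'])    # (224, 224)
print(cfg['optim']['name'])     # 'adam'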

demo/demo.py

Lines changed: 176 additions & 0 deletions
import argparse

import cv2 as cv
import glog as log
import numpy as np
from openvino.inference_engine import IECore

from demo_tools import load_ie_model
from torchdet3d.utils import draw_kp


OBJECTRON_CLASSES = ('bike', 'book', 'bottle', 'cereal_box', 'camera', 'chair', 'cup', 'laptop', 'shoe')

class Detector:
    """Wrapper class for object detector"""
    def __init__(self, ie, model_path, conf=.6, device='CPU', ext_path=''):
        self.net = load_ie_model(ie, model_path, device, None, ext_path)
        self.confidence = conf
        self.expand_ratio = (1., 1.)

    def get_detections(self, frame):
        """Returns all detections on frame"""
        out = self.net.forward(frame)
        detections = self.__decode_detections(out, frame.shape)
        return detections

    def __decode_detections(self, out, frame_shape):
        """Decodes raw SSD output"""
        detections = []

        for detection in out[0, 0]:
            label = detection[1]
            confidence = detection[2]
            if confidence > self.confidence:
                left = int(max(detection[3], 0) * frame_shape[1])
                top = int(max(detection[4], 0) * frame_shape[0])
                right = int(max(detection[5], 0) * frame_shape[1])
                bottom = int(max(detection[6], 0) * frame_shape[0])
                if self.expand_ratio != (1., 1.):
                    w = (right - left)
                    h = (bottom - top)
                    dw = w * (self.expand_ratio[0] - 1.) / 2
                    dh = h * (self.expand_ratio[1] - 1.) / 2
                    left = max(int(left - dw), 0)
                    right = int(right + dw)
                    top = max(int(top - dh), 0)
                    bottom = int(bottom + dh)

                detections.append(((left, top, right, bottom), confidence, label))

        if len(detections) > 1:
            detections.sort(key=lambda x: x[1], reverse=True)
        return detections


class Regressor:
    """Wrapper class for regression model"""
    def __init__(self, ie, model_path, device='CPU', ext_path=''):
        self.net = load_ie_model(ie, model_path, device, None, ext_path)

    def get_detections(self, frame, detections):
        """Returns all detections on frame"""
        outputs = []
        for rect in detections:
            cropped_img = self.crop(frame, rect[0])
            out = self.net.forward(cropped_img)
            out = self.__decode_detections(out, rect)
            outputs.append(out)
        return outputs

    def __decode_detections(self, out, rect):
        """Decodes raw regression model output"""
        label = int(rect[2])
        kp = out[label]
        kp = self.transform_kp(kp[0], rect[0])

        return (kp, label)

    @staticmethod
    def transform_kp(kp: np.ndarray, crop_cords: tuple):
        x0, y0, x1, y1 = crop_cords
        crop_shape = (x1 - x0, y1 - y0)
        kp[:, 0] = kp[:, 0] * crop_shape[0]
        kp[:, 1] = kp[:, 1] * crop_shape[1]
        kp[:, 0] += x0
        kp[:, 1] += y0
        return kp

    @staticmethod
    def crop(frame, rect):
        x0, y0, x1, y1 = rect
        crop = frame[y0:y1, x0:x1]
        return crop

def draw_detections(frame, reg_detections, det_detections, reg_only=True):
    """Draws detections and labels"""
    for det_out, reg_out in zip(det_detections, reg_detections):
        left, top, right, bottom = det_out[0]
        kp = reg_out[0]
        label = reg_out[1]
        label = OBJECTRON_CLASSES[label]
        if not reg_only:
            cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), thickness=2)

        frame = draw_kp(frame, kp, None, RGB=False, normalized=False)
        label_size, base_line = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 1, 1)
        top = max(top, label_size[1])
        cv.rectangle(frame, (left, top - label_size[1]), (left + label_size[0], top + base_line),
                     (255, 255, 255), cv.FILLED)
        cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0))

    return frame

def run(params, capture, detector, regressor, write_video=False, resolution=(1280, 720)):
    """Starts the 3D object detection demo"""
    fourcc = cv.VideoWriter_fourcc(*'MP4V')
    fps = 24
    if write_video:
        writer_video = cv.VideoWriter('output_video_demo.mp4', fourcc, fps, resolution)
    win_name = '3D-object-detection'
    while cv.waitKey(1) != 27:
        has_frame, frame = capture.read()
        if not has_frame:  # check before resizing: frame is None once the stream ends
            break
        frame = cv.resize(frame, resolution)
        detections = detector.get_detections(frame)
        outputs = regressor.get_detections(frame, detections)

        frame = draw_detections(frame, outputs, detections, reg_only=False)
        cv.imshow(win_name, frame)
        if write_video:
            writer_video.write(cv.resize(frame, resolution))
    if write_video:  # the writer exists only when recording was requested
        writer_video.release()
    capture.release()
    cv.destroyAllWindows()

def main():
    """Prepares data for the 3d object detection demo"""

    parser = argparse.ArgumentParser(description='3d object detection live demo script')
    parser.add_argument('--video', type=str, default=None, help='Input video')
    parser.add_argument('--cam_id', type=int, default=-1, help='Input cam')
    parser.add_argument('--resolution', type=int, nargs='+', default=(1280, 720),
                        help='capture resolution')
    parser.add_argument('--config', type=str, default=None, required=False,
                        help='Configuration file')
    parser.add_argument('--od_model', type=str, required=True)
    parser.add_argument('--reg_model', type=str, required=True)
    parser.add_argument('--det_tresh', type=float, required=False, default=0.6)
    parser.add_argument('--device', type=str, default='CPU')
    parser.add_argument('-l', '--cpu_extension',
                        help='MKLDNN (CPU)-targeted custom layers. Absolute path to a shared library with the kernels '
                             'impl.', type=str, default=None)
    parser.add_argument('--write_video', action='store_true',
                        help='if set, the video of the demo will be recorded')
    args = parser.parse_args()

    if args.cam_id >= 0:
        log.info('Reading from cam {}'.format(args.cam_id))
        cap = cv.VideoCapture(args.cam_id)
        cap.set(cv.CAP_PROP_FRAME_WIDTH, args.resolution[0])
        cap.set(cv.CAP_PROP_FRAME_HEIGHT, args.resolution[1])
        cap.set(cv.CAP_PROP_FOURCC, cv.VideoWriter_fourcc(*'MJPG'))
    else:
        assert args.video, "No video input was given"
        log.info('Reading from {}'.format(args.video))
        cap = cv.VideoCapture(args.video)
        cap.set(cv.CAP_PROP_FOURCC, cv.VideoWriter_fourcc(*'MJPG'))
    assert cap.isOpened()
    ie = IECore()
    object_detector = Detector(ie, args.od_model, args.det_tresh, args.device, args.cpu_extension)
    regressor = Regressor(ie, args.reg_model, args.device, args.cpu_extension)
    # running demo
    run(args, cap, object_detector, regressor, args.write_video, tuple(args.resolution))

if __name__ == '__main__':
    main()
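
A note on Regressor.transform_kp above: the regression model predicts keypoints in normalized [0, 1] coordinates of the detector crop, so each point is scaled by the crop size and shifted by the crop origin to land in full-frame pixels. A self-contained numpy check of that mapping, with made-up values:

# Stand-alone check of the keypoint de-normalization in Regressor.transform_kp.
import numpy as np

def transform_kp(kp, crop_coords):
    x0, y0, x1, y1 = crop_coords
    kp = kp.copy()
    kp[:, 0] = kp[:, 0] * (x1 - x0) + x0   # x: scale by crop width, shift by left edge
    kp[:, 1] = kp[:, 1] * (y1 - y0) + y0   # y: scale by crop height, shift by top edge
    return kp

kp = np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]])
print(transform_kp(kp, (100, 200, 300, 400)))
# [[100. 200.]
#  [200. 300.]
#  [300. 400.]]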

demo/demo_tools.py

Lines changed: 71 additions & 0 deletions
import os

import cv2 as cv
import glog as log
import numpy as np


class IEModel:
    """Class for inference of models in the Inference Engine format"""
    def __init__(self, exec_net, inputs_info, input_key, output_key):
        self.net = exec_net
        self.inputs_info = inputs_info
        self.input_key = input_key
        self.output_key = output_key
        self.reqs_ids = []

    def _preprocess(self, img):
        _, _, h, w = self.get_input_shape()
        img = np.expand_dims(cv.resize(img, (w, h)).transpose(2, 0, 1), axis=0)
        return img

    def forward(self, img):
        """Performs forward pass of the wrapped IE model"""
        res = self.net.infer(inputs={self.input_key: self._preprocess(img)})
        return np.copy(res[self.output_key])

    def forward_async(self, img):
        id_ = len(self.reqs_ids)
        self.net.start_async(request_id=id_,
                             inputs={self.input_key: self._preprocess(img)})
        self.reqs_ids.append(id_)

    def grab_all_async(self):
        outputs = []
        for id_ in self.reqs_ids:
            self.net.requests[id_].wait(-1)
            res = self.net.requests[id_].output_blobs[self.output_key].buffer
            outputs.append(np.copy(res))
        self.reqs_ids = []
        return outputs

    def get_input_shape(self):
        """Returns an input shape of the wrapped IE model"""
        return self.inputs_info[self.input_key].input_data.shape


def load_ie_model(ie, model_xml, device, plugin_dir, cpu_extension='', num_reqs=1):
    """Loads a model in the Inference Engine format"""
    # Plugin initialization for specified device and load extensions library if specified
    log.info("Initializing Inference Engine plugin for %s ", device)

    if cpu_extension and 'CPU' in device:
        ie.add_extension(cpu_extension, 'CPU')
    # Read IR
    log.info("Loading network")
    net = ie.read_network(model_xml, os.path.splitext(model_xml)[0] + ".bin")

    assert len(net.input_info) == 1 or len(net.input_info) == 2, \
        "Supports topologies with only 1 or 2 inputs"
    assert len(net.outputs) == 1 or len(net.outputs) == 4 or len(net.outputs) == 5, \
        "Supports topologies with only 1, 4 or 5 outputs"

    log.info("Preparing input blobs")
    input_blob = next(iter(net.input_info))
    out_blob = next(iter(net.outputs))
    net.batch_size = 1

    # Loading model to the plugin
    log.info("Loading model to the plugin")
    exec_net = ie.load_network(network=net, device_name=device, num_requests=num_reqs)
    model = IEModel(exec_net, net.input_info, input_blob, out_blob)
    return model
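
IEModel._preprocess converts an OpenCV HWC frame into the NCHW blob the Inference Engine expects: resize to the network input size, move channels first, add a batch dimension. A stand-alone numpy/OpenCV illustration of the same transform, with hypothetical shapes:

# Illustration of the HWC -> NCHW preprocessing done by IEModel._preprocess.
import cv2 as cv
import numpy as np

img = np.zeros((720, 1280, 3), dtype=np.uint8)   # HWC frame, as OpenCV delivers it
n, c, h, w = 1, 3, 224, 224                      # assumed network input shape (N, C, H, W)

blob = np.expand_dims(cv.resize(img, (w, h)).transpose(2, 0, 1), axis=0)
print(blob.shape)                                # (1, 3, 224, 224)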

requirements.txt

Lines changed: 6 additions & 1 deletion
@@ -11,4 +11,9 @@ glog
 torchvision
 torch
 gdown
-icecream
+icecream
+efficientnet_lite_pytorch
+efficientnet_lite0_pytorch_model
+efficientnet_lite1_pytorch_model
+pylint
+isort

scripts/export.py

Lines changed: 74 additions & 0 deletions
import argparse
import os
from subprocess import run, DEVNULL, CalledProcessError

import torch

from torchdet3d.builders import build_model
from torchdet3d.utils import load_pretrained_weights, read_py_config


def export_onnx(model, snapshot_path, img_size=(128, 128), save_path='model.onnx'):
    # input to inference model
    dummy_input = torch.rand(size=(1, 3, *img_size))
    dummy_cat = torch.zeros(1, dtype=torch.long)  # category placeholder (unused by the current export call)
    # load checkpoint from config
    load_pretrained_weights(model, snapshot_path)
    # convert model to onnx
    input_names = ["data"]
    output_names = ["cls_bbox"]
    with torch.no_grad():
        model.eval()
        torch.onnx.export(model, args=dummy_input, f=save_path, verbose=True,
                          input_names=input_names, output_names=output_names)

def export_mo(onnx_model_path, mean_values, scale_values, save_path):
    command_line = (f'mo.py --input_model="{onnx_model_path}" '
                    f'--mean_values="{mean_values}" '
                    f'--scale_values="{scale_values}" '
                    f'--output_dir="{save_path}" '
                    f'--reverse_input_channels ')

    try:
        run('mo.py -h', stdout=DEVNULL, stderr=DEVNULL, shell=True, check=True)
    except CalledProcessError as _:
        print('OpenVINO Model Optimizer not found, please source '
              'openvino/bin/setupvars.sh before running this script.')
        return

    run(command_line, shell=True, check=True)

def main():
    # parse arguments
    parser = argparse.ArgumentParser(description='converting model to onnx/mo')
    parser.add_argument('--config', type=str, default=None, required=True,
                        help='path to configuration file')
    parser.add_argument('--model_onnx_path', type=str, default='./converted_models/model.onnx', required=False,
                        help='path where to save the model in onnx format')
    parser.add_argument('--model_torch_path', type=str, required=False,
                        help='path where to get the model in .pth format. '
                             'By default the model will be obtained from config, the latest epoch')
    parser.add_argument('--model_mo_path', type=str, default='./converted_models', required=False,
                        help='path where to save the model in IR format')
    parser.add_argument('--convert_mo', type=bool, default=True, required=False,
                        help='argument defines whether or not to convert to IR format')

    args = parser.parse_args()
    # read config
    cfg = read_py_config(args.config)
    if not args.model_torch_path:
        snapshots = os.listdir(cfg.output_dir)
        # snapshot names are expected to look like 'snap_<epoch>.pth'
        snap = sorted(snapshots, key=lambda z: int(z[5:-4]))[-1]
        snapshot_path = os.path.join(cfg.output_dir, snap)
    else:
        snapshot_path = args.model_torch_path
    model = build_model(cfg, export_mode=True)

    mean_values = str([s * 255 for s in cfg.data.normalization.mean])
    scale_values = str([s * 255 for s in cfg.data.normalization.std])
    export_onnx(model, snapshot_path, cfg.data.resize, args.model_onnx_path)
    if args.convert_mo:
        export_mo(args.model_onnx_path, mean_values, scale_values, args.model_mo_path)

if __name__ == "__main__":
    main()
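
When --model_torch_path is omitted, main() picks the newest checkpoint by sorting directory entries with key=lambda z: int(z[5:-4]). That slice assumes filenames of the form 'snap_<epoch>.pth': the 5-character 'snap_' prefix and 4-character '.pth' suffix are stripped, leaving the epoch number. A small sketch with illustrative filenames shows why the numeric key matters:

# Sketch of the checkpoint selection logic; filenames are illustrative.
snapshots = ['snap_2.pth', 'snap_10.pth', 'snap_1.pth']
latest = sorted(snapshots, key=lambda z: int(z[5:-4]))[-1]
print(latest)   # snap_10.pth (a plain lexicographic sort would wrongly pick snap_2.pth)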
