@@ -112,7 +112,7 @@ def __init__(self, verbose=False, workspace=8):
112
112
self .network = None
113
113
self .parser = None
114
114
115
- def create_network (self , onnx_path ):
115
+ def create_network (self , onnx_path , end2end , conf_thres , iou_thres , max_det ):
116
116
"""
117
117
Parse the ONNX graph and create the corresponding TensorRT network definition.
118
118
:param onnx_path: The path to the ONNX graph to load.
@@ -142,6 +142,61 @@ def create_network(self, onnx_path):
142
142
assert self .batch_size > 0
143
143
self .builder .max_batch_size = self .batch_size
144
144
145
+ if end2end :
146
+ previous_output = self .network .get_output (0 )
147
+ self .network .unmark_output (previous_output )
148
+ # output [1, 8400, 85]
149
+ # slice boxes, obj_score, class_scores
150
+ strides = trt .Dims ([1 ,1 ,1 ])
151
+ starts = trt .Dims ([0 ,0 ,0 ])
152
+ bs , num_boxes , temp = previous_output .shape
153
+ shapes = trt .Dims ([bs , num_boxes , 4 ])
154
+ # [0, 0, 0] [1, 8400, 4] [1, 1, 1]
155
+ boxes = self .network .add_slice (previous_output , starts , shapes , strides )
156
+ num_classes = temp - 5
157
+ starts [2 ] = 4
158
+ shapes [2 ] = 1
159
+ # [0, 0, 4] [1, 8400, 1] [1, 1, 1]
160
+ obj_score = self .network .add_slice (previous_output , starts , shapes , strides )
161
+ starts [2 ] = 5
162
+ shapes [2 ] = num_classes
163
+ # [0, 0, 5] [1, 8400, 80] [1, 1, 1]
164
+ scores = self .network .add_slice (previous_output , starts , shapes , strides )
165
+ # scores = obj_score * class_scores => [bs, num_boxes, nc]
166
+ updated_scores = self .network .add_elementwise (obj_score .get_output (0 ), scores .get_output (0 ), trt .ElementWiseOperation .PROD )
167
+
168
+ '''
169
+ "plugin_version": "1",
170
+ "background_class": -1, # no background class
171
+ "max_output_boxes": detections_per_img,
172
+ "score_threshold": score_thresh,
173
+ "iou_threshold": nms_thresh,
174
+ "score_activation": False,
175
+ "box_coding": 1,
176
+ '''
177
+ registry = trt .get_plugin_registry ()
178
+ assert (registry )
179
+ creator = registry .get_plugin_creator ("EfficientNMS_TRT" , "1" )
180
+ assert (creator )
181
+ fc = []
182
+ fc .append (trt .PluginField ("background_class" , np .array ([- 1 ], dtype = np .int32 ), trt .PluginFieldType .INT32 ))
183
+ fc .append (trt .PluginField ("max_output_boxes" , np .array ([max_det ], dtype = np .int32 ), trt .PluginFieldType .INT32 ))
184
+ fc .append (trt .PluginField ("score_threshold" , np .array ([conf_thres ], dtype = np .float32 ), trt .PluginFieldType .FLOAT32 ))
185
+ fc .append (trt .PluginField ("iou_threshold" , np .array ([iou_thres ], dtype = np .float32 ), trt .PluginFieldType .FLOAT32 ))
186
+ fc .append (trt .PluginField ("box_coding" , np .array ([1 ], dtype = np .int32 ), trt .PluginFieldType .INT32 ))
187
+
188
+ fc = trt .PluginFieldCollection (fc )
189
+ nms_layer = creator .create_plugin ("nms_layer" , fc )
190
+
191
+ layer = self .network .add_plugin_v2 ([boxes .get_output (0 ), updated_scores .get_output (0 )], nms_layer )
192
+ layer .get_output (0 ).name = "num"
193
+ layer .get_output (1 ).name = "boxes"
194
+ layer .get_output (2 ).name = "scores"
195
+ layer .get_output (3 ).name = "classes"
196
+ for i in range (4 ):
197
+ self .network .mark_output (layer .get_output (i ))
198
+
199
+
145
200
def create_engine (self , engine_path , precision , calib_input = None , calib_cache = None , calib_num_images = 5000 ,
146
201
calib_batch_size = 8 ):
147
202
"""
@@ -176,7 +231,8 @@ def create_engine(self, engine_path, precision, calib_input=None, calib_cache=No
176
231
# Also enable fp16, as some layers may be even more efficient in fp16 than int8
177
232
self .config .set_flag (trt .BuilderFlag .FP16 )
178
233
self .config .set_flag (trt .BuilderFlag .INT8 )
179
- self .config .int8_calibrator = EngineCalibrator (calib_cache )
234
+ # self.config.int8_calibrator = EngineCalibrator(calib_cache)
235
+ self .config .int8_calibrator = SwinCalibrator (calib_cache )
180
236
if not os .path .exists (calib_cache ):
181
237
calib_shape = [calib_batch_size ] + list (inputs [0 ].shape [1 :])
182
238
calib_dtype = trt .nptype (inputs [0 ].dtype )
@@ -190,7 +246,7 @@ def create_engine(self, engine_path, precision, calib_input=None, calib_cache=No
190
246
191
247
def main(args):
    """
    Drive the engine build from the parsed command-line arguments.

    Constructs an EngineBuilder, has it parse the ONNX graph into a TensorRT network
    definition, and then calls create_engine with the engine path, precision and
    calibration settings.
    :param args: The parsed command-line arguments. The attributes read here are verbose,
    workspace, onnx, end2end, conf_thres, iou_thres, max_det, engine, precision,
    calib_input, calib_cache, calib_num_images and calib_batch_size.
    """
    engine_builder = EngineBuilder(args.verbose, args.workspace)

    # Network creation receives the end2end switch and its NMS settings alongside the ONNX path.
    engine_builder.create_network(
        onnx_path=args.onnx,
        end2end=args.end2end,
        conf_thres=args.conf_thres,
        iou_thres=args.iou_thres,
        max_det=args.max_det,
    )

    engine_builder.create_engine(
        engine_path=args.engine,
        precision=args.precision,
        calib_input=args.calib_input,
        calib_cache=args.calib_cache,
        calib_num_images=args.calib_num_images,
        calib_batch_size=args.calib_batch_size,
    )
196
252
@@ -210,7 +266,17 @@ def main(args):
210
266
help = "The maximum number of images to use for calibration, default: 5000" )
211
267
parser .add_argument ("--calib_batch_size" , default = 8 , type = int ,
212
268
help = "The batch size for the calibration process, default: 8" )
269
+ parser .add_argument ("--end2end" , default = False , action = "store_true" ,
270
+ help = "export the engine include nms plugin, default: False" )
271
+ parser .add_argument ("--conf_thres" , default = 0.4 , type = float ,
272
+ help = "The conf threshold for the nms, default: 0.4" )
273
+ parser .add_argument ("--iou_thres" , default = 0.5 , type = float ,
274
+ help = "The iou threshold for the nms, default: 0.5" )
275
+ parser .add_argument ("--max_det" , default = 100 , type = int ,
276
+ help = "The total num for results, default: 100" )
277
+
213
278
args = parser .parse_args ()
279
+ print (args )
214
280
if not all ([args .onnx , args .engine ]):
215
281
parser .print_help ()
216
282
log .error ("These arguments are required: --onnx and --engine" )
@@ -219,6 +285,7 @@ def main(args):
219
285
parser .print_help ()
220
286
log .error ("When building in int8 precision, --calib_input or an existing --calib_cache file is required" )
221
287
sys .exit (1 )
288
+
222
289
main (args )
223
290
224
291
0 commit comments