@@ -171,7 +171,7 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
171
171
gpu_handles.push_back (outputs[pyt_idx].data_ptr ());
172
172
}
173
173
}
174
-
174
+ std::cout << " ========== 3 =============== " << std::endl;
175
175
{
176
176
std::unique_ptr<torch::autograd::profiler::RecordProfile> enqueue_profiler_guard;
177
177
if (compiled_engine->profile_execution ) {
@@ -183,19 +183,19 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
183
183
184
184
// nvinfer1::IExecutionContext::enqueue is not thread-safe, so we guard it with a mutex.
185
185
std::unique_lock<std::mutex> lock (compiled_engine->mu );
186
- std::unique_ptr<TRTEngineProfiler> trt_engine_profiler;
187
- if (compiled_engine->profile_execution ) {
188
- trt_engine_profiler = std::make_unique<TRTEngineProfiler>(compiled_engine->name );
189
- compiled_engine->exec_ctx ->setProfiler (trt_engine_profiler.get ());
190
- }
186
+ // std::unique_ptr<TRTEngineProfiler> trt_engine_profiler;
187
+ // if (compiled_engine->profile_execution) {
188
+ // trt_engine_profiler = std::make_unique<TRTEngineProfiler>(compiled_engine->name);
189
+ // compiled_engine->exec_ctx->setProfiler(trt_engine_profiler.get());
190
+ // }
191
191
compiled_engine->exec_ctx ->enqueueV2 (gpu_handles.data (), stream, nullptr );
192
192
if (compiled_engine->profile_execution ) {
193
- LOG_INFO (std::endl << *trt_engine_profiler);
194
- dump_trace (compiled_engine->trt_engine_profile_path , *trt_engine_profiler);
193
+ LOG_INFO (std::endl << *compiled_engine-> trt_engine_profiler );
194
+ dump_trace (compiled_engine->trt_engine_profile_path , *compiled_engine-> trt_engine_profiler );
195
195
compiled_engine->dump_engine_layer_info ();
196
196
}
197
197
}
198
-
198
+ std::cout << " ========== 4 =============== " << std::endl;
199
199
return outputs;
200
200
}
201
201
0 commit comments