Skip to content

Commit 0471f2d

Browse files
committed
chore: minor fixes
Signed-off-by: Dheeraj Peri <[email protected]>
1 parent 0e7f4fe commit 0471f2d

File tree

2 files changed

+14
-9
lines changed

2 files changed

+14
-9
lines changed

core/runtime/TRTEngine.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -142,10 +142,14 @@ TRTEngine::TRTEngine(
142142
num_io = std::make_pair(inputs, outputs);
143143
}
144144

145+
#ifndef NDEBUG
146+
this->enable_profiling();
147+
#endif
145148
LOG_DEBUG(*this);
146149
}
147150

148151
TRTEngine::~TRTEngine() {
152+
trt_engine_profiler.reset();
149153
exec_ctx.reset();
150154
cuda_engine.reset();
151155
rt.reset();
@@ -154,6 +158,7 @@ TRTEngine::~TRTEngine() {
154158
// Turns profiling off for this engine: clears the profile_execution flag, releases the
// engine-held profiler, and replaces exec_ctx with a newly created execution context.
// The bare numeric lines inside the body (e.g. "155159", "161+") are old/new diff line
// numbers left over from the commit page, not program text.
void TRTEngine::disable_profiling() {
155159
// Synchronize the CUDA device identified by device_info.id before changing engine state.
torch::cuda::synchronize(device_info.id);
156160
profile_execution = false;
161+
// Release the profiler object held by the engine.
// NOTE(review): at this point exec_ctx has not been replaced yet; if the old context was
// handed a raw pointer to this profiler (setProfiler(profiler.get()) pattern), it dangles
// until the next statement runs — confirm nothing can use the context in between.
trt_engine_profiler.reset();
157162
// Build a fresh execution context from the existing cuda_engine and take ownership of it.
exec_ctx = make_trt(cuda_engine->createExecutionContext());
158163
// Verify a context was actually obtained; TORCHTRT_CHECK reports the message on failure
// (presumably by throwing — confirm against the macro definition).
TORCHTRT_CHECK((exec_ctx.get() != nullptr), "Unable to recreate TensorRT execution context");
159164
}

core/runtime/execute_engine.cpp

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -171,7 +171,7 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
171171
gpu_handles.push_back(outputs[pyt_idx].data_ptr());
172172
}
173173
}
174-
174+
std::cout << "========== 3 ===============" << std::endl;
175175
{
176176
std::unique_ptr<torch::autograd::profiler::RecordProfile> enqueue_profiler_guard;
177177
if (compiled_engine->profile_execution) {
@@ -183,19 +183,19 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
183183

184184
// nvinfer1::IExecutionContext::enqueue is not thread safe and we need a mutex for it.
185185
std::unique_lock<std::mutex> lock(compiled_engine->mu);
186-
std::unique_ptr<TRTEngineProfiler> trt_engine_profiler;
187-
if (compiled_engine->profile_execution) {
188-
trt_engine_profiler = std::make_unique<TRTEngineProfiler>(compiled_engine->name);
189-
compiled_engine->exec_ctx->setProfiler(trt_engine_profiler.get());
190-
}
186+
// std::unique_ptr<TRTEngineProfiler> trt_engine_profiler;
187+
// if (compiled_engine->profile_execution) {
188+
// trt_engine_profiler = std::make_unique<TRTEngineProfiler>(compiled_engine->name);
189+
// compiled_engine->exec_ctx->setProfiler(trt_engine_profiler.get());
190+
// }
191191
compiled_engine->exec_ctx->enqueueV2(gpu_handles.data(), stream, nullptr);
192192
if (compiled_engine->profile_execution) {
193-
LOG_INFO(std::endl << *trt_engine_profiler);
194-
dump_trace(compiled_engine->trt_engine_profile_path, *trt_engine_profiler);
193+
LOG_INFO(std::endl << *compiled_engine->trt_engine_profiler);
194+
dump_trace(compiled_engine->trt_engine_profile_path, *compiled_engine->trt_engine_profiler);
195195
compiled_engine->dump_engine_layer_info();
196196
}
197197
}
198-
198+
std::cout << "========== 4 ===============" << std::endl;
199199
return outputs;
200200
}
201201

0 commit comments

Comments
 (0)