chore: minor fixes

peri044 · peri044 · commit 0471f2d2aae0 · 2022-11-21T21:28:44.000-08:00
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
diff --git a/core/runtime/TRTEngine.cpp b/core/runtime/TRTEngine.cpp
@@ -142,10 +142,14 @@ TRTEngine::TRTEngine(
     num_io = std::make_pair(inputs, outputs);
   }
 
+  #ifndef NDEBUG
+    this->enable_profiling();
+  #endif
   LOG_DEBUG(*this);
 }
 
 TRTEngine::~TRTEngine() {
+  trt_engine_profiler.reset();
   exec_ctx.reset();
   cuda_engine.reset();
   rt.reset();
@@ -154,6 +158,7 @@ TRTEngine::~TRTEngine() {
 void TRTEngine::disable_profiling() {
   torch::cuda::synchronize(device_info.id);
   profile_execution = false;
+  trt_engine_profiler.reset();
   exec_ctx = make_trt(cuda_engine->createExecutionContext());
   TORCHTRT_CHECK((exec_ctx.get() != nullptr), "Unable to recreate TensorRT execution context");
 }
diff --git a/core/runtime/execute_engine.cpp b/core/runtime/execute_engine.cpp
@@ -171,7 +171,7 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
       gpu_handles.push_back(outputs[pyt_idx].data_ptr());
     }
   }
-
+  std::cout << "========== 3 ===============" << std::endl;
   {
     std::unique_ptr<torch::autograd::profiler::RecordProfile> enqueue_profiler_guard;
     if (compiled_engine->profile_execution) {
@@ -183,19 +183,19 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr
 
     // nvinfer1::IExecutionContext::enqueue is not thread safe and we need a mutex for it.
     std::unique_lock<std::mutex> lock(compiled_engine->mu);
-    std::unique_ptr<TRTEngineProfiler> trt_engine_profiler;
-    if (compiled_engine->profile_execution) {
-      trt_engine_profiler = std::make_unique<TRTEngineProfiler>(compiled_engine->name);
-      compiled_engine->exec_ctx->setProfiler(trt_engine_profiler.get());
-    }
+    // std::unique_ptr<TRTEngineProfiler> trt_engine_profiler;
+    // if (compiled_engine->profile_execution) {
+    //   trt_engine_profiler = std::make_unique<TRTEngineProfiler>(compiled_engine->name);
+    //   compiled_engine->exec_ctx->setProfiler(trt_engine_profiler.get());
+    // }
     compiled_engine->exec_ctx->enqueueV2(gpu_handles.data(), stream, nullptr);
     if (compiled_engine->profile_execution) {
-      LOG_INFO(std::endl << *trt_engine_profiler);
-      dump_trace(compiled_engine->trt_engine_profile_path, *trt_engine_profiler);
+      LOG_INFO(std::endl << *compiled_engine->trt_engine_profiler);
+      dump_trace(compiled_engine->trt_engine_profile_path, *compiled_engine->trt_engine_profiler);
       compiled_engine->dump_engine_layer_info();
     }
   }
-
+  std::cout << "========== 4 ===============" << std::endl;
   return outputs;
 }
 

Original file line number	Diff line number	Diff line change
`@@ -171,7 +171,7 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr`
`171`	`171`	`gpu_handles.push_back(outputs[pyt_idx].data_ptr());`
`172`	`172`	`}`
`173`	`173`	`}`
`174`		`-`
	`174`	`+ std::cout << "========== 3 ===============" << std::endl;`
`175`	`175`	`{`
`176`	`176`	`std::unique_ptr<torch::autograd::profiler::RecordProfile> enqueue_profiler_guard;`
`177`	`177`	`if (compiled_engine->profile_execution) {`
`@@ -183,19 +183,19 @@ std::vector<at::Tensor> execute_engine(std::vector<at::Tensor> inputs, c10::intr`
`183`	`183`
`184`	`184`	`// nvinfer1::IExecutionContext::enqueue is not thread safe and we need a mutex for it.`
`185`	`185`	`std::unique_lock<std::mutex> lock(compiled_engine->mu);`
`186`		`- std::unique_ptr<TRTEngineProfiler> trt_engine_profiler;`
`187`		`- if (compiled_engine->profile_execution) {`
`188`		`- trt_engine_profiler = std::make_unique<TRTEngineProfiler>(compiled_engine->name);`
`189`		`- compiled_engine->exec_ctx->setProfiler(trt_engine_profiler.get());`
`190`		`- }`
	`186`	`+ // std::unique_ptr<TRTEngineProfiler> trt_engine_profiler;`
	`187`	`+ // if (compiled_engine->profile_execution) {`
	`188`	`+ // trt_engine_profiler = std::make_unique<TRTEngineProfiler>(compiled_engine->name);`
	`189`	`+ // compiled_engine->exec_ctx->setProfiler(trt_engine_profiler.get());`
	`190`	`+ // }`
`191`	`191`	`compiled_engine->exec_ctx->enqueueV2(gpu_handles.data(), stream, nullptr);`
`192`	`192`	`if (compiled_engine->profile_execution) {`
`193`		`- LOG_INFO(std::endl << *trt_engine_profiler);`
`194`		`- dump_trace(compiled_engine->trt_engine_profile_path, *trt_engine_profiler);`
	`193`	`+ LOG_INFO(std::endl << *compiled_engine->trt_engine_profiler);`
	`194`	`+ dump_trace(compiled_engine->trt_engine_profile_path, *compiled_engine->trt_engine_profiler);`
`195`	`195`	`compiled_engine->dump_engine_layer_info();`
`196`	`196`	`}`
`197`	`197`	`}`
`198`		`-`
	`198`	`+ std::cout << "========== 4 ===============" << std::endl;`
`199`	`199`	`return outputs;`
`200`	`200`	`}`
`201`	`201`