Skip to content

Commit 7e5f96c

Browse files
committed
chore: Add note about Cuda Driver Error
- Update arguments to Dynamo compile call in line with new schema updates
1 parent 995c21c commit 7e5f96c

File tree

3 files changed

+34
-11
lines changed

3 files changed

+34
-11
lines changed

examples/dynamo/dynamo_compile_advanced_usage.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1010

1111
import torch
12-
from torch_tensorrt.fx.lower_setting import LowerPrecision
1312

1413
# %%
1514

@@ -68,12 +67,12 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
6867
# For accepted backend options, see the CompilationSettings dataclass:
6968
# py/torch_tensorrt/dynamo/backend/_settings.py
7069
backend_kwargs = {
71-
"precision": LowerPrecision.FP16,
70+
"enabled_precisions": {torch.half},
7271
"debug": True,
7372
"min_block_size": 2,
7473
"torch_executed_ops": {"torch.ops.aten.sub.Tensor"},
7574
"optimization_level": 4,
76-
"use_experimental_rt": True,
75+
"use_python_runtime": False,
7776
}
7877

7978
# Run the model on an input to cause compilation, as so:
@@ -89,5 +88,13 @@ def forward(self, x: torch.Tensor, y: torch.Tensor):
8988
# Finally, we use Torch utilities to clean up the workspace
9089
torch._dynamo.reset()
9190

92-
with torch.no_grad():
93-
torch.cuda.empty_cache()
91+
# %%
92+
# Cuda Driver Error Note
93+
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
94+
#
95+
# Occasionally, upon exiting the Python runtime after Dynamo compilation with `torch_tensorrt`,
96+
# one may encounter a Cuda Driver Error. This issue is related to https://github.com/NVIDIA/TensorRT/issues/2052
97+
# and can be resolved by wrapping the compilation/inference in a function and using a scoped call, as in::
98+
#
99+
# if __name__ == '__main__':
100+
# compile_engine_and_infer()

examples/dynamo/dynamo_compile_resnet_example.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333

3434
# Maximum number of TRT Engines
3535
# (Lower value allows more graph segmentation)
36-
min_block_size = 3
36+
min_block_size = 7
3737

3838
# Operations to Run in Torch, regardless of converter support
3939
torch_executed_ops = {}
@@ -78,5 +78,13 @@
7878
# Finally, we use Torch utilities to clean up the workspace
7979
torch._dynamo.reset()
8080

81-
with torch.no_grad():
82-
torch.cuda.empty_cache()
81+
# %%
82+
# Cuda Driver Error Note
83+
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
84+
#
85+
# Occasionally, upon exiting the Python runtime after Dynamo compilation with `torch_tensorrt`,
86+
# one may encounter a Cuda Driver Error. This issue is related to https://github.com/NVIDIA/TensorRT/issues/2052
87+
# and can be resolved by wrapping the compilation/inference in a function and using a scoped call, as in::
88+
#
89+
# if __name__ == '__main__':
90+
# compile_engine_and_infer()

examples/dynamo/dynamo_compile_transformers_example.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737

3838
# Maximum number of TRT Engines
3939
# (Lower value allows more graph segmentation)
40-
min_block_size = 3
40+
min_block_size = 7
4141

4242
# Operations to Run in Torch, regardless of converter support
4343
torch_executed_ops = {}
@@ -88,5 +88,13 @@
8888
# Finally, we use Torch utilities to clean up the workspace
8989
torch._dynamo.reset()
9090

91-
with torch.no_grad():
92-
torch.cuda.empty_cache()
91+
# %%
92+
# Cuda Driver Error Note
93+
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
94+
#
95+
# Occasionally, upon exiting the Python runtime after Dynamo compilation with `torch_tensorrt`,
96+
# one may encounter a Cuda Driver Error. This issue is related to https://github.com/NVIDIA/TensorRT/issues/2052
97+
# and can be resolved by wrapping the compilation/inference in a function and using a scoped call, as in::
98+
#
99+
# if __name__ == '__main__':
100+
# compile_engine_and_infer()

0 commit comments

Comments (0)