Skip to content

Commit 59b8d3b

Browse files
committed
removing the try except block in examples
1 parent 109e5c2 commit 59b8d3b

File tree

3 files changed

+21
-40
lines changed

3 files changed

+21
-40
lines changed

examples/distributed_inference/tensor_parallel_rotary_embedding.py

Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929
"""
3030
This example covers the rotary embedding in Llama3 model and is derived from https://lightning.ai/lightning-ai/studios/tensor-parallelism-supercharging-large-model-training-with-pytorch-lightning
31-
Command to run with single GPU: mpirun -n 1 --allow-run-as-root python tensor_parallel_rotary_embedding.pyx
31+
Command to run with single GPU: mpirun -n 1 --allow-run-as-root python tensor_parallel_rotary_embedding.py
3232
"""
3333

3434
BATCH = 2
@@ -49,22 +49,11 @@
4949

5050
model = torch.compile(model, backend="torch_tensorrt")
5151

52-
try:
53-
for i in range(15):
54-
# seeding with dp_rank to ensure identical inputs for TP groups
55-
torch.manual_seed(i)
56-
start = time.time()
57-
output = model(x)
58-
end = time.time()
59-
if i == 0:
60-
logger.info(f"Compilation time is {end-start}")
61-
assert (
62-
python_result - output
63-
).std() < 0.01, "Compilation result is not correct."
64-
elif _rank == 0:
65-
logger.info(f"Inference time is {end-start}")
66-
except Exception as e:
67-
logger.error(f"Error: {e}")
68-
raise e
69-
finally:
70-
cleanup_distributed_env()
52+
torch.manual_seed(0)
53+
start = time.time()
54+
output = model(x)
55+
end = time.time()
56+
logger.info(f"Compilation time is {end-start}")
57+
assert (python_result - output).std() < 0.01, "Compilation result is not correct."
58+
59+
cleanup_distributed_env()

examples/distributed_inference/tensor_parallel_simple_example.py

Lines changed: 12 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -103,22 +103,15 @@ def forward(self, x):
103103
dynamic=None,
104104
)
105105

106-
try:
107-
for i in range(10):
108-
# For TP, input needs to be same across all TP ranks.
109-
# Setting the random seed is to mimic the behavior of dataloader.
110-
torch.manual_seed(i)
111-
inp = torch.rand(20, 10, device="cuda")
112-
start = time.time()
113-
output = tp_model(inp)
114-
end = time.time()
115-
if i == 0:
116-
logger.info(f"Compilation time is {end-start}")
117-
assert (
118-
python_result - output
119-
).std() < 0.01, "Compilation result is not correct."
120-
elif _rank == 0:
121-
logger.info(f"Inference time is {end-start}")
122-
finally:
123-
# This cleans up the distributed process group
124-
cleanup_distributed_env()
106+
# For TP, input needs to be same across all TP ranks.
107+
# Setting the random seed is to mimic the behavior of dataloader.
108+
torch.manual_seed(0)
109+
inp = torch.rand(20, 10, device="cuda")
110+
start = time.time()
111+
output = tp_model(inp)
112+
end = time.time()
113+
logger.info(f"Compilation time is {end - start}")
114+
assert (python_result - output).std() < 0.01, "Result is not correct."
115+
116+
# This cleans up the distributed process group
117+
cleanup_distributed_env()

tests/py/dynamo/lowering/test_aten_lowering_passes.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,6 @@ def forward(self, x):
313313
inputs,
314314
min_block_size=1,
315315
pass_through_build_failures=True,
316-
debug=True,
317316
)
318317
optimized_model_results = optimized_model(*inputs)[0].detach().cpu()
319318
torch_model_results = model(*inputs)[0].detach().cpu()

0 commit comments

Comments (0)