
Commit 6cf72ab

feat: Add bf16 support to cast converter (#3643)
1 parent 001fe31 commit 6cf72ab

File tree

4 files changed: +27 -2 lines changed

py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py

Lines changed: 1 addition & 0 deletions

@@ -1034,6 +1034,7 @@ def validate_dtype(to_copy_node: Node) -> bool:
         torch.bool,
         torch.int8,
         torch.float16,
+        torch.bfloat16,
     }

     # Validate input node has convertible kwargs
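
For context, validate_dtype gates the aten._to_copy converter on an allow-list of target dtypes, which this hunk extends. A minimal sketch of that check, abridged to the dtypes visible in the diff (the real allow-list in aten_ops_converters.py is longer):

import torch
from torch.fx.node import Node

# Sketch, not the verbatim source: the converter is accepted only when the
# requested target dtype is in the allow-list this commit extends.
allowed_casts = {
    torch.bool,
    torch.int8,
    torch.float16,
    torch.bfloat16,  # newly accepted by this commit
}

def validate_dtype(to_copy_node: Node) -> bool:
    dtype = to_copy_node.kwargs.get("dtype")
    return dtype in allowed_casts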

py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py

Lines changed: 9 additions & 2 deletions

@@ -544,9 +544,16 @@ def pow(
     lhs_val: Union[TRTTensor, int, float],
     rhs_val: Union[TRTTensor, int, float],
 ) -> TRTTensor:
+
+    lhs_dtype = None
+    rhs_dtype = None
+    if isinstance(lhs_val, int):
+        lhs_dtype = torch.int32
+    if isinstance(rhs_val, int):
+        rhs_dtype = torch.int32
     # POW operation supports only float32 and int8 inputs
-    lhs_val = get_trt_tensor(ctx, lhs_val, name + "_lhs_val", trt.float32)
-    rhs_val = get_trt_tensor(ctx, rhs_val, name + "_rhs_val", trt.float32)
+    lhs_val = get_trt_tensor(ctx, lhs_val, name + "_lhs_val", lhs_dtype)
+    rhs_val = get_trt_tensor(ctx, rhs_val, name + "_rhs_val", rhs_dtype)
     out = convert_binary_elementwise(
         ctx, target, source_ir, name, trt.ElementWiseOperation.POW, lhs_val, rhs_val
     )
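
Note on the hunk above: Python int scalars are now materialized as int32 constants, and every other operand passes dtype=None so get_trt_tensor keeps the operand's own dtype instead of coercing it to float32, which is what previously destroyed a bf16 cast feeding pow. (The retained context comment about float32/int8 predates this change.) A standalone sketch of the new scalar-dtype selection; the helper name is illustrative, not from the source:

import torch

def scalar_pow_dtype(operand):
    # Illustrative helper mirroring the new logic in pow(): Python ints
    # become int32 constants; anything else returns None so the conversion
    # infers the dtype (e.g., bf16 stays bf16) rather than forcing float32.
    return torch.int32 if isinstance(operand, int) else None

assert scalar_pow_dtype(2) is torch.int32
assert scalar_pow_dtype(2.0) is None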

tests/py/dynamo/conversion/harness.py

Lines changed: 2 additions & 0 deletions

@@ -412,6 +412,7 @@ def run_test(
         propagate_shapes=False,
         int32_reqd=False,
         immutable_weights=True,
+        use_explicit_typing=False,
     ):
         # TODO: lan to remove this and set use_dynamo_tracer to True by default
         # once all the converter test files are moved to use_dynamo_tracer
@@ -422,6 +423,7 @@ def run_test(
             enabled_precisions={dtype._from(precision)},
             truncate_double=True,
             immutable_weights=immutable_weights,
+            use_explicit_typing=use_explicit_typing,
         )

         mod = self.generate_graph(
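
use_explicit_typing is threaded through the harness here because bf16 only round-trips when the TensorRT network is built with explicit (strong) typing; otherwise the builder may re-pick layer precisions. A hedged end-to-end sketch outside the test harness, assuming the same flag is exposed on torch_tensorrt.dynamo.compile (verify against your installed version):

import torch
import torch_tensorrt

class CastPow(torch.nn.Module):
    def forward(self, x):
        y = torch.ops.aten._to_copy.default(x, dtype=torch.bfloat16)
        return y**2

inputs = (torch.rand(1, 3, 10),)
exported = torch.export.export(CastPow(), inputs)
# Assumption: use_explicit_typing is the same knob the harness forwards.
# With explicit typing, TensorRT honors the bf16 dtypes recorded in the
# network instead of letting the builder choose layer precisions.
trt_module = torch_tensorrt.dynamo.compile(
    exported,
    inputs=list(inputs),
    use_explicit_typing=True,
)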

tests/py/dynamo/conversion/test_casts.py

Lines changed: 15 additions & 0 deletions

@@ -64,6 +64,21 @@ def forward(self, x):
             precision=torch.float,
         )

+    def test_to_copy_bfloat16(self):
+        class ToCopyBFloat16(nn.Module):
+            def forward(self, x):
+                y = torch.ops.aten._to_copy.default(x, dtype=torch.bfloat16)
+                y = y**2
+                return y
+
+        inputs = [torch.rand((1, 3, 10), dtype=torch.float32)]
+        self.run_test(
+            ToCopyBFloat16(),
+            inputs,
+            precision=torch.float,
+            use_explicit_typing=True,
+        )
+
     def test_to_copy_i64b(self):
         class ToCopy64Bit(nn.Module):
             def forward(self, x):
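
The new test exercises both converters this commit touches: the bf16 _to_copy and the pow that consumes its output. For reference, the eager-mode behavior the TRT output is checked against; a standalone sketch, not harness code:

import torch

x = torch.rand((1, 3, 10), dtype=torch.float32)
y = torch.ops.aten._to_copy.default(x, dtype=torch.bfloat16)
y = y**2  # an int exponent keeps the bf16 dtype in eager mode
print(y.dtype)  # torch.bfloat16 -- the compiled path should match this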
