Changes from all commits (19 commits)
81c6507 - Improve documentation after trying on a new machine. (SandSnip3r, Jan 8, 2026)
299b095 - Clarified instructions. (SandSnip3r, Jan 8, 2026)
3ddb261 - Merge remote-tracking branch 'upstream/main' (SandSnip3r, Jan 13, 2026)
90d6cd2 - Implement rms norm converter. (SandSnip3r, Jan 15, 2026)
80d63a2 - Add test for rms norm converter. (SandSnip3r, Jan 15, 2026)
723f5fb - Merge branch 'main' into rms_norm_converter (SandSnip3r, Jan 15, 2026)
29e4e00 - Merge branch 'pytorch:main' into rms_norm_converter (SandSnip3r, Jan 21, 2026)
c122176 - Fix the converter issue caused by this missing unset_fake_temporarily… (wenbingl, Jan 22, 2026)
4da0cf0 - chore: update uv.lock (Jan 26, 2026)
183f7f0 - lowering pass: fully remove SymInt (#4001) (zewenli98, Jan 26, 2026)
824a770 - fix the layer info test failure and deal with potential segfault (#4042) (narendasan, Jan 27, 2026)
240ce8e - cherry pick 4033: skip llm test if modelopt is not installed from rel… (lanluo-nvidia, Jan 27, 2026)
f522436 - cherry pick 4038 from ngc release branch to main: skip failed test on… (lanluo-nvidia, Jan 27, 2026)
84b3f8f - cherry pick 4028: fix resource partitioner issue from release branch … (lanluo-nvidia, Jan 27, 2026)
19e20e6 - cherry pick 4029: upgrade aarch64 base image from release branch to m… (lanluo-nvidia, Jan 27, 2026)
e6415e3 - fix: torchtrtc precision setting logic (#3883) (yeetypete, Jan 29, 2026)
61cbf2e - Empty tensor handling (#3891) (apbose, Jan 30, 2026)
5edae8b - chore: update uv.lock (Feb 9, 2026)
aec7e45 - fix: example argument issue raised in 4070 (#4071) (zewenli98, Feb 9, 2026)
2 changes: 1 addition & 1 deletion .github/scripts/generate-release-matrix.py
@@ -12,7 +12,7 @@
"wheel": ["3.10", "3.11", "3.12", "3.13"],
"tarball": ["3.11"],
}
sbsa_container_image: str = "quay.io/pypa/manylinux_2_34_aarch64"
sbsa_container_image: str = "quay.io/pypa/manylinux_2_39_aarch64"

CXX11_TARBALL_CONTAINER_IMAGE = {
"cu130": "pytorch/libtorch-cxx11-builder:cuda13.0-main",
2 changes: 1 addition & 1 deletion .github/workflows/build_linux.yml
@@ -413,5 +413,5 @@ jobs:
PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{inputs.is-release-wheel}}-${{inputs.is-release-tarball}}-${{inputs.use-rtx}}-${{inputs.architecture}}-${{inputs.is-jetpack}}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{inputs.is-release-wheel}}-${{inputs.is-release-tarball}}-${{inputs.use-rtx}}-${{inputs.architecture}}-${{inputs.is-jetpack}}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ startsWith(github.ref, 'refs/tags/') && github.ref_name || 'no-tag' }}
cancel-in-progress: true
2 changes: 1 addition & 1 deletion .github/workflows/build_windows.yml
@@ -438,5 +438,5 @@ jobs:
architecture: ${{ inputs.architecture }}

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.repository }}-${{ inputs.is-release-wheel }}-${{ inputs.is-release-tarball }}-${{ github.event_name == 'workflow_dispatch' }}
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.repository }}-${{ inputs.is-release-wheel }}-${{ inputs.is-release-tarball }}-${{ github.event_name == 'workflow_dispatch' }}-${{ startsWith(github.ref, 'refs/tags/') && github.ref_name || 'no-tag' }}
cancel-in-progress: true
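Presumably, appending the tag ref name to these concurrency group keys gives tag-triggered release builds their own group, so a release tag push and an in-flight branch or PR run no longer cancel each other.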
3 changes: 2 additions & 1 deletion .github/workflows/release-linux-aarch64.yml
@@ -1,6 +1,7 @@
name: Release aarch64 Linux wheels and tarball artifacts

on:
pull_request:
push:
tags:
# NOTE: Binary build pipelines should only get triggered on release candidate builds
@@ -128,5 +129,5 @@ jobs:
architecture: "aarch64"

concurrency:
group: ${{ github.workflow }}-aarch64-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}
group: ${{ github.workflow }}-aarch64-release-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}
cancel-in-progress: true
2 changes: 1 addition & 1 deletion .github/workflows/release-linux-x86_64.yml
@@ -126,5 +126,5 @@ jobs:
is-release-wheel: true

concurrency:
group: ${{ github.workflow }}-x86_64-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}
group: ${{ github.workflow }}-x86_64-release-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}
cancel-in-progress: true
8 changes: 7 additions & 1 deletion core/runtime/TRTEngine.cpp
@@ -142,6 +142,9 @@ TRTEngine::TRTEngine(
}
TORCHTRT_CHECK((exec_ctx.get() != nullptr), "Unable to create TensorRT execution context");

// Pre-allocate placeholder for empty tensors (TensorRT requires non-null addresses)
cudaMalloc(&empty_tensor_placeholder, 1);

runtime_states.old_cudagraphs = CUDAGRAPHS_MODE;
runtime_states.old_pre_allocated_outputs = false;
runtime_states.context_changed = false;
@@ -264,6 +267,9 @@ TRTEngine::~TRTEngine() {
trt_engine_profiler.reset();
exec_ctx.reset();
cuda_engine.reset();
if (empty_tensor_placeholder) {
cudaFree(empty_tensor_placeholder);
}
rt.reset();
}

@@ -315,7 +321,7 @@ void TRTEngine::set_profile_format(std::string format) {
}

std::string TRTEngine::get_engine_layer_info() {
auto inspector = cuda_engine->createEngineInspector();
auto inspector = make_trt(cuda_engine->createEngineInspector());
return inspector->getEngineInformation(nvinfer1::LayerInformationFormat::kJSON);
}

3 changes: 3 additions & 0 deletions core/runtime/TRTEngine.h
@@ -187,6 +187,9 @@ struct TRTEngine : torch::CustomClassHolder {
bool use_pre_allocated_outputs = false;
std::vector<at::Tensor> pre_allocated_outputs;

// Single placeholder buffer for empty tensor inputs (allocated once, reused)
void* empty_tensor_placeholder = nullptr;

// Output Allocator-Related Functionality
bool requires_output_allocator = false; // engine requires output allocator
bool use_output_allocator_outputs = false; // users specify to use output allocator
20 changes: 14 additions & 6 deletions core/runtime/execute_engine.cpp
@@ -149,18 +149,26 @@ void setup_input_tensors(
TORCHTRT_CHECK(
compiled_engine->exec_ctx->setInputShape(name.c_str(), dims), "Error while setting the input shape");

at::Tensor final_input;
if (cudagraphs_enabled) {
// If using CUDAGraphs copy formatted input to the corresponding persistent input buffer
compiled_engine->input_buffers[i].copy_(formatted_inputs.back(), true);
TORCHTRT_CHECK(
compiled_engine->exec_ctx->setTensorAddress(name.c_str(), compiled_engine->input_buffers[i].data_ptr()),
"Error while setting the input tensor address for inputs");
final_input = compiled_engine->input_buffers[i];
} else {
// Otherwise use the formatted buffer directly
TORCHTRT_CHECK(
compiled_engine->exec_ctx->setTensorAddress(name.c_str(), formatted_inputs.back().data_ptr()),
"Error while setting the input tensor address for inputs");
final_input = formatted_inputs.back();
}

// Get tensor address, using placeholder for empty tensors
// TensorRT requires non-null address even if numel() = 0
// empty_tensor_placeholder is pre-allocated in TRTEngine constructor
void* input_addr = (final_input.numel() == 0 || final_input.data_ptr() == nullptr)
? compiled_engine->empty_tensor_placeholder
: final_input.data_ptr();

TORCHTRT_CHECK(
compiled_engine->exec_ctx->setTensorAddress(name.c_str(), input_addr),
"Failed to bind tensor address for " << name);
}
}
}
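A minimal sketch (illustration only, not part of the patch) of the condition the placeholder guards against: a zero-element CUDA tensor typically reports a null data pointer, which TensorRT rejects as a tensor address, so the runtime binds the pre-allocated 1-byte buffer instead. Assumes a CUDA-capable machine.

import torch

# An empty input like this would previously hand TensorRT a null address.
x = torch.empty((0, 768), device="cuda")
print(x.numel())     # 0 -> takes the placeholder branch in setup_input_tensors
print(x.data_ptr())  # 0 (null) for a zero-element CUDA tensor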
1 change: 1 addition & 0 deletions cpp/bin/torchtrtc/main.cpp
@@ -356,6 +356,7 @@ int main(int argc, char** argv) {
}

if (enabled_precisions) {
compile_settings.enabled_precisions.clear();
for (const auto& precision : args::get(enabled_precisions)) {
auto dtype = torchtrtc::parserutil::parse_dtype(precision);
if (dtype == torchtrt::DataType::kFloat) {
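A hedged Python sketch of the set semantics behind this one-line fix (the values are hypothetical; torchtrtc's actual defaults live in the C++ settings struct): enabled_precisions starts out with a default member, so inserting the user's requested precisions without clearing first yields the union rather than a replacement.

# Hypothetical stand-ins for the C++ precision set.
defaults = {"fp32"}                # default enabled precision
requested = {"fp16"}               # precisions parsed from the CLI arguments

before_fix = defaults | requested  # {"fp32", "fp16"}: the default leaks through
after_fix = set(requested)         # clear() first, then insert: {"fp16"}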
58 changes: 58 additions & 0 deletions examples/dynamo/compile_with_dynamic_inputs.py
@@ -0,0 +1,58 @@
import logging

import torch
import torch.nn as nn
import torch_tensorrt

logging.basicConfig(level=logging.DEBUG)

torch.manual_seed(0)


class ExpandReshapeModel(nn.Module):
def __init__(self, embed_dim: int):
super().__init__()
self.cls_token = nn.Parameter(torch.randn(1, 1, embed_dim))
self.embed_dim = embed_dim
self.qkv_proj = nn.Linear(self.embed_dim, self.embed_dim * 3)

def forward(self, x: torch.Tensor):
batch_size = x.shape[0]
cls_token = self.cls_token.expand(batch_size, -1, -1)
x = torch.cat([cls_token, x], dim=1)
x = self.qkv_proj(x)
reshaped_qkv = x.reshape(batch_size, x.size(1), 3, 12, -1)
return reshaped_qkv


model = ExpandReshapeModel(embed_dim=768).cuda().eval()
x = torch.randn(4, 196, 768).cuda()

# 1. JIT: torch.compile
x1 = x.clone()
torch._dynamo.mark_dynamic(x1, index=0, min=2, max=32)
trt_module = torch.compile(model, backend="tensorrt")
out1 = trt_module(x1)

# 2. AOT: torch_tensorrt.compile
x2 = x.clone()
example_input = torch_tensorrt.Input(
min_shape=[1, 196, 768],
opt_shape=[4, 196, 768],
max_shape=[32, 196, 768],
dtype=torch.float32,
)
trt_module = torch_tensorrt.compile(model, ir="dynamo", inputs=example_input)
out2 = trt_module(x2)

# 3. AOT: torch.export + Dynamo compile
x3 = x.clone()
bs = torch.export.Dim("bs", min=1, max=32)
dynamic_shapes = {"x": {0: bs}}
exp_program = torch.export.export(model, (x3,), dynamic_shapes=dynamic_shapes)
trt_module = torch_tensorrt.dynamo.compile(exp_program, (x3,))
out3 = trt_module(x3)

assert torch.allclose(out1, out2)
assert torch.allclose(out1, out3)
assert torch.allclose(out2, out3)
3 changes: 3 additions & 0 deletions examples/dynamo/torch_compile_resnet_example.py
@@ -48,6 +48,7 @@
model,
ir="torch_compile",
inputs=inputs,
use_explicit_typing=False,
enabled_precisions=enabled_precisions,
workspace_size=workspace_size,
min_block_size=min_block_size,
@@ -86,6 +87,7 @@
model,
ir="torch_compile",
inputs=inputs_bs8,
use_explicit_typing=False,
enabled_precisions=enabled_precisions,
workspace_size=workspace_size,
min_block_size=min_block_size,
@@ -111,6 +113,7 @@
dtype=torch.half,
)
],
"use_explicit_typing": False,
"enabled_precisions": enabled_precisions,
"ir": "dynamo",
}
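The added use_explicit_typing=False appears to pin these examples to weakly typed compilation, the mode in which the enabled_precisions set passed alongside it is honored.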
16 changes: 14 additions & 2 deletions py/torch_tensorrt/_Input.py
@@ -1,11 +1,14 @@
from __future__ import annotations

import logging
from enum import Enum
from typing import Any, Dict, List, Optional, Sequence, Tuple

import torch
from torch_tensorrt._enums import dtype, memory_format

logger = logging.getLogger(__name__)


class Input(object):
"""
@@ -149,6 +152,16 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
}
self.shape_mode = Input._ShapeMode.DYNAMIC

# Warn if min_shape has any 0 dimension (empty tensor) - TensorRT doesn't support this
# @apbose: Is this warning necessary?
if any(dim == 0 for dim in self.shape["min_shape"]):
logger.warning(
f"min_shape contains a 0 dimension: {self.shape['min_shape']}. "
"TensorRT does not support dynamic shapes with min dimension of 0 (empty tensors). "
"TensorRT will internally clamp min dimensions to 1, which may cause runtime errors "
"if you try to run inference with empty tensor inputs."
)

else:
raise ValueError(
f"Unexpected number of positional arguments for class Input \n Found {len(args)} arguments, expected either zero or a single positional arguments"
@@ -384,7 +397,7 @@ def example_tensor(
dtype=self.dtype.to(torch.dtype, use_default=True)
)
else:
RuntimeError(
raise RuntimeError(
f"Input shape is dynamic but shapes are not provided as sequence (found: {self.shape})"
)
else:
@@ -412,4 +425,3 @@ def example_tensor(
raise ValueError(
"Requested an example tensor from a dynamic shaped input but did not specific which profile field to use."
)
raise
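A short sketch (assuming a working torch_tensorrt install) of an Input spec that would trip the new warning: the 0 in min_shape's batch dimension is exactly what TensorRT clamps to 1.

import torch
import torch_tensorrt

inp = torch_tensorrt.Input(
    min_shape=[0, 3, 224, 224],  # the 0 here triggers the warning above
    opt_shape=[4, 3, 224, 224],
    max_shape=[8, 3, 224, 224],
    dtype=torch.float32,
)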
6 changes: 6 additions & 0 deletions py/torch_tensorrt/_utils.py
@@ -76,6 +76,12 @@ def is_tegra_platform() -> bool:
return False


def is_orin() -> bool:
if torch.cuda.get_device_capability() in [(8, 7)]:
return True
return False


def is_thor() -> bool:
if torch.cuda.get_device_capability() in [(11, 0)]:
return True
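For reference, a tiny sketch of what these platform helpers key on: torch reports the active GPU's compute capability as a (major, minor) tuple, and per the checks above (8, 7) corresponds to Orin and (11, 0) to Thor.

import torch

major, minor = torch.cuda.get_device_capability()
print(f"sm_{major}{minor}")  # e.g. "sm_87" on an Orin-class device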
23 changes: 9 additions & 14 deletions py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py
@@ -20,7 +20,6 @@

import tensorrt as trt
import torch
import torch_tensorrt
from torch import SymBool, SymFloat, SymInt
from torch._ops import OpOverloadPacket
from torch.fx.node import Argument, Node, Target, _get_qualified_name
@@ -536,7 +535,7 @@ def __contains__(self, key: Target | Node) -> bool:
def get_all_converters_with_target(
self, key: Target, return_registry_info: bool = False
) -> Tuple[
Union[List[Any], Dict[str, int], None]
List[Any], Optional[Dict[str, int]]
]: # TODO: Narrow to ConverterImplSignature this when we can remove FX converters
"""Get all converters across all registries for the target

@@ -547,7 +546,7 @@ def get_all_converters_with_target(

# Store count of number of registered converters per registry
if return_registry_info:
registry_data = {name: 0 for name in self.registry_names}
registry_data = dict.fromkeys(self.registry_names, 0)

for index, registry in enumerate(self.registries):
if key in registry:
@@ -622,22 +621,18 @@ def display_all_available_converters(self) -> str:
return available_converters


# Initialize dynamo converter registry with the FX and Dynamo aten registries
# Note the Dynamo registry is listed first, for precedence
registries = [
DYNAMO_ATEN_CONVERTERS,
# Initialize dynamo converter registry with Dynamo aten converters only
# FX converters are not loaded here - they are legacy and should only be used
# in the FX frontend, not as fallbacks in the dynamo frontend
registries: List[
Dict[Target, Union[Callable[..., Any], Sequence[ConverterSupport]]]
] = [
DYNAMO_ATEN_CONVERTERS, # type: ignore[list-item]
]
registry_names = ["Dynamo ATen Converters Registry"]
registry_calling_conventions = [
CallingConvention.CTX,
]
if torch_tensorrt.ENABLED_FEATURES.fx_frontend:
from torch_tensorrt.fx.converter_registry import CONVERTERS as FX_CONVERTERS

registries.append(FX_CONVERTERS)
registry_names.append("FX Legacy ATen Converters Registry")
registry_calling_conventions.append(CallingConvention.LEGACY)


DYNAMO_CONVERTERS: ConverterRegistry = ConverterRegistry(
registries,
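A hedged sketch of a registry lookup after this change (assuming DYNAMO_CONVERTERS is importable from torch_tensorrt.dynamo.conversion, where this module lives): with the FX fallback no longer appended, only the Dynamo ATen registry is consulted and reported.

import torch
from torch_tensorrt.dynamo.conversion import DYNAMO_CONVERTERS

converters, info = DYNAMO_CONVERTERS.get_all_converters_with_target(
    torch.ops.aten.relu.default, return_registry_info=True
)
print(info)  # e.g. {"Dynamo ATen Converters Registry": 1}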