[TPU] add tpu_inference

jcyang43 · jcyang43 · commit 89ad99a18ae3 · 2025-10-21T10:48:45.000-07:00
Signed-off-by: Johnny Yang <johnnyyang@google.com>
diff --git a/requirements/tpu.txt b/requirements/tpu.txt
@@ -12,6 +12,5 @@ ray[data]
 setuptools==78.1.0
 nixl==0.3.0
 tpu_info==0.4.0
-
-# Install torch_xla
-torch_xla[tpu, pallas]==2.8.0
+tpu-inference==0.11.1
+numba
diff --git a/vllm/distributed/device_communicators/tpu_communicator.py b/vllm/distributed/device_communicators/tpu_communicator.py
@@ -97,11 +97,3 @@ def all_reduce(self, input_: torch.Tensor) -> torch.Tensor:
     def all_gather(self, input_: torch.Tensor, dim: int = -1) -> torch.Tensor:
         assert dim == -1, "TPUs only support dim=-1 for all-gather."
         return xm.all_gather(input_, dim=dim)
-
-
-if USE_TPU_INFERENCE:
-    from tpu_inference.distributed.device_communicators import (
-        TpuCommunicator as TpuInferenceCommunicator,
-    )
-
-    TpuCommunicator = TpuInferenceCommunicator  # type: ignore
diff --git a/vllm/v1/worker/tpu_worker.py b/vllm/v1/worker/tpu_worker.py
@@ -351,6 +351,6 @@ def apply_model(self, fn: Callable[[nn.Module], _R]) -> _R:
 
 
 if USE_TPU_INFERENCE:
-    from tpu_inference.worker import TPUWorker as TpuInferenceWorker
+    from tpu_inference.worker.tpu_worker_jax import TPUWorker as TpuInferenceWorker
 
     TPUWorker = TpuInferenceWorker  # type: ignore