From 0ad8c54ee0fd1da26edcd8f88fe4e059117c0e7c Mon Sep 17 00:00:00 2001 From: David Hall Date: Wed, 29 Jan 2025 13:12:51 -0800 Subject: [PATCH] you kill, not cancel, actors (#871) --- src/levanter/infra/ray_tpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/levanter/infra/ray_tpu.py b/src/levanter/infra/ray_tpu.py index 86ce4223a..fa8970e4a 100644 --- a/src/levanter/infra/ray_tpu.py +++ b/src/levanter/infra/ray_tpu.py @@ -172,7 +172,7 @@ def do_run(self, remote_fn, coordinator_ip, slice_id, num_slices) -> _TpuRunResu logger.exception(e) for actor in actors: try: - ray.cancel(actor) + ray.kill(actor) except Exception: logger.exception("Failed to kill actor after primary failure") return futures