Commit 95b6c76

add grpo support for third-party devices

1 parent 55e680e commit 95b6c76

1 file changed: trl/trainer/grpo_trainer.py (+7, -4 lines)

@@ -22,6 +22,7 @@
 import torch
 import torch.utils.data
 import transformers
+from accelerate import PartialState
 from accelerate.utils import broadcast_object_list, gather, gather_object
 from accelerate.utils.other import is_compiled_module
 from datasets import Dataset, IterableDataset
@@ -342,18 +343,20 @@ def data_collator(features):  # No data collation is needed in GRPO
 
             if self.accelerator.is_main_process:
                 vllm_device = self.args.vllm_device
+                device_type = PartialState().default_device.type
+                device_module = getattr(torch, device_type)
                 if vllm_device == "auto":
-                    vllm_device = f"cuda:{self.accelerator.num_processes}"  # take the next GPU idx
+                    vllm_device = f"{device_type}:{self.accelerator.num_processes}"  # take the next GPU idx
                 # Check that the requested device is available
-                if vllm_device.split(":")[0] == "cuda" and int(vllm_device.split(":")[1]) >= torch.cuda.device_count():
+                if vllm_device.split(":")[0] == f"{device_type}" and int(vllm_device.split(":")[1]) >= device_module.device_count():
                     raise ValueError(
                         f"The requested device for vllm ({vllm_device}) is not available. You are likely using vLLM "
                         "without restricting the number of GPUs for training. Set the `--num_processes` argument to a "
                         "value lower than the number of GPUs available on your machine—typically, reducing it by one "
-                        f"is sufficient. In your case: `--num_processes {torch.cuda.device_count() - 1}`."
+                        f"is sufficient. In your case: `--num_processes {device_module.device_count() - 1}`."
                     )
                 # Check that the requested device is not also used for training
-                if vllm_device in {f"cuda:{idx}" for idx in range(self.accelerator.num_processes)}:
+                if vllm_device in {f"{device_type}:{idx}" for idx in range(self.accelerator.num_processes)}:
                     warnings.warn(
                         f"The requested device {vllm_device} is also used for training. This may lead to unexpected "
                         "behavior. It is recommended to use a dedicated device for vLLM."
