
Commit fd92c0c

shaochangxu authored and mzusman committed
[Bugfix] fused_experts_impl wrong compute type for float32 (vllm-project#11921)
Signed-off-by: shaochangxu.scx <[email protected]>
Co-authored-by: shaochangxu.scx <[email protected]>
1 parent ba48b5b commit fd92c0c

File tree

1 file changed: +8 −2 lines

vllm/model_executor/layers/fused_moe/fused_moe.py (+8, −2)
@@ -701,8 +701,14 @@ def fused_experts_impl(hidden_states: torch.Tensor,
                                       device=hidden_states.device,
                                       dtype=hidden_states.dtype)
 
-    compute_type = (tl.bfloat16
-                    if hidden_states.dtype == torch.bfloat16 else tl.float16)
+    if hidden_states.dtype == torch.bfloat16:
+        compute_type = tl.bfloat16
+    elif hidden_states.dtype == torch.float16:
+        compute_type = tl.float16
+    elif hidden_states.dtype == torch.float32:
+        compute_type = tl.float32
+    else:
+        raise ValueError(f"Unsupported compute_type: {hidden_states.dtype}")
 
     if inplace:
         out_hidden_states = hidden_states
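
For reference, a minimal standalone sketch of the dtype dispatch this patch introduces. The helper name select_compute_type is an assumption for illustration (in vLLM the logic sits inline in fused_experts_impl); the mapping mirrors the diff above: bfloat16 and float16 map to their Triton counterparts, float32 now maps to tl.float32 instead of falling back to tl.float16, and any other dtype raises.

# Minimal sketch (assumption: a standalone helper; the real change is inline
# in fused_experts_impl in vllm/model_executor/layers/fused_moe/fused_moe.py).
import torch
import triton.language as tl

def select_compute_type(dtype: torch.dtype):
    """Map a torch dtype to the Triton compute type used by the fused MoE kernel."""
    if dtype == torch.bfloat16:
        return tl.bfloat16
    elif dtype == torch.float16:
        return tl.float16
    elif dtype == torch.float32:
        # Before this fix, float32 inputs fell through to tl.float16.
        return tl.float32
    else:
        raise ValueError(f"Unsupported compute_type: {dtype}")

# Example: float32 activations now run the kernel in float32.
assert select_compute_type(torch.float32) == tl.float32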
