tests/compile/piecewise/test_toy_llama.py (2 additions, 2 deletions)

@@ -355,13 +355,13 @@ def test_toy_llama(
     )

     compile_config_no_compile = CompilationConfig(
-        level=CompilationMode.NONE,
+        mode=CompilationMode.NONE,
         cudagraph_mode=CUDAGraphMode.NONE,
         backend="eager",
     )

     compile_config_no_split = CompilationConfig(
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
         use_inductor_graph_partition=use_inductor_graph_partition,
         cudagraph_mode=CUDAGraphMode.PIECEWISE,
         backend=backend,

tests/compile/test_aot_compile.py (1 addition, 1 deletion)

@@ -38,7 +38,7 @@ def forward(self, x: torch.Tensor):
 def make_vllm_config() -> VllmConfig:
     return VllmConfig(
         compilation_config=CompilationConfig(
-            level=CompilationMode.VLLM_COMPILE,
+            mode=CompilationMode.VLLM_COMPILE,
         )
     )


tests/compile/test_config.py (3 additions, 3 deletions)

@@ -168,7 +168,7 @@ def test_splitting_ops_dynamic():
     if is_torch_equal_or_newer("2.9.0.dev"):
         config = VllmConfig(
             compilation_config=CompilationConfig(
-                level=CompilationMode.VLLM_COMPILE,
+                mode=CompilationMode.VLLM_COMPILE,
                 use_inductor_graph_partition=True,
                 splitting_ops=["vllm::unified_attention"],
             )

@@ -180,7 +180,7 @@ def test_splitting_ops_dynamic():
     # When attn_fusion pass enabled, splitting_ops now default to attention ops.
     config = VllmConfig(
         compilation_config=CompilationConfig(
-            level=CompilationMode.VLLM_COMPILE,
+            mode=CompilationMode.VLLM_COMPILE,
             pass_config={"enable_attn_fusion": True, "enable_noop": True},
             custom_ops=["+quant_fp8"],
             cudagraph_mode=CUDAGraphMode.PIECEWISE,

@@ -195,7 +195,7 @@ def test_splitting_ops_dynamic():
     if is_torch_equal_or_newer("2.9.0.dev"):
         config = VllmConfig(
             compilation_config=CompilationConfig(
-                level=CompilationMode.VLLM_COMPILE,
+                mode=CompilationMode.VLLM_COMPILE,
                 use_inductor_graph_partition=True,
                 pass_config={"enable_attn_fusion": True, "enable_noop": True},
                 custom_ops=["+quant_fp8"],

tests/compile/test_full_graph.py (1 addition, 1 deletion)

@@ -198,7 +198,7 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg
     compilation_config = (
         compile_config
         if isinstance(compile_config, CompilationConfig)
-        else CompilationConfig(level=compile_config)
+        else CompilationConfig(mode=compile_config)
     )

     prompts = [

tests/compile/test_fusions_e2e.py (3 additions, 3 deletions)

@@ -151,7 +151,7 @@ def test_attn_quant(
         cudagraph_mode=mode,
         splitting_ops=splitting_ops,
         # Common
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
         pass_config=PassConfig(enable_attn_fusion=True, enable_noop=True),
         # Inductor caches custom passes by default as well via uuid
         inductor_compile_config={"force_disable_caches": True},

@@ -236,7 +236,7 @@ def test_tp2_attn_quant_allreduce_rmsnorm(
         custom_ops=custom_ops_list,
         splitting_ops=splitting_ops,
         # Common
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
         pass_config=PassConfig(
             enable_attn_fusion=True,
             enable_noop=True,

@@ -273,7 +273,7 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg
     compilation_config = (
         compile_config
         if isinstance(compile_config, CompilationConfig)
-        else CompilationConfig(level=compile_config)
+        else CompilationConfig(mode=compile_config)
     )

     prompts = [

tests/model_executor/test_enabled_custom_ops.py (3 additions, 3 deletions)

@@ -36,7 +36,7 @@ class Relu3(ReLUSquaredActivation):


 @pytest.mark.parametrize(
-    "env, torch_level, backend, ops_enabled, default_on",
+    "env, compilation_mode, backend, ops_enabled, default_on",
     [
         # Default values based on compile level
         # - All by default (no Inductor compilation)

@@ -77,15 +77,15 @@ class Relu3(ReLUSquaredActivation):
 )
 def test_enabled_ops(
     env: str | None,
-    torch_level: int,
+    compilation_mode: int,
     backend: str,
     ops_enabled: list[int],
     default_on: bool,
 ):
     custom_ops = env.split(",") if env else []
     vllm_config = VllmConfig(
         compilation_config=CompilationConfig(
-            backend=backend, level=torch_level, custom_ops=custom_ops
+            backend=backend, mode=compilation_mode, custom_ops=custom_ops
         )
     )
     with set_current_vllm_config(vllm_config):
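
Taken together, these hunks mechanically rename CompilationConfig's `level` keyword to `mode`; the CompilationMode enum values themselves (NONE, VLLM_COMPILE) are unchanged. A minimal sketch of the new spelling, with import paths assumed from the test files above rather than confirmed by this diff:

# Sketch only: shows the renamed keyword; import locations are an assumption.
from vllm.config import CompilationConfig, CompilationMode, VllmConfig

vllm_config = VllmConfig(
    compilation_config=CompilationConfig(
        # formerly spelled: level=CompilationMode.VLLM_COMPILE
        mode=CompilationMode.VLLM_COMPILE,
    )
)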