tests/compile/piecewise/test_toy_llama.py (2 additions, 2 deletions)

@@ -355,13 +355,13 @@ def test_toy_llama(
     )

     compile_config_no_compile = CompilationConfig(
-        level=CompilationMode.NONE,
+        mode=CompilationMode.NONE,
         cudagraph_mode=CUDAGraphMode.NONE,
         backend="eager",
     )

     compile_config_no_split = CompilationConfig(
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
         use_inductor_graph_partition=use_inductor_graph_partition,
         cudagraph_mode=CUDAGraphMode.PIECEWISE,
         backend=backend,

tests/compile/test_aot_compile.py (1 addition, 1 deletion)

@@ -38,7 +38,7 @@ def forward(self, x: torch.Tensor):
 def make_vllm_config() -> VllmConfig:
     return VllmConfig(
         compilation_config=CompilationConfig(
-            level=CompilationMode.VLLM_COMPILE,
+            mode=CompilationMode.VLLM_COMPILE,
         )
     )


tests/compile/test_config.py (3 additions, 3 deletions)

@@ -168,7 +168,7 @@ def test_splitting_ops_dynamic():
     if is_torch_equal_or_newer("2.9.0.dev"):
         config = VllmConfig(
             compilation_config=CompilationConfig(
-                level=CompilationMode.VLLM_COMPILE,
+                mode=CompilationMode.VLLM_COMPILE,
                 use_inductor_graph_partition=True,
                 splitting_ops=["vllm::unified_attention"],
             )

@@ -180,7 +180,7 @@ def test_splitting_ops_dynamic():
     # When attn_fusion pass enabled, splitting_ops now default to attention ops.
     config = VllmConfig(
         compilation_config=CompilationConfig(
-            level=CompilationMode.VLLM_COMPILE,
+            mode=CompilationMode.VLLM_COMPILE,
             pass_config={"enable_attn_fusion": True, "enable_noop": True},
             custom_ops=["+quant_fp8"],
             cudagraph_mode=CUDAGraphMode.PIECEWISE,

@@ -195,7 +195,7 @@ def test_splitting_ops_dynamic():
     if is_torch_equal_or_newer("2.9.0.dev"):
         config = VllmConfig(
             compilation_config=CompilationConfig(
-                level=CompilationMode.VLLM_COMPILE,
+                mode=CompilationMode.VLLM_COMPILE,
                 use_inductor_graph_partition=True,
                 pass_config={"enable_attn_fusion": True, "enable_noop": True},
                 custom_ops=["+quant_fp8"],

tests/compile/test_full_graph.py (1 addition, 1 deletion)

@@ -198,7 +198,7 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg
     compilation_config = (
         compile_config
         if isinstance(compile_config, CompilationConfig)
-        else CompilationConfig(level=compile_config)
+        else CompilationConfig(mode=compile_config)
     )

     prompts = [

tests/compile/test_fusions_e2e.py (3 additions, 3 deletions)

@@ -151,7 +151,7 @@ def test_attn_quant(
         cudagraph_mode=mode,
         splitting_ops=splitting_ops,
         # Common
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
         pass_config=PassConfig(enable_attn_fusion=True, enable_noop=True),
         # Inductor caches custom passes by default as well via uuid
         inductor_compile_config={"force_disable_caches": True},

@@ -236,7 +236,7 @@ def test_tp2_attn_quant_allreduce_rmsnorm(
         custom_ops=custom_ops_list,
         splitting_ops=splitting_ops,
         # Common
-        level=CompilationMode.VLLM_COMPILE,
+        mode=CompilationMode.VLLM_COMPILE,
         pass_config=PassConfig(
             enable_attn_fusion=True,
             enable_noop=True,

@@ -273,7 +273,7 @@ def run_model(compile_config: int | CompilationConfig, model: str, **model_kwarg
     compilation_config = (
         compile_config
         if isinstance(compile_config, CompilationConfig)
-        else CompilationConfig(level=compile_config)
+        else CompilationConfig(mode=compile_config)
     )

     prompts = [

tests/model_executor/test_enabled_custom_ops.py (3 additions, 3 deletions)

@@ -36,7 +36,7 @@ class Relu3(ReLUSquaredActivation):


 @pytest.mark.parametrize(
-    "env, torch_level, backend, ops_enabled, default_on",
+    "env, compilation_mode, backend, ops_enabled, default_on",
     [
         # Default values based on compile level
         # - All by default (no Inductor compilation)

@@ -77,15 +77,15 @@ class Relu3(ReLUSquaredActivation):
 )
 def test_enabled_ops(
     env: str | None,
-    torch_level: int,
+    compilation_mode: int,
     backend: str,
     ops_enabled: list[int],
     default_on: bool,
 ):
     custom_ops = env.split(",") if env else []
     vllm_config = VllmConfig(
         compilation_config=CompilationConfig(
-            backend=backend, level=torch_level, custom_ops=custom_ops
+            backend=backend, mode=compilation_mode, custom_ops=custom_ops
         )
     )
     with set_current_vllm_config(vllm_config):
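
Taken together, these hunks mechanically rename CompilationConfig's `level` keyword to `mode`; the CompilationMode enum values themselves (NONE, VLLM_COMPILE) are unchanged. A minimal sketch of the new spelling, with import paths assumed from the test files above rather than confirmed by this diff:

# Sketch only: shows the renamed keyword; import locations are an assumption.
from vllm.config import CompilationConfig, CompilationMode, VllmConfig

vllm_config = VllmConfig(
    compilation_config=CompilationConfig(
        # formerly spelled: level=CompilationMode.VLLM_COMPILE
        mode=CompilationMode.VLLM_COMPILE,
    )
)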