Commit 85c7aaf

Remove unused swap_space parameter
References: #27984
1 parent: f7d2946
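
For downstream code, the migration is mechanical: stop passing swap_space to LLM(...) (and to vllm_kwargs in integrations). A minimal before/after sketch, assuming the public vllm.LLM API; the model name and values are illustrative, and whether a leftover swap_space argument now raises a TypeError or is rejected some other way depends on the engine's argument validation:

from vllm import LLM

# Before this commit, swap_space was accepted but had no effect:
#   llm = LLM(model="facebook/opt-125m", swap_space=4)
# After it, pass only arguments that still take effect:
llm = LLM(
    model="facebook/opt-125m",
    gpu_memory_utilization=0.9,
)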

File tree

16 files changed: +5 -52 lines

docs/serving/integrations/llamaindex.md

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ llm = Vllm(
     model="microsoft/Orca-2-7b",
     tensor_parallel_size=4,
     max_new_tokens=100,
-    vllm_kwargs={"swap_space": 1, "gpu_memory_utilization": 0.5},
+    vllm_kwargs={"gpu_memory_utilization": 0.5},
 )
 ```


tests/conftest.py

Lines changed: 0 additions & 2 deletions
@@ -749,7 +749,6 @@ def __init__(
         tensor_parallel_size: int = 1,
         block_size: int = 16 if not torch.xpu.is_available() else 64,
         enable_chunked_prefill: bool | None = False,
-        swap_space: int = 4,
         enforce_eager: bool | None = False,
         # Set this to avoid hanging issue
         default_torch_num_threads: int | None = None,
@@ -778,7 +777,6 @@ def __init__(
             trust_remote_code=trust_remote_code,
             dtype=dtype,
             seed=seed,
-            swap_space=swap_space,
             enforce_eager=enforce_eager,
             disable_log_stats=disable_log_stats,
             tensor_parallel_size=tensor_parallel_size,

tests/distributed/test_torchrun_example.py

Lines changed: 1 addition & 2 deletions
@@ -22,15 +22,14 @@

 sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

-# set different `gpu_memory_utilization` and `swap_space` for different ranks,
+# set different `gpu_memory_utilization` for different ranks,
 # to test if all ranks agree on the same kv cache configuration.
 llm = LLM(
     model="facebook/opt-125m",
     tensor_parallel_size=2,
     pipeline_parallel_size=int(os.getenv("PP_SIZE", 1)),
     distributed_executor_backend="external_launcher",
     gpu_memory_utilization=random.uniform(0.7, 0.9),
-    swap_space=random.randint(1, 4),
     seed=0,
 )
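
The randomized gpu_memory_utilization is the point of this test: under the external_launcher backend each rank builds its own engine arguments, so the engine has to reconcile per-rank KV-cache capacities into one shared configuration. A conceptual sketch of that agreement step (illustrative only, not vLLM's actual reduction code):

def agree_on_kv_blocks(per_rank_blocks: list[int]) -> int:
    # Each rank proposes how many KV-cache blocks fit in its own budget;
    # adopting the group minimum guarantees the shared config fits everywhere.
    return min(per_rank_blocks)

# Ranks drawing utilization from uniform(0.7, 0.9) propose different counts:
assert agree_on_kv_blocks([812, 905, 770, 841]) == 770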

tests/distributed/test_torchrun_example_moe.py

Lines changed: 1 addition & 2 deletions
@@ -28,7 +28,7 @@

 sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

-# set different `gpu_memory_utilization` and `swap_space` for different ranks,
+# set different `gpu_memory_utilization` for different ranks,
 # to test if all ranks agree on the same kv cache configuration.
 llm = LLM(
     model="microsoft/Phi-mini-MoE-instruct",
@@ -37,7 +37,6 @@
     enable_expert_parallel=int(os.getenv("ENABLE_EP", "0")) == 1,
     distributed_executor_backend="external_launcher",
     gpu_memory_utilization=random.uniform(0.7, 0.9),
-    swap_space=random.randint(1, 4),
     seed=0,
 )

tests/lora/test_worker.py

Lines changed: 0 additions & 1 deletion
@@ -52,7 +52,6 @@ def set_active_loras(worker: Worker, lora_requests: list[LoRARequest]):
         device_config=DeviceConfig("cuda"),
         cache_config=CacheConfig(
             block_size=16,
-            swap_space=0,
             cache_dtype="auto",
         ),
         lora_config=LoRAConfig(

tests/v1/attention/utils.py

Lines changed: 0 additions & 1 deletion
@@ -172,7 +172,6 @@ def create_vllm_config(
     cache_config = CacheConfig(
         block_size=block_size,
         cache_dtype="auto",
-        swap_space=0,
     )
     # Set cache blocks for testing
     # (these may be set during initialization normally)

tests/v1/core/test_scheduler.py

Lines changed: 0 additions & 1 deletion
@@ -1425,7 +1425,6 @@ def create_scheduler_with_priority(
     cache_config = CacheConfig(
         block_size=block_size,
         gpu_memory_utilization=0.9,
-        swap_space=0,
         cache_dtype="auto",
         **kwargs_cache,
     )

tests/v1/core/utils.py

Lines changed: 0 additions & 1 deletion
@@ -89,7 +89,6 @@ def create_scheduler(
     cache_config = CacheConfig(
         block_size=block_size,
         gpu_memory_utilization=0.9,
-        swap_space=0,
         cache_dtype="auto",
         **kwargs_cache,
     )

tests/v1/kv_connector/unit/utils.py

Lines changed: 0 additions & 1 deletion
@@ -105,7 +105,6 @@ def create_vllm_config(
     cache_config = CacheConfig(
         block_size=block_size,
         gpu_memory_utilization=0.9,
-        swap_space=0,
         cache_dtype="auto",
         enable_prefix_caching=True,
     )

tests/v1/tpu/worker/test_tpu_model_runner.py

Lines changed: 0 additions & 1 deletion
@@ -39,7 +39,6 @@ def get_vllm_config():
     cache_config = CacheConfig(
         block_size=16,
         gpu_memory_utilization=0.9,
-        swap_space=0,
         cache_dtype="auto",
     )
     vllm_config = VllmConfig(
