Commit 9d104b5

[CI/Build] Update Ruff version (vllm-project#8469)
Signed-off-by: Aaron Pham <[email protected]>
Co-authored-by: Cyrus Leung <[email protected]>
1 parent 6ffa3f3 commit 9d104b5

27 files changed: +50, -77 lines

.github/workflows/ruff.yml (+2, -2)

@@ -25,10 +25,10 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install ruff==0.1.5 codespell==2.3.0 tomli==2.0.1 isort==5.13.2
+        pip install -r requirements-lint.txt
     - name: Analysing the code with ruff
       run: |
-        ruff .
+        ruff check .
     - name: Spelling check with codespell
       run: |
         codespell --toml pyproject.toml

benchmarks/kernels/graph_machete_bench.py (+1, -3)

@@ -45,8 +45,7 @@
     rows = int(math.ceil(len(results) / 2))
     fig, axs = plt.subplots(rows, 2, figsize=(12, 5 * rows))
     axs = axs.flatten()
-    axs_idx = 0
-    for shape, data in results.items():
+    for axs_idx, (shape, data) in enumerate(results.items()):
         plt.sca(axs[axs_idx])
         df = pd.DataFrame(data)
         sns.lineplot(data=df,
@@ -59,6 +58,5 @@
                      palette="Dark2")
         plt.title(f"Shape: {shape}")
         plt.ylabel("time (median, s)")
-        axs_idx += 1
     plt.tight_layout()
     plt.savefig("graph_machete_bench.pdf")
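
Review note: the change above swaps a hand-maintained counter for enumerate(), which yields the index together with each item. A minimal, self-contained sketch of the pattern (illustrative data only, not the benchmark's real results dict):

    # Hypothetical stand-in for the benchmark's results dict.
    results = {"shape_a": [1, 2], "shape_b": [3, 4]}

    # Before: the index is tracked manually and incremented at the loop tail.
    axs_idx = 0
    for shape, data in results.items():
        print(axs_idx, shape, data)
        axs_idx += 1

    # After: enumerate() supplies the index, so it cannot drift or be forgotten.
    for axs_idx, (shape, data) in enumerate(results.items()):
        print(axs_idx, shape, data)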

format.sh (+2, -2)

@@ -159,7 +159,7 @@ echo 'vLLM codespell: Done'

 # Lint specified files
 lint() {
-    ruff "$@"
+    ruff check "$@"
 }

 # Lint files that differ from main branch. Ignores dirs that are not slated
@@ -175,7 +175,7 @@ lint_changed() {

     if ! git diff --diff-filter=ACM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyi' &>/dev/null; then
         git diff --name-only --diff-filter=ACM "$MERGEBASE" -- '*.py' '*.pyi' | xargs \
-             ruff
+             ruff check
     fi

 }

pyproject.toml (+2, -0)

@@ -42,6 +42,8 @@ ignore = [
     "E731",
     # Loop control variable not used within loop body
     "B007",
+    # f-string format
+    "UP032",
 ]

 [tool.mypy]
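
Review note: UP032 is the pyupgrade-derived Ruff rule that rewrites str.format() calls as f-strings; adding it to the ignore list keeps the new Ruff release from flagging existing call sites. A small illustration of what the rule targets (values are made up):

    # UP032 flags the .format() call and would suggest the f-string below.
    name, count = "vllm", 3
    legacy = "{} has {} open items".format(name, count)  # flagged by UP032
    modern = f"{name} has {count} open items"            # suggested rewrite
    assert legacy == modern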

requirements-lint.txt (+1, -1)

@@ -2,7 +2,7 @@
 yapf==0.32.0
 toml==0.10.2
 tomli==2.0.1
-ruff==0.1.5
+ruff==0.6.5
 codespell==2.3.0
 isort==5.13.2
 clang-format==18.1.5

tests/conftest.py (+1, -4)

@@ -158,10 +158,7 @@ def should_do_global_cleanup_after_test(request) -> bool:
     to initialize torch.
     """

-    if request.node.get_closest_marker("skip_global_cleanup"):
-        return False
-
-    return True
+    return not request.node.get_closest_marker("skip_global_cleanup")


 @pytest.fixture(autouse=True)
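
Review note: the fixture now returns a boolean expression directly. Since pytest's get_closest_marker() returns either a marker object or None, `not marker` still yields a plain bool, so the -> bool annotation holds. A rough sketch with a stand-in node object (the class below is hypothetical, not pytest internals):

    class _FakeNode:  # stand-in for request.node
        def __init__(self, markers):
            self._markers = markers

        def get_closest_marker(self, name):
            # Returns a marker-like object or None, mirroring pytest's behaviour.
            return self._markers.get(name)

    def should_do_global_cleanup(node) -> bool:
        return not node.get_closest_marker("skip_global_cleanup")

    assert should_do_global_cleanup(_FakeNode({})) is True
    assert should_do_global_cleanup(_FakeNode({"skip_global_cleanup": object()})) is False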

tests/lora/conftest.py (+1, -4)

@@ -65,10 +65,7 @@ def should_do_global_cleanup_after_test(request) -> bool:
     to initialize torch.
     """

-    if request.node.get_closest_marker("skip_global_cleanup"):
-        return False
-
-    return True
+    return not request.node.get_closest_marker("skip_global_cleanup")


 @pytest.fixture(autouse=True)

tests/multimodal/test_base.py (+1, -1)

@@ -5,7 +5,7 @@

 def assert_nested_tensors_equal(expected: NestedTensors,
                                 actual: NestedTensors):
-    assert type(expected) == type(actual)
+    assert type(expected) == type(actual)  # noqa: E721
     if isinstance(expected, torch.Tensor):
         assert torch.equal(expected, actual)
     else:
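
Review note: E721 normally asks for isinstance() instead of comparing types with ==, but this assertion wants an exact type match, so the rule is silenced with `# noqa: E721` rather than rewritten. The difference, in a toy example:

    class MyList(list):
        pass

    a, b = [1, 2], MyList([1, 2])
    assert isinstance(b, list)     # isinstance() accepts the subclass
    assert type(a) is not type(b)  # an exact type comparison does not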

tests/test_cache_block_hashing.py (+1, -4)

@@ -66,8 +66,7 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int,

         hashes.append([])
         prompts = [prefix + prompt for prompt in sample_prompts]
-        seq_id = 0
-        for prompt in prompts:
+        for seq_id, prompt in enumerate(prompts):
             hashes[-1].append([])
             prompt_token_ids = tokenizer.encode(prompt)
             seq = Sequence(seq_id,
@@ -83,8 +82,6 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int,
             for idx in range(num_blocks):
                 hashes[-1][-1].append(seq.hash_of_block(idx))

-            seq_id += 1
-
     # Check that hashes made with two prefixes with different first blocks are
     # different everywhere.
     for hash0, hash1 in zip(flatten_2d(hashes[0]), flatten_2d(hashes[1])):

tests/test_logger.py (+2, -2)

@@ -111,7 +111,7 @@ def test_an_error_is_raised_when_custom_logging_config_file_does_not_exist():
     configuration occurs."""
     with pytest.raises(RuntimeError) as ex_info:
         _configure_vllm_root_logger()
-    assert ex_info.type == RuntimeError
+    assert ex_info.type == RuntimeError  # noqa: E721
     assert "File does not exist" in str(ex_info)


@@ -152,7 +152,7 @@ def test_an_error_is_raised_when_custom_logging_config_is_unexpected_json(
                logging_config_file.name):
         with pytest.raises(ValueError) as ex_info:
             _configure_vllm_root_logger()
-        assert ex_info.type == ValueError
+        assert ex_info.type == ValueError  # noqa: E721
         assert "Invalid logging config. Expected Dict, got" in str(ex_info)



tests/worker/test_encoder_decoder_model_runner.py (+1, -3)

@@ -453,8 +453,7 @@ def test_prepare_decode(batch_size):
     # each sequence) in the decode phase

     expected_selected_token_indices = []
-    selected_token_start_idx = 0
-    for seq_len in seq_lens:
+    for selected_token_start_idx, seq_len in enumerate(seq_lens):
         # Compute the index offset of the final token in each
         # sequence's decoded outputs; since a single token is
         # decoded per iteration per sequence, then the length
@@ -463,7 +462,6 @@ def test_prepare_decode(batch_size):
         # generated tokens is 0 (i.e. the expected sampling index
         # for a given sequence is just `selected_token_start_idx`)
         expected_selected_token_indices.append(selected_token_start_idx)
-        selected_token_start_idx += 1

     sampling_metadata = model_input.sampling_metadata
     actual = sampling_metadata.selected_token_indices

tests/worker/test_model_runner.py (+1, -3)

@@ -241,10 +241,8 @@ def test_prepare_decode_cuda_graph(batch_size):

     # Verify Sampling
     expected_selected_token_indices = []
-    selected_token_start_idx = 0
-    for _ in context_lens:
+    for selected_token_start_idx, _ in enumerate(context_lens):
         expected_selected_token_indices.append(selected_token_start_idx)
-        selected_token_start_idx += 1
     sampling_metadata = SamplingMetadata.prepare(
         seq_group_metadata_list,
         seq_lens,

vllm/adapter_commons/utils.py (+1, -1)

@@ -42,7 +42,7 @@ def list_adapters(registered_adapters: Dict[int, Any]) -> Dict[int, Any]:

 def get_adapter(adapter_id: int,
                 registered_adapters: Dict[int, Any]) -> Optional[Any]:
-    return registered_adapters.get(adapter_id, None)
+    return registered_adapters.get(adapter_id)


 ## worker functions
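
Review note: dict.get(key, None) and dict.get(key) are equivalent because None is already the default, so the updated Ruff version can flag the explicit None as redundant. For example:

    registered = {1: "adapter-a"}  # hypothetical adapter registry
    assert registered.get(2) is None        # a missing key already yields None
    assert registered.get(2, None) is None  # so the explicit default adds nothing
    assert registered.get(1) == "adapter-a"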

vllm/attention/backends/utils.py (+2, -4)

@@ -33,10 +33,8 @@ def is_block_tables_empty(block_tables: Union[None, Dict]):
     """
     if block_tables is None:
         return True
-    if isinstance(block_tables, dict) and all(
-            value is None for value in block_tables.values()):
-        return True
-    return False
+    return (isinstance(block_tables, dict)
+            and all(value is None for value in block_tables.values()))


 def compute_slot_mapping_start_idx(is_prompt: bool, query_len: int,
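
Review note: this file, and several others below, apply the same simplification: an `if cond: return True ... return False` ladder collapses into returning the condition itself. A minimal sketch of the pattern, independent of vLLM's block-table types:

    from typing import Dict, Union

    # Before: explicit True/False branches.
    def is_empty_verbose(tables: Union[None, Dict]) -> bool:
        if tables is None:
            return True
        if isinstance(tables, dict) and all(
                v is None for v in tables.values()):
            return True
        return False

    # After: the boolean expression is returned directly.
    def is_empty(tables: Union[None, Dict]) -> bool:
        if tables is None:
            return True
        return (isinstance(tables, dict)
                and all(v is None for v in tables.values()))

    for case in (None, {}, {1: None}, {1: [0]}):
        assert is_empty_verbose(case) == is_empty(case)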

vllm/core/block/prefix_caching_block.py (+1, -3)

@@ -417,9 +417,7 @@ def get_prefix_cache_hit_rate(self) -> float:

     def is_block_cached(self, block: Block) -> bool:
         assert block.content_hash is not None
-        if block.content_hash in self._cached_blocks:
-            return True
-        return False
+        return block.content_hash in self._cached_blocks

     def promote_to_immutable_block(self, block: Block) -> BlockId:
         """Once a mutable block is full, it can be promoted to an immutable

vllm/core/block_manager_v2.py (+1, -3)

@@ -399,9 +399,7 @@ def can_swap_out(self, seq_group: SequenceGroup) -> bool:
         """
         alloc_status = self._can_swap(seq_group, Device.CPU,
                                       SequenceStatus.RUNNING)
-        if alloc_status == AllocStatus.OK:
-            return True
-        return False
+        return alloc_status == AllocStatus.OK

     def swap_out(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]:
         """Returns the block id mapping (from GPU to CPU) generated by

vllm/engine/async_llm_engine.py (+3, -3)

@@ -826,7 +826,7 @@ async def generate(
             request_id: The unique id of the request.
             lora_request: LoRA request to use for generation, if any.
             trace_headers: OpenTelemetry trace headers.
-            prompt_adapter_request: Prompt Adapter request to use 
+            prompt_adapter_request: Prompt Adapter request to use
                                             for generation, if any.

         Yields:
@@ -1042,15 +1042,15 @@ def remove_logger(self, logger_name: str) -> None:
     async def start_profile(self) -> None:
         # using type instead of isinstance to check to avoid capturing
         # inherited classes
-        if type(self.engine.model_executor) == GPUExecutorAsync:
+        if type(self.engine.model_executor) == GPUExecutorAsync:  # noqa: E721
             self.engine.model_executor.start_profile()
         else:
             self.engine.model_executor._run_workers("start_profile")

     async def stop_profile(self) -> None:
         # using type instead of isinstance to check to avoid capturing
         # inherited classes
-        if type(self.engine.model_executor) == GPUExecutorAsync:
+        if type(self.engine.model_executor) == GPUExecutorAsync:  # noqa: E721
             self.engine.model_executor.stop_profile()
         else:
             self.engine.model_executor._run_workers("stop_profile")

vllm/engine/llm_engine.py (+3, -3)

@@ -144,7 +144,7 @@ class LLMEngine:
             decoding.
         executor_class: The model executor class for managing distributed
             execution.
-        prompt_adapter_config (Optional): The configuration related to serving 
+        prompt_adapter_config (Optional): The configuration related to serving
             prompt adapters.
         log_stats: Whether to log statistics.
         usage_context: Specified entry point, used for usage info collection.
@@ -1605,15 +1605,15 @@ def check_health(self) -> None:
     def start_profile(self) -> None:
         # using type instead of isinstance to check to avoid capturing
         # inherited classes (MultiprocessingGPUExecutor)
-        if type(self.model_executor) == GPUExecutor:
+        if type(self.model_executor) == GPUExecutor:  # noqa: E721
             self.model_executor.start_profile()
         else:
             self.model_executor._run_workers("start_profile")

     def stop_profile(self) -> None:
         # using type instead of isinstance to check to avoid capturing
         # inherited classes (MultiprocessingGPUExecutor)
-        if type(self.model_executor) == GPUExecutor:
+        if type(self.model_executor) == GPUExecutor:  # noqa: E721
             self.model_executor.stop_profile()
         else:
             self.model_executor._run_workers("stop_profile")

vllm/model_executor/guided_decoding/outlines_logits_processors.py (+2, -2)

@@ -67,9 +67,9 @@ def __call__(self, input_ids: List[int],
         instruction = self._guide.get_next_instruction(
             state=self._fsm_state[seq_id])

-        if type(instruction) == Generate:
+        if type(instruction) == Generate:  # noqa: E721
             allowed_tokens = instruction.tokens
-        elif type(instruction) == Write:
+        elif type(instruction) == Write:  # noqa: E721
             # TODO: support fast forward tokens
             allowed_tokens = [instruction.tokens[0]]
         else:

vllm/model_executor/layers/quantization/awq_marlin.py (+3, -3)

@@ -110,9 +110,9 @@ def get_scaled_act_names(self) -> List[str]:
     def is_awq_marlin_compatible(cls, quant_config: Dict[str, Any]):
         # Extract data from quant config.
         quant_method = quant_config.get("quant_method", "").lower()
-        num_bits = quant_config.get("bits", None)
-        group_size = quant_config.get("group_size", None)
-        has_zp = quant_config.get("zero_point", None)
+        num_bits = quant_config.get("bits")
+        group_size = quant_config.get("group_size")
+        has_zp = quant_config.get("zero_point")

         if quant_method != "awq":
             return False

vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py (+7, -7)

@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, cast

 import torch
 from pydantic import BaseModel
@@ -79,8 +79,8 @@ def get_quant_method(
     @classmethod
     def from_config(cls, config: Dict[str, Any]) -> "CompressedTensorsConfig":
         target_scheme_map: Dict[str, Any] = dict()
-        ignore: List[str] = config.get("ignore", None)
-        quant_format: str = config.get("format", None)
+        ignore = cast(List[str], config.get("ignore"))
+        quant_format = cast(str, config.get("format"))

         # The quant_config has multiple config_groups, each containing
         # an input_activations key with details about how the activations are
@@ -200,7 +200,7 @@ def _is_fp8_w8a16(self, weight_quant: BaseModel,
         is_per_tensor_or_channel_weight = (weight_quant.strategy in [
             QuantizationStrategy.TENSOR, QuantizationStrategy.CHANNEL
         ])
-        if not (is_symmetric_weight and is_static_weight
+        if not (is_symmetric_weight and is_static_weight  # noqa: SIM103
                 and is_per_tensor_or_channel_weight):
             return False

@@ -333,7 +333,7 @@ def create_weights(self, layer: torch.nn.Module,
                        output_size: int, params_dtype: torch.dtype,
                        **extra_weight_attrs):
         """
-        Use the CompressedTensorsScheme associated with each layer to create 
+        Use the CompressedTensorsScheme associated with each layer to create
         the necessary parameters for the layer. See LinearMethodBase for param
         details
         """
@@ -352,8 +352,8 @@ def apply(self,
               x: torch.Tensor,
               bias: Optional[torch.Tensor] = None):
         """
-        Use the output of create_weights and the CompressedTensorsScheme 
-        associated with the layer to apply the forward pass with the 
+        Use the output of create_weights and the CompressedTensorsScheme
+        associated with the layer to apply the forward pass with the
         layer input. See LinearMethodBase for param details

         """

vllm/model_executor/layers/quantization/gptq_marlin.py (+4, -4)

@@ -132,10 +132,10 @@ def get_scaled_act_names(self) -> List[str]:
     def is_gptq_marlin_compatible(cls, quant_config: Dict[str, Any]):
         # Extract data from quant config.
         quant_method = quant_config.get("quant_method", "").lower()
-        num_bits = quant_config.get("bits", None)
-        group_size = quant_config.get("group_size", None)
-        sym = quant_config.get("sym", None)
-        desc_act = quant_config.get("desc_act", None)
+        num_bits = quant_config.get("bits")
+        group_size = quant_config.get("group_size")
+        sym = quant_config.get("sym")
+        desc_act = quant_config.get("desc_act")

         if quant_method != "gptq":
             return False

vllm/model_executor/model_loader/tensorizer.py (+1, -3)

@@ -408,9 +408,7 @@ def is_vllm_tensorized(tensorizer_config: "TensorizerConfig") -> bool:
             "inferred as vLLM models, so setting vllm_tensorized=True is "
             "only necessary for models serialized prior to this change.")
         return True
-    if (".vllm_tensorized_marker" in deserializer):
-        return True
-    return False
+    return ".vllm_tensorized_marker" in deserializer


 def serialize_vllm_model(

vllm/model_executor/models/minicpmv.py (+1, -1)

@@ -884,7 +884,7 @@ def __new__(
         version = str(config.version).split(".")
         version = tuple([int(x) for x in version])
         # Dispatch class based on version
-        instance_class = _SUPPORT_VERSION.get(version, None)
+        instance_class = _SUPPORT_VERSION.get(version)
         if instance_class is None:
             raise ValueError(
                 "Currently, MiniCPMV only supports versions 2.0, 2.5, and 2.6")

vllm/spec_decode/draft_model_runner.py (+1, -4)

@@ -183,10 +183,7 @@ def supports_gpu_multi_step(self, execute_model_req: ExecuteModelRequest):
             return False

         # TODO: Add soft-tuning prompt adapter support
-        if self.prompt_adapter_config:
-            return False
-
-        return True
+        return not self.prompt_adapter_config

     @torch.inference_mode()
     def execute_model(

vllm/spec_decode/metrics.py (+2, -5)

@@ -104,13 +104,10 @@ def _should_collect_rejsample_metrics(self, now: float) -> bool:
         if self._rank != 0:
             return False

-        if (now - self._last_metrics_collect_time <
-                self._rejsample_metrics_collect_interval_s):
-            return False
-        return True
+        return now - self._last_metrics_collect_time >= self._rejsample_metrics_collect_interval_s  # noqa: E501

     def _copy_rejsample_metrics_async(self) -> torch.cuda.Event:
-        """Copy rejection/typical-acceptance sampling metrics 
+        """Copy rejection/typical-acceptance sampling metrics
         (number of accepted tokens, etc) to CPU asynchronously.

         Returns a CUDA event recording when the copy is complete.
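
Review note: besides returning the condition directly, this simplification flips the comparison: returning True only when the elapsed time is not below the interval is the same as returning `elapsed >= interval`, and the resulting long line is then exempted from the line-length rule with `# noqa: E501`. A quick equivalence check:

    def should_collect_old(elapsed: float, interval: float) -> bool:
        if elapsed < interval:
            return False
        return True

    def should_collect_new(elapsed: float, interval: float) -> bool:
        return elapsed >= interval

    for elapsed, interval in [(0.5, 1.0), (1.0, 1.0), (2.0, 1.0)]:
        assert should_collect_old(elapsed, interval) == should_collect_new(elapsed, interval)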
