Commit a0dce93

[Misc] Add compressed-tensors to optimized quant list (vllm-project#7006)
1 parent 35e9c12 commit a0dce93

File tree

1 file changed: 6 additions, 4 deletions

vllm/config.py

Lines changed: 6 additions & 4 deletions
@@ -197,13 +197,17 @@ def _verify_embedding_mode(self) -> None:
     def _parse_quant_hf_config(self):
         quant_cfg = getattr(self.hf_config, "quantization_config", None)
         if quant_cfg is None:
-            # compress-tensors uses a "compression_config" key
+            # compressed-tensors uses a "compression_config" key
             quant_cfg = getattr(self.hf_config, "compression_config", None)
         return quant_cfg
 
     def _verify_quantization(self) -> None:
         supported_quantization = [*QUANTIZATION_METHODS]
         rocm_supported_quantization = ["gptq", "squeezellm"]
+        optimized_quantization_methods = [
+            "fp8", "marlin", "gptq_marlin_24", "gptq_marlin", "awq_marlin",
+            "fbgemm_fp8", "compressed_tensors", "compressed-tensors"
+        ]
         if self.quantization is not None:
             self.quantization = self.quantization.lower()
 
@@ -242,9 +246,7 @@ def _verify_quantization(self) -> None:
                 raise ValueError(
                     f"{self.quantization} quantization is currently not "
                     f"supported in ROCm.")
-            if (self.quantization
-                    not in ("fp8", "marlin", "gptq_marlin_24", "gptq_marlin",
-                            "awq_marlin", "fbgemm_fp8", "compressed_tensors")):
+            if self.quantization not in optimized_quantization_methods:
                 logger.warning(
                     "%s quantization is not fully "
                     "optimized yet. The speed can be slower than "
