Skip to content

Commit 1e27fe7

Browse files
committed
Fixed issue with quantizer_compressed
Signed-off-by: Amit Raj <[email protected]>
1 parent 6b06ea2 commit 1e27fe7

File tree

1 file changed

+16
-3
lines changed

1 file changed

+16
-3
lines changed

QEfficient/transformers/quantizers/quantizer_compressed_tensors.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,12 +130,20 @@ def forward(self, x):
130130

131131
class QEffFP8Config(QuantizationConfigMixin):
132132
def __init__(
133-
self, quant_method: str, activation_scheme: str, ignored_layers: List[str] = None, kv_cache_scheme: str = None
133+
self,
134+
quant_method: str,
135+
activation_scheme: str,
136+
ignored_layers: List[str] = None,
137+
kv_cache_scheme: str = None,
138+
run_compressed: bool = True,
134139
):
135140
self.quant_method = quant_method
136141
self.activation_scheme = activation_scheme
137142
self.ignored_layers = ignored_layers
138143
self.kv_cache_scheme = kv_cache_scheme
144+
self.run_compressed = run_compressed
145+
self.quantization_config = None
146+
self.sparsity_config = None
139147
if kv_cache_scheme:
140148
logger.warning(
141149
f"kv_cache_scheme={kv_cache_scheme} will be ignored please use `mxint8_kv_cache=True` during compile call if you want to keep kv cache in int8 at runtime on Cloud AI 100"
@@ -156,7 +164,7 @@ def __init__(self, quantization_config, **kwargs):
156164
raise TypeError(f"Only {QEffFP8Config} is supported for initialization got {type(quantization_config)}")
157165

158166
self.quantization_config = quantization_config
159-
167+
self.run_compressed = quantization_config.run_compressed
160168
# -- Handle extra kwargs below --
161169
self.modules_to_not_convert = kwargs.pop("modules_to_not_convert", [])
162170
self.modules_to_not_convert = list(
@@ -216,6 +224,7 @@ def __init__(
216224
ignore=None,
217225
sparsity_config=None,
218226
quant_method="compressed-tensors",
227+
run_compressed: bool = True,
219228
**kwargs,
220229
):
221230
self.config_groups = config_groups
@@ -226,6 +235,10 @@ def __init__(
226235
self.global_compression_ratio = global_compression_ratio
227236
self.ignore = ignore
228237

238+
self.quantization_config = None
239+
self.sparsity_config = None
240+
241+
self.run_compressed = run_compressed
229242
# Validate configuration
230243
if len(self.config_groups) != 1:
231244
raise NotImplementedError(
@@ -318,7 +331,7 @@ def __init__(self, quantization_config, **kwargs):
318331
raise TypeError(
319332
f"Only {QEffCompressedTensorsConfig} is supported for initialization got {type(quantization_config)}"
320333
)
321-
334+
self.run_compressed = quantization_config.run_compressed
322335
self.quantization_config = quantization_config
323336

324337
# -- Handle extra kwargs below --

0 commit comments

Comments (0)