Fix CUDA issue: disable the torch.cuda.empty_cache() call in run_calibration_forward.

Vladimir Leskov · Vladimir Leskov · commit d92a609d706f · 2024-12-11T22:08:25.000+01:00
Details: vllm-project#963
diff --git a/src/llmcompressor/modifiers/utils/pytorch_helpers.py b/src/llmcompressor/modifiers/utils/pytorch_helpers.py
@@ -102,7 +102,7 @@ def run_calibration_forward(
 
         # TODO: not ideal, figure out where we aren't freeing memory instead
         # currently without this we run OOM on the 2nd forward pass
-        torch.cuda.empty_cache()
+        # torch.cuda.empty_cache()
 
     return intermediates