From 13f119ae5deacf066cc3372415ebd09180005ed3 Mon Sep 17 00:00:00 2001 From: Andrew Song <40076917+a-ys@users.noreply.github.com> Date: Tue, 4 Feb 2025 16:57:57 -0800 Subject: [PATCH] Use VLLM python venv for quantization + make llm compressor version explicit in vllm venv (#2716) --- serving/docker/partition/sm_neo_dispatcher.py | 4 ++-- serving/docker/requirements-vllm.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/serving/docker/partition/sm_neo_dispatcher.py b/serving/docker/partition/sm_neo_dispatcher.py index db765f9cf..fd46fe935 100644 --- a/serving/docker/partition/sm_neo_dispatcher.py +++ b/serving/docker/partition/sm_neo_dispatcher.py @@ -136,10 +136,10 @@ def dispatch(self): python_exec = VLLM_VENV_EXEC else: python_exec = LMI_DIST_VENV_EXEC - print(f"Sharding Model...") + print("Sharding Model...") self.run_task(NeoTask.SHARDING, python_exec) else: - self.run_task(NeoTask.QUANTIZATION, LMI_DIST_VENV_EXEC) + self.run_task(NeoTask.QUANTIZATION, VLLM_VENV_EXEC) case "trtllm": self.run_task(NeoTask.TENSORRT_LLM, SYSTEM_PY_EXEC) case "vllm,lmi-dist,tnx": diff --git a/serving/docker/requirements-vllm.txt b/serving/docker/requirements-vllm.txt index 8b1f0b1fa..0c5ace26d 100644 --- a/serving/docker/requirements-vllm.txt +++ b/serving/docker/requirements-vllm.txt @@ -1,3 +1,3 @@ peft==0.14.0 -llmcompressor -vllm==0.7.1 \ No newline at end of file +llmcompressor==0.4.0 +vllm==0.7.1