diff --git a/serving/docker/partition/sm_neo_dispatcher.py b/serving/docker/partition/sm_neo_dispatcher.py index db765f9cf..fd46fe935 100644 --- a/serving/docker/partition/sm_neo_dispatcher.py +++ b/serving/docker/partition/sm_neo_dispatcher.py @@ -136,10 +136,10 @@ def dispatch(self): python_exec = VLLM_VENV_EXEC else: python_exec = LMI_DIST_VENV_EXEC - print(f"Sharding Model...") + print("Sharding Model...") self.run_task(NeoTask.SHARDING, python_exec) else: - self.run_task(NeoTask.QUANTIZATION, LMI_DIST_VENV_EXEC) + self.run_task(NeoTask.QUANTIZATION, VLLM_VENV_EXEC) case "trtllm": self.run_task(NeoTask.TENSORRT_LLM, SYSTEM_PY_EXEC) case "vllm,lmi-dist,tnx": diff --git a/serving/docker/requirements-vllm.txt b/serving/docker/requirements-vllm.txt index 8b1f0b1fa..0c5ace26d 100644 --- a/serving/docker/requirements-vllm.txt +++ b/serving/docker/requirements-vllm.txt @@ -1,3 +1,3 @@ peft==0.14.0 -llmcompressor -vllm==0.7.1 \ No newline at end of file +llmcompressor==0.4.0 +vllm==0.7.1