Skip to content

Commit

Permalink
Use VLLM python venv for quantization + make llm compressor version explicit in vllm venv (#2716)
Browse files Browse the repository at this point in the history
  • Loading branch information
a-ys authored Feb 5, 2025
1 parent ba3950c commit 13f119a
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
4 changes: 2 additions & 2 deletions serving/docker/partition/sm_neo_dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,10 @@ def dispatch(self):
python_exec = VLLM_VENV_EXEC
else:
python_exec = LMI_DIST_VENV_EXEC
print(f"Sharding Model...")
print("Sharding Model...")
self.run_task(NeoTask.SHARDING, python_exec)
else:
self.run_task(NeoTask.QUANTIZATION, LMI_DIST_VENV_EXEC)
self.run_task(NeoTask.QUANTIZATION, VLLM_VENV_EXEC)
case "trtllm":
self.run_task(NeoTask.TENSORRT_LLM, SYSTEM_PY_EXEC)
case "vllm,lmi-dist,tnx":
Expand Down
4 changes: 2 additions & 2 deletions serving/docker/requirements-vllm.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
peft==0.14.0
llmcompressor
vllm==0.7.1
llmcompressor==0.4.0
vllm==0.7.1

0 comments on commit 13f119a

Please sign in to comment.