
Commit cfaf49a

[Misc] Define common requirements (vllm-project#3841)
1 parent 9edec65 commit cfaf49a

11 files changed: 62 additions and 77 deletions

.github/workflows/publish.yml

Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,7 @@ jobs:
       matrix:
           os: ['ubuntu-20.04']
           python-version: ['3.8', '3.9', '3.10', '3.11']
-          pytorch-version: ['2.2.1'] # Must be the most recent version that meets requirements.txt.
+          pytorch-version: ['2.2.1'] # Must be the most recent version that meets requirements-cuda.txt.
           cuda-version: ['11.8', '12.1']

     steps:

.github/workflows/scripts/build.sh

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@ LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH

 # Install requirements
 $python_executable -m pip install wheel packaging
-$python_executable -m pip install -r requirements.txt
+$python_executable -m pip install -r requirements-cuda.txt

 # Limit the number of parallel jobs to avoid OOM
 export MAX_JOBS=1

CONTRIBUTING.md

Lines changed: 0 additions & 1 deletion
@@ -21,7 +21,6 @@ Express your support on Twitter if vLLM aids you, or simply offer your appreciat
 ### Build from source

 ```bash
-pip install -r requirements.txt
 pip install -e . # This may take several minutes.
 ```
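
With this change, pip install -e . resolves dependencies through setup.py's get_requirements(), which now reads the platform-specific requirements files (see the setup.py diff below), so the separate requirements install step is no longer needed.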

Dockerfile

Lines changed: 5 additions & 3 deletions
@@ -17,9 +17,10 @@ RUN ldconfig /usr/local/cuda-12.1/compat/
 WORKDIR /workspace

 # install build and runtime dependencies
-COPY requirements.txt requirements.txt
+COPY requirements-common.txt requirements-common.txt
+COPY requirements-cuda.txt requirements-cuda.txt
 RUN --mount=type=cache,target=/root/.cache/pip \
-    pip install -r requirements.txt
+    pip install -r requirements-cuda.txt

 # install development dependencies
 COPY requirements-dev.txt requirements-dev.txt
@@ -51,7 +52,8 @@ COPY csrc csrc
 COPY setup.py setup.py
 COPY cmake cmake
 COPY CMakeLists.txt CMakeLists.txt
-COPY requirements.txt requirements.txt
+COPY requirements-common.txt requirements-common.txt
+COPY requirements-cuda.txt requirements-cuda.txt
 COPY pyproject.toml pyproject.toml
 COPY vllm vllm
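
Both requirements files must be copied into the image because pip resolves the -r requirements-common.txt include relative to the referencing file; installing requirements-cuda.txt would fail without requirements-common.txt next to it.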

MANIFEST.in

Lines changed: 2 additions & 1 deletion
@@ -1,5 +1,6 @@
 include LICENSE
-include requirements.txt
+include requirements-common.txt
+include requirements-cuda.txt
 include CMakeLists.txt

 recursive-include cmake *

requirements-common.txt

Lines changed: 3 additions & 9 deletions
@@ -1,20 +1,14 @@
-cmake>=3.21
+cmake >= 3.21
 ninja # For faster builds.
 psutil
-ray >= 2.9
 sentencepiece # Required for LLaMA tokenizer.
 numpy
-torch == 2.2.1
 requests
 py-cpuinfo
 transformers >= 4.39.1 # Required for StarCoder2 & Llava.
-xformers == 0.0.25 # Requires PyTorch 2.2.1.
 fastapi
 uvicorn[standard]
 pydantic >= 2.0 # Required for OpenAI server.
 prometheus_client >= 0.18.0
-pynvml == 11.5.0
-triton >= 2.1.0
-outlines == 0.0.34
-tiktoken == 0.6.0 # Required for DBRX tokenizer
-vllm-nccl-cu12>=2.18,<2.19 # for downloading nccl library
+tiktoken == 0.6.0 # Required for DBRX tokenizer
+outlines == 0.0.34 # Requires torch >= 2.1.0
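
The common file now keeps only platform-neutral dependencies; ray, torch, xformers, pynvml, triton, and vllm-nccl-cu12 move to requirements-cuda.txt, and each platform file pulls the shared set back in via -r requirements-common.txt.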

requirements-cpu.txt

Lines changed: 6 additions & 15 deletions
@@ -1,15 +1,6 @@
-cmake>=3.21
-ninja # For faster builds.
-psutil
-ray >= 2.9
-sentencepiece # Required for LLaMA tokenizer.
-numpy
-transformers >= 4.38.0 # Required for Gemma.
-fastapi
-uvicorn[standard]
-pydantic >= 2.0 # Required for OpenAI server.
-prometheus_client >= 0.18.0
-torch == 2.2.1+cpu
-triton >= 2.1.0
-filelock == 3.13.3
-py-cpuinfo
+# Common dependencies
+-r requirements-common.txt
+
+# Dependencies for x86_64 CPUs
+torch == 2.2.1+cpu
+triton >= 2.1.0 # FIXME(woosuk): This is a hack to avoid import error.

requirements-cuda.txt

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+# Common dependencies
+-r requirements-common.txt
+
+# Dependencies for NVIDIA GPUs
+ray >= 2.9
+pynvml == 11.5.0
+vllm-nccl-cu12>=2.18,<2.19 # for downloading nccl library
+torch == 2.2.1
+xformers == 0.0.25 # Requires PyTorch 2.2.1
+triton >= 2.1.0
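
With this layout, pip install -r requirements-cuda.txt (as used in the Dockerfile and build script above) installs the common dependencies plus the NVIDIA-specific pins in one step. As a rough standalone illustration (not part of the commit), the nesting can be flattened with the same recursive expansion the new _read_requirements helper in setup.py performs; the file path here assumes the repository root:

```python
from pathlib import Path
from typing import List


def flatten_requirements(path: Path) -> List[str]:
    """Recursively expand '-r <file>' includes into a flat list of lines."""
    lines: List[str] = []
    for raw in path.read_text().strip().split("\n"):
        if raw.startswith("-r "):
            # Pip resolves -r includes relative to the referencing file.
            lines += flatten_requirements(path.parent / raw.split()[1])
        else:
            lines.append(raw)
    return lines


# Prints the common dependencies followed by the CUDA-specific ones.
print("\n".join(flatten_requirements(Path("requirements-cuda.txt"))))
```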

requirements-neuron.txt

Lines changed: 4 additions & 9 deletions
@@ -1,12 +1,7 @@
-sentencepiece # Required for LLaMA tokenizer.
-numpy
+# Common dependencies
+-r requirements-common.txt
+
+# Dependencies for Neuron devices
 transformers-neuronx >= 0.9.0
 torch-neuronx >= 2.1.0
 neuronx-cc
-fastapi
-uvicorn[standard]
-pydantic >= 2.0 # Required for OpenAI server.
-prometheus_client >= 0.18.0
-requests
-psutil
-py-cpuinfo

requirements-rocm.txt

Lines changed: 4 additions & 17 deletions
@@ -1,18 +1,5 @@
-cmake>=3.21
-ninja # For faster builds.
-typing-extensions>=4.8.0
-starlette
-requests
-py-cpuinfo
-psutil
+# Common dependencies
+-r requirements-common.txt
+
+# Dependencies for AMD GPUs
 ray == 2.9.3
-sentencepiece # Required for LLaMA tokenizer.
-numpy
-tokenizers>=0.15.0
-transformers >= 4.39.1 # Required for StarCoder2 & Llava.
-fastapi
-uvicorn[standard]
-pydantic >= 2.0 # Required for OpenAI server.
-prometheus_client >= 0.18.0
-outlines == 0.0.34
-tiktoken == 0.6.0 # Required for DBRX tokenizer

setup.py

Lines changed: 26 additions & 20 deletions
@@ -325,32 +325,38 @@ def read_readme() -> str:

 def get_requirements() -> List[str]:
     """Get Python package dependencies from requirements.txt."""
-    if _is_cuda():
-        with open(get_path("requirements.txt")) as f:
+
+    def _read_requirements(filename: str) -> List[str]:
+        with open(get_path(filename)) as f:
             requirements = f.read().strip().split("\n")
-        cuda_major = torch.version.cuda.split(".")[0]
-        modified_requirements = []
-        for req in requirements:
-            if "vllm-nccl-cu12" in req:
-                modified_requirements.append(
-                    req.replace("vllm-nccl-cu12",
-                                f"vllm-nccl-cu{cuda_major}"))
-            else:
-                modified_requirements.append(req)
-        requirements = modified_requirements
+        resolved_requirements = []
+        for line in requirements:
+            if line.startswith("-r "):
+                resolved_requirements += _read_requirements(line.split()[1])
+            else:
+                resolved_requirements.append(line)
+        return resolved_requirements
+
+    if _is_cuda():
+        requirements = _read_requirements("requirements-cuda.txt")
+        cuda_major = torch.version.cuda.split(".")[0]
+        modified_requirements = []
+        for req in requirements:
+            if "vllm-nccl-cu12" in req:
+                modified_requirements.append(
+                    req.replace("vllm-nccl-cu12", f"vllm-nccl-cu{cuda_major}"))
+            else:
+                modified_requirements.append(req)
+        requirements = modified_requirements
     elif _is_hip():
-        with open(get_path("requirements-rocm.txt")) as f:
-            requirements = f.read().strip().split("\n")
+        requirements = _read_requirements("requirements-rocm.txt")
     elif _is_neuron():
-        with open(get_path("requirements-neuron.txt")) as f:
-            requirements = f.read().strip().split("\n")
+        requirements = _read_requirements("requirements-neuron.txt")
     elif _is_cpu():
-        with open(get_path("requirements-cpu.txt")) as f:
-            requirements = f.read().strip().split("\n")
+        requirements = _read_requirements("requirements-cpu.txt")
     else:
         raise ValueError(
-            "Unsupported platform, please use CUDA, ROCM or Neuron.")
-
+            "Unsupported platform, please use CUDA, ROCm, Neuron, or CPU.")
     return requirements
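
The CUDA branch still rewrites the vllm-nccl-cu12 pin so the downloaded nccl package matches the detected CUDA major version. A minimal standalone sketch of that substitution, with a hard-coded version string standing in for torch.version.cuda:

```python
from typing import List

cuda_version = "11.8"  # stand-in for torch.version.cuda, e.g. "11.8" or "12.1"
cuda_major = cuda_version.split(".")[0]

requirements: List[str] = [
    "torch == 2.2.1",
    "vllm-nccl-cu12>=2.18,<2.19 # for downloading nccl library",
]

# Same rewrite as get_requirements(): retarget the nccl package
# to the CUDA major version the installed torch was built against.
requirements = [
    req.replace("vllm-nccl-cu12", f"vllm-nccl-cu{cuda_major}")
    if "vllm-nccl-cu12" in req else req
    for req in requirements
]

print(requirements)  # the nccl pin becomes vllm-nccl-cu11 under CUDA 11.x
```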
