Commit 2ac5db2

docker: precompiled wheel support + docker-aware setup tweaks

The main goal is to avoid building wheels in CI when they are unnecessary and to speed up CI builds overall.

- added VLLM_DOCKER_BUILD_CONTEXT to envs to skip the git + unzip logic in setup.py
- normalized VLLM_USE_PRECOMPILED: only "1" or "true" are treated as true
- setup.py now copies a contextually-named precompiled wheel into dist/ during docker builds
- smoother precompiled wheel flow in docker overall

Signed-off-by: dougbtv <[email protected]>
1 parent 9fb2d22 commit 2ac5db2

File tree

3 files changed (+82 lines, -34 lines):

- docker/Dockerfile
- setup.py
- vllm/envs.py

docker/Dockerfile

Lines changed: 22 additions & 10 deletions
@@ -209,16 +209,7 @@ ARG SCCACHE_REGION_NAME=us-west-2
 ARG SCCACHE_S3_NO_CREDENTIALS=0
 
 # Flag to control whether to use pre-built vLLM wheels
-ARG VLLM_USE_PRECOMPILED
-# TODO: in setup.py VLLM_USE_PRECOMPILED is sensitive to truthiness, it will take =0 as "true", this should be fixed
-ENV VLLM_USE_PRECOMPILED=""
-RUN if [ "${VLLM_USE_PRECOMPILED}" = "1" ]; then \
-        export VLLM_USE_PRECOMPILED=1 && \
-        echo "Using precompiled wheels"; \
-    else \
-        unset VLLM_USE_PRECOMPILED && \
-        echo "Leaving VLLM_USE_PRECOMPILED unset to build wheels from source"; \
-    fi
+ARG VLLM_USE_PRECOMPILED=""
 
 # if USE_SCCACHE is set, use sccache to speed up compilation
 RUN --mount=type=cache,target=/root/.cache/uv \
@@ -235,6 +226,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
         && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
         && export SCCACHE_IDLE_TIMEOUT=0 \
         && export CMAKE_BUILD_TYPE=Release \
+        && export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" \
+        && export VLLM_DOCKER_BUILD_CONTEXT=1 \
         && sccache --show-stats \
         && python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
         && sccache --show-stats; \
@@ -248,9 +241,22 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
         # Clean any existing CMake artifacts
         rm -rf .deps && \
         mkdir -p .deps && \
+        export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" && \
+        export VLLM_DOCKER_BUILD_CONTEXT=1 && \
         python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
     fi
 
+# When using precompiled wheels, keep only the newest manylinux1 wheel and delete others
+RUN if [ "$VLLM_USE_PRECOMPILED" = "1" ]; then \
+        echo "Cleaning up extra wheels in dist/..." && \
+        # Identify the most recent manylinux1_x86_64 wheel
+        KEEP_WHEEL=$(ls -t dist/*manylinux1_x86_64.whl 2>/dev/null | head -n1) && \
+        if [ -n "$KEEP_WHEEL" ]; then \
+            echo "Keeping wheel: $KEEP_WHEEL"; \
+            find dist/ -type f -name "*.whl" ! -path "${KEEP_WHEEL}" -delete; \
+        fi; \
+    fi
+
 # Check the size of the wheel if RUN_WHEEL_CHECK is true
 COPY .buildkite/check-wheel-size.py check-wheel-size.py
 # sync the default value with .buildkite/check-wheel-size.py
@@ -370,9 +376,15 @@ RUN --mount=type=cache,target=/root/.cache/uv \
         --pre pytorch_triton==3.3.0+gitab727c40 ; \
     fi
 
+# Install vllm wheel first, so that torch etc will be installed.
+RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
+    --mount=type=cache,target=/root/.cache/uv \
+    ls -l dist/
+
 # Install vllm wheel first, so that torch etc will be installed.
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
     --mount=type=cache,target=/root/.cache/uv \
+    ls -l dist/ && \
     uv pip install --system dist/*.whl --verbose \
         --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.')
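
Note: the new cleanup RUN step boils down to "find the newest manylinux1 wheel in dist/ and delete the rest". The following is only an illustrative Python sketch of that same behavior (the dist/ directory and the manylinux1_x86_64 tag are taken from the Dockerfile above; the helper name is hypothetical and does not exist in the repo):

    # Illustrative sketch only: the same "keep the newest manylinux1 wheel"
    # cleanup that the RUN step above performs in shell.
    import glob
    import os

    def keep_newest_precompiled_wheel(dist_dir: str = "dist") -> None:
        candidates = glob.glob(os.path.join(dist_dir, "*manylinux1_x86_64.whl"))
        if not candidates:
            return
        # Newest by modification time, matching `ls -t ... | head -n1`.
        keep = max(candidates, key=os.path.getmtime)
        print(f"Keeping wheel: {keep}")
        for whl in glob.glob(os.path.join(dist_dir, "*.whl")):
            if whl != keep:
                os.remove(whl)

Keeping a single wheel in dist/ matters because the later `uv pip install --system dist/*.whl` step would otherwise try to install every wheel left behind.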

setup.py

Lines changed: 52 additions & 22 deletions
@@ -9,6 +9,7 @@
 import re
 import subprocess
 import sys
+import shutil
 from pathlib import Path
 from shutil import which
 
@@ -297,6 +298,10 @@ def get_base_commit_in_main_branch(self) -> str:
            ]).decode("utf-8")
            upstream_main_commit = json.loads(resp_json)["sha"]
 
+            # In Docker build context, .git may be immutable or missing.
+            if envs.VLLM_DOCKER_BUILD_CONTEXT:
+                return upstream_main_commit
+
            # Check if the upstream_main_commit exists in the local repo
            try:
                subprocess.check_output(
@@ -357,19 +362,49 @@ def run(self) -> None:
         # create a temporary directory to store the wheel
         temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
         wheel_path = os.path.join(temp_dir, wheel_filename)
-
         print(f"Downloading wheel from {wheel_location} to {wheel_path}")
-
         from urllib.request import urlretrieve
-
         try:
             urlretrieve(wheel_location, filename=wheel_path)
         except Exception as e:
             from setuptools.errors import SetupError
-
             raise SetupError(
-                f"Failed to get vLLM wheel from {wheel_location}") from e
+                f"Failed to get vLLM wheel from {wheel_location}"
+            ) from e
+
+        # During a docker build shortcut: clean dist/, determine correct filename, copy
+        if envs.VLLM_DOCKER_BUILD_CONTEXT:
+            dist_dir = "/workspace/dist"
+            os.makedirs(dist_dir, exist_ok=True)
+            # Determine correct wheel filename from METADATA
+            with zipfile.ZipFile(wheel_path, "r") as z:
+                metadata_file = next(
+                    (n for n in z.namelist() if n.endswith(".dist-info/METADATA")),
+                    None,
+                )
+                if not metadata_file:
+                    raise RuntimeError("Could not find METADATA in precompiled wheel.")
+                metadata = z.read(metadata_file).decode()
+            version_line = next(
+                (l for l in metadata.splitlines() if l.startswith("Version: ")), None
+            )
+            if not version_line:
+                raise RuntimeError("Could not determine version from METADATA.")
+            version = version_line.split(": ")[1].strip()
+
+            # Build correct filename using internal version
+            arch_tag = "cp38-abi3-manylinux1_x86_64"
+            corrected_wheel_name = f"vllm-{version}-{arch_tag}.whl"
+            final_wheel_path = os.path.join(dist_dir, corrected_wheel_name)
+
+            print(
+                f"Docker build context detected, copying precompiled wheel "
+                f"({version}) to {final_wheel_path}"
+            )
+            shutil.copy2(wheel_path, final_wheel_path)
+            return
 
+        # Unzip the wheel when not in Docker context
         with zipfile.ZipFile(wheel_path) as wheel:
             files_to_copy = [
                 "vllm/_C.abi3.so",
@@ -378,36 +413,28 @@ def run(self) -> None:
                 "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
                 "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
                 "vllm/cumem_allocator.abi3.so",
-                # "vllm/_version.py", # not available in nightly wheels yet
             ]
-
             file_members = list(
-                filter(lambda x: x.filename in files_to_copy, wheel.filelist))
-
-            # vllm_flash_attn python code:
-            # Regex from
-            # `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)`
+                filter(lambda x: x.filename in files_to_copy, wheel.filelist)
+            )
             compiled_regex = re.compile(
-                r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
+                r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py"
+            )
             file_members += list(
-                filter(lambda x: compiled_regex.match(x.filename),
-                       wheel.filelist))
+                filter(lambda x: compiled_regex.match(x.filename), wheel.filelist)
+            )
 
             for file in file_members:
-                print(f"Extracting and including {file.filename} "
-                      "from existing wheel")
+                print(f"Extracting and including {file.filename} from existing wheel")
                 package_name = os.path.dirname(file.filename).replace("/", ".")
                 file_name = os.path.basename(file.filename)
 
                 if package_name not in package_data:
                     package_data[package_name] = []
 
                 wheel.extract(file)
-                if file_name.endswith(".py"):
-                    # python files shouldn't be added to package_data
-                    continue
-
-                package_data[package_name].append(file_name)
+                if not file_name.endswith(".py"):
+                    package_data[package_name].append(file_name)
 
 
 def _is_hpu() -> bool:
@@ -438,6 +465,9 @@ def _no_device() -> bool:
 
 
 def _is_cuda() -> bool:
+    # Allow forced CUDA in Docker/precompiled builds, even without torch.cuda
+    if envs.VLLM_USE_PRECOMPILED and envs.VLLM_DOCKER_BUILD_CONTEXT:
+        return True
     has_cuda = torch.version.cuda is not None
     return (VLLM_TARGET_DEVICE == "cuda" and has_cuda
             and not (_is_neuron() or _is_tpu() or _is_hpu()))
vllm/envs.py

Lines changed: 8 additions & 2 deletions
@@ -67,6 +67,7 @@
     MAX_JOBS: Optional[str] = None
     NVCC_THREADS: Optional[str] = None
     VLLM_USE_PRECOMPILED: bool = False
+    VLLM_DOCKER_BUILD_CONTEXT: bool = False
     VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL: bool = False
     VLLM_NO_DEPRECATION_WARNING: bool = False
     VLLM_KEEP_ALIVE_ON_ENGINE_DEATH: bool = False
@@ -219,8 +220,13 @@ def get_vllm_port() -> Optional[int]:
 
     # If set, vllm will use precompiled binaries (*.so)
     "VLLM_USE_PRECOMPILED":
-    lambda: bool(os.environ.get("VLLM_USE_PRECOMPILED")) or bool(
-        os.environ.get("VLLM_PRECOMPILED_WHEEL_LOCATION")),
+    lambda: os.environ.get("VLLM_USE_PRECOMPILED", "").strip().lower() in ("1", "true")
+    or bool(os.environ.get("VLLM_PRECOMPILED_WHEEL_LOCATION")),
+
+    # Used to mark that setup.py is running in a Docker build context, in order to force
+    # the use of precompiled binaries.
+    "VLLM_DOCKER_BUILD_CONTEXT":
+    lambda: os.environ.get("VLLM_DOCKER_BUILD_CONTEXT", "").strip().lower() in ("1", "true"),
 
     # Whether to force using nightly wheel in python build.
     # This is used for testing the nightly wheel in python build.
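
Note: the practical effect of the normalization is what the removed Dockerfile TODO complained about: the old `bool(os.environ.get(...))` check treated any non-empty value, including "0", as true. A small sketch of the difference (`is_enabled` is an illustrative helper, not part of vllm/envs.py):

    from typing import Optional

    def is_enabled(raw: Optional[str]) -> bool:
        # New-style check: only "1" or "true" (case-insensitive, stripped) count.
        return (raw or "").strip().lower() in ("1", "true")

    assert is_enabled("1") and is_enabled("true") and is_enabled(" TRUE ")
    assert not is_enabled("0") and not is_enabled("") and not is_enabled(None)
    assert bool("0")  # old-style bool() check: a literal "0" was still truthy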
