-
-
Notifications
You must be signed in to change notification settings - Fork 10.1k
docker: docker-aware precompiled wheel support #21127
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
import logging | ||
import os | ||
import re | ||
import shutil | ||
import subprocess | ||
import sys | ||
from pathlib import Path | ||
|
@@ -297,6 +298,10 @@ def get_base_commit_in_main_branch(self) -> str: | |
]).decode("utf-8") | ||
upstream_main_commit = json.loads(resp_json)["sha"] | ||
|
||
# In Docker build context, .git may be immutable or missing. | ||
if envs.VLLM_DOCKER_BUILD_CONTEXT: | ||
return upstream_main_commit | ||
|
||
Comment on lines
+301
to
+304
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will be problematic as the main commit might not have the wheels ready (e.g. when it is just merged). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm we still have nightly in that case.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess the same problem will be if the PR merge base is not compatible with latest main/nightly. we can address this as a follow up. |
||
# Check if the upstream_main_commit exists in the local repo | ||
try: | ||
subprocess.check_output( | ||
|
@@ -357,19 +362,48 @@ def run(self) -> None: | |
# create a temporary directory to store the wheel | ||
temp_dir = tempfile.mkdtemp(prefix="vllm-wheels") | ||
wheel_path = os.path.join(temp_dir, wheel_filename) | ||
|
||
print(f"Downloading wheel from {wheel_location} to {wheel_path}") | ||
|
||
from urllib.request import urlretrieve | ||
|
||
try: | ||
urlretrieve(wheel_location, filename=wheel_path) | ||
except Exception as e: | ||
from setuptools.errors import SetupError | ||
|
||
raise SetupError( | ||
f"Failed to get vLLM wheel from {wheel_location}") from e | ||
|
||
# During a docker build: determine correct filename, copy wheel. | ||
if envs.VLLM_DOCKER_BUILD_CONTEXT: | ||
dist_dir = "/workspace/dist" | ||
os.makedirs(dist_dir, exist_ok=True) | ||
# Determine correct wheel filename from METADATA | ||
with zipfile.ZipFile(wheel_path, "r") as z: | ||
metadata_file = next( | ||
(n for n in z.namelist() | ||
if n.endswith(".dist-info/METADATA")), | ||
None, | ||
) | ||
if not metadata_file: | ||
raise RuntimeError( | ||
"Could not find METADATA in precompiled wheel.") | ||
metadata = z.read(metadata_file).decode() | ||
version_line = next((line for line in metadata.splitlines() | ||
if line.startswith("Version: ")), None) | ||
if not version_line: | ||
raise RuntimeError( | ||
"Could not determine version from METADATA.") | ||
version = version_line.split(": ")[1].strip() | ||
|
||
# Build correct filename using internal version | ||
arch_tag = "cp38-abi3-manylinux1_x86_64" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The To make this more robust, you should determine the architecture tag dynamically. A good approach would be to set an environment variable in the For example, in your ARG TARGETPLATFORM
RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
export VLLM_ARCH_TAG="cp38-abi3-manylinux2014_aarch64" ; \
else \
export VLLM_ARCH_TAG="cp38-abi3-manylinux1_x86_64" ; \
fi && \
...
python3 setup.py bdist_wheel ... Then in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 above. |
||
corrected_wheel_name = f"vllm-{version}-{arch_tag}.whl" | ||
final_wheel_path = os.path.join(dist_dir, corrected_wheel_name) | ||
|
||
print(f"Docker build context detected, copying precompiled wheel " | ||
f"({version}) to {final_wheel_path}") | ||
shutil.copy2(wheel_path, final_wheel_path) | ||
return | ||
|
||
# Unzip the wheel when not in Docker context | ||
with zipfile.ZipFile(wheel_path) as wheel: | ||
files_to_copy = [ | ||
"vllm/_C.abi3.so", | ||
|
@@ -378,15 +412,9 @@ def run(self) -> None: | |
"vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so", | ||
"vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so", | ||
"vllm/cumem_allocator.abi3.so", | ||
# "vllm/_version.py", # not available in nightly wheels yet | ||
] | ||
|
||
file_members = list( | ||
filter(lambda x: x.filename in files_to_copy, wheel.filelist)) | ||
|
||
# vllm_flash_attn python code: | ||
# Regex from | ||
# `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)` | ||
compiled_regex = re.compile( | ||
r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py") | ||
file_members += list( | ||
|
@@ -403,11 +431,8 @@ def run(self) -> None: | |
package_data[package_name] = [] | ||
|
||
wheel.extract(file) | ||
if file_name.endswith(".py"): | ||
# python files shouldn't be added to package_data | ||
continue | ||
|
||
package_data[package_name].append(file_name) | ||
if not file_name.endswith(".py"): | ||
package_data[package_name].append(file_name) | ||
|
||
|
||
def _is_hpu() -> bool: | ||
|
@@ -438,6 +463,9 @@ def _no_device() -> bool: | |
|
||
|
||
def _is_cuda() -> bool: | ||
# Allow forced CUDA in Docker/precompiled builds, even without torch.cuda | ||
if envs.VLLM_USE_PRECOMPILED and envs.VLLM_DOCKER_BUILD_CONTEXT: | ||
return True | ||
has_cuda = torch.version.cuda is not None | ||
return (VLLM_TARGET_DEVICE == "cuda" and has_cuda | ||
and not (_is_neuron() or _is_tpu() or _is_hpu())) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The wheel filename pattern
*manylinux1_x86_64.whl
is hardcoded. This will not work for other architectures likearm64
, for which there is build logic in this Dockerfile (usingTARGETPLATFORM
).When using precompiled wheels on
arm64
, this step will fail to find the correct wheel to keep. If there are multiple wheels indist/
, it might not clean up correctly, potentially leading to the wrong wheel being installed in the final image.This should be parameterized. You could use a shell variable set based on
TARGETPLATFORM
to specify the wheel pattern.For example:
This change is necessary to support multi-architecture builds with precompiled wheels.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the context of other hardcoding here, we're currently specifying:
elsewhere as a statically defined string. So, I think we can directly assume the arch.