
Commit af37388

add support for s390x (#349)
add CPU support for the s390x architecture.

Tested models on s390x:
- TinyLlama/TinyLlama-1.1B-Chat-v1.0
- ibm-granite/granite-3.0-2b-instruct
- ibm-granite/granite-3b-code-instruct-2k
- microsoft/Phi-3-mini-4k-instruct
- mistralai/Mistral-7B-v0.3
- gpt-2
- facebook/opt-125m

Signed-off-by: Rishika Kedia <[email protected]>
Signed-off-by: Rehan Khan <[email protected]>
Co-authored-by: Rehan Khan <[email protected]>
Co-authored-by: Daniele <[email protected]>
Co-authored-by: Tarun Kumar <[email protected]>
1 parent e96dae0 commit af37388

7 files changed (+685, -8 lines)

Dockerfile.s390x

+185
@@ -0,0 +1,185 @@
# Base UBI image for s390x architecture
ARG BASE_UBI_IMAGE_TAG=9.5-1736404155
ARG PYTHON_VERSION=3.12
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base

# Install basic dependencies
ARG PYTHON_VERSION
ENV PYTHON_VERSION=${PYTHON_VERSION}

WORKDIR /workspace

ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8

# Install development utilities
RUN microdnf install -y \
    which procps findutils tar vim git gcc g++ make patch zlib-devel \
    libjpeg-turbo-devel libtiff-devel libpng-devel libwebp-devel freetype-devel harfbuzz-devel \
    openssl-devel openblas openblas-devel autoconf automake libtool cmake && \
    microdnf clean all

# Python Installation
FROM base AS python-install
ARG PYTHON_VERSION

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
ENV PYTHON_VERSION=${PYTHON_VERSION}
RUN microdnf install -y \
    python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \
    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel uv && microdnf clean all

FROM python-install AS pyarrow

# Build Apache Arrow
WORKDIR /tmp
RUN --mount=type=cache,target=/root/.cache/uv \
    git clone https://github.com/apache/arrow.git && \
    cd arrow/cpp && \
    mkdir release && cd release && \
    cmake -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_INSTALL_PREFIX=/usr/local \
        -DARROW_PYTHON=ON \
        -DARROW_PARQUET=ON \
        -DARROW_ORC=ON \
        -DARROW_FILESYSTEM=ON \
        -DARROW_WITH_LZ4=ON \
        -DARROW_WITH_ZSTD=ON \
        -DARROW_WITH_SNAPPY=ON \
        -DARROW_JSON=ON \
        -DARROW_CSV=ON \
        -DARROW_DATASET=ON \
        -DPROTOBUF_PROTOC_EXECUTABLE=/usr/bin/protoc \
        -DARROW_DEPENDENCY_SOURCE=BUNDLED \
        .. && \
    make -j$(nproc) && \
    make install && \
    cd ../../python && \
    export PYARROW_PARALLEL=4 && \
    export ARROW_BUILD_TYPE=release && \
    uv pip install -r requirements-build.txt && \
    python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --bundle-arrow-cpp bdist_wheel

FROM python-install AS numa-build
# Install numactl (needed for numa.h dependency)
WORKDIR /tmp
RUN curl -LO https://github.com/numactl/numactl/archive/refs/tags/v2.0.16.tar.gz && \
    tar -xvzf v2.0.16.tar.gz && \
    cd numactl-2.0.16 && \
    ./autogen.sh && \
    ./configure && \
    make

# Set include path
ENV C_INCLUDE_PATH="/usr/local/include:$C_INCLUDE_PATH"

FROM python-install AS rust
ENV CARGO_HOME=/root/.cargo
ENV RUSTUP_HOME=/root/.rustup
ENV PATH="$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH"

RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && \
    . "$CARGO_HOME/env" && \
    rustup default stable && \
    rustup show

FROM python-install AS torch-vision
# Install torchvision
ARG TORCH_VERSION=2.7.0.dev20250304
ARG TORCH_VISION_VERSION=v0.20.1
WORKDIR /tmp
RUN --mount=type=cache,target=/root/.cache/uv \
    git clone https://github.com/pytorch/vision.git && \
    cd vision && \
    git checkout $TORCH_VISION_VERSION && \
    uv pip install -v torch==${TORCH_VERSION} --extra-index-url https://download.pytorch.org/whl/nightly/cpu && \
    python setup.py bdist_wheel

# Final build stage
FROM python-install AS vllm-cpu
ARG PYTHON_VERSION

# Set correct library path for torch and numactl
ENV LD_LIBRARY_PATH="/opt/vllm/lib64/python${PYTHON_VERSION}/site-packages/torch/lib:/usr/local/lib:$LD_LIBRARY_PATH"
ENV C_INCLUDE_PATH="/usr/local/include:$C_INCLUDE_PATH"
ENV UV_LINK_MODE=copy
ENV CARGO_HOME=/root/.cargo
ENV RUSTUP_HOME=/root/.rustup
ENV PATH="$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH"

COPY . /workspace/vllm
WORKDIR /workspace/vllm

RUN --mount=type=bind,from=numa-build,src=/tmp/numactl-2.0.16,target=/numactl \
    make -C /numactl install

# Install dependencies, including PyTorch and Apache Arrow
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=rust,source=/root/.cargo,target=/root/.cargo,rw \
    --mount=type=bind,from=rust,source=/root/.rustup,target=/root/.rustup,rw \
    --mount=type=bind,from=pyarrow,source=/tmp/arrow/python/dist,target=/tmp/arrow-wheels \
    --mount=type=bind,from=torch-vision,source=/tmp/vision/dist,target=/tmp/vision-wheels/ \
    sed -i '/^torch/d' requirements-build.txt && \
    sed -i '/^numba/d' requirements-common.txt && \
    ARROW_WHL_FILE=$(ls /tmp/arrow-wheels/pyarrow-*.whl | head -n 1) && \
    VISION_WHL_FILE=$(ls /tmp/vision-wheels/*.whl | head -n 1) && \
    uv pip install -v \
        $ARROW_WHL_FILE \
        $VISION_WHL_FILE \
        --extra-index-url https://download.pytorch.org/whl/nightly/cpu \
        --index-strategy unsafe-best-match \
        -r requirements-build.txt \
        -r requirements-cpu.txt

# Build and install vllm
RUN --mount=type=cache,target=/root/.cache/uv \
    VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \
    uv pip install "$(echo dist/*.whl)[tensorizer]"

ENV HF_HUB_OFFLINE=1 \
    HOME=/home/vllm \
    # Allow requested max length to exceed what is extracted from the
    # config.json
    # see: https://github.com/vllm-project/vllm/pull/7080
    VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
    VLLM_USAGE_SOURCE=production-docker-image \
    VLLM_WORKER_MULTIPROC_METHOD=fork \
    VLLM_NO_USAGE_STATS=1 \
    OUTLINES_CACHE_DIR=/tmp/outlines

# setup non-root user for vllm
RUN umask 002 && \
    useradd --uid 2000 --gid 0 vllm && \
    mkdir -p /home/vllm && \
    chmod g+rwx /home/vllm

COPY LICENSE /licenses/vllm.md
COPY examples/*.jinja /app/data/template/

USER 2000
WORKDIR /home/vllm

# Set the default entrypoint
ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server"]


FROM vllm-cpu as vllm-grpc-adapter

USER root

ENV GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1

RUN --mount=type=cache,target=/root/.cache/uv \
    HOME=/root uv pip install "$(echo /workspace/vllm/dist/*.whl)[tensorizer]" vllm-tgis-adapter==0.6.3

ENV GRPC_PORT=8033 \
    PORT=8000 \
    # As an optimization, vLLM disables logprobs when using spec decoding by
    # default, but this would be unexpected to users of a hosted model that
    # happens to have spec decoding
    # see: https://github.com/vllm-project/vllm/pull/6485
    DISABLE_LOGPROBS_DURING_SPEC_DECODING=false

USER 2000
ENTRYPOINT ["python", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]

cmake/cpu_extension.cmake

+10 -1
@@ -81,6 +81,7 @@ else()
     find_isa(${CPUINFO} "POWER9" POWER9_FOUND)
     find_isa(${CPUINFO} "asimd" ASIMD_FOUND) # Check for ARM NEON support
     find_isa(${CPUINFO} "bf16" ARM_BF16_FOUND) # Check for ARM BF16 support
+    find_isa(${CPUINFO} "S390" S390_FOUND)
 endif()
@@ -129,8 +130,16 @@ elseif (ASIMD_FOUND)
 elseif(APPLE_SILICON_FOUND)
     message(STATUS "Apple Silicon Detected")
     set(ENABLE_NUMA OFF)
+elseif (S390_FOUND)
+    message(STATUS "S390 detected")
+    # Check for S390 VXE support
+    list(APPEND CXX_COMPILE_FLAGS
+        "-mvx"
+        "-mzvector"
+        "-march=native"
+        "-mtune=native")
 else()
-    message(FATAL_ERROR "vLLM CPU backend requires AVX512, AVX2, Power9+ ISA or ARMv8 support.")
+    message(FATAL_ERROR "vLLM CPU backend requires AVX512, AVX2, Power9+ ISA, S390X ISA or ARMv8 support.")
 endif()

 #
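For context, -mvx and -mzvector enable the z/Architecture vector facility (VX/VXE) that the new kernels target. A minimal smoke test (a hypothetical standalone probe, not part of this commit) that the toolchain accepts these flags, assuming GCC or Clang on s390x, compiled with g++ -mvx -mzvector vxe_probe.cpp:

    // vxe_probe.cpp (hypothetical): checks that the z/Architecture
    // vector extensions enabled by -mvx -mzvector are usable.
    #include <vecintrin.h>  // z vector intrinsics, available with -mzvector
    #include <cstdio>

    int main() {
      // The __vector keyword and element-wise operators come from -mzvector.
      __vector float a = {1.0f, 2.0f, 3.0f, 4.0f};
      __vector float b = {5.0f, 6.0f, 7.0f, 8.0f};
      __vector float c = a + b;  // executes on the vector facility
      std::printf("%.1f %.1f %.1f %.1f\n", c[0], c[1], c[2], c[3]);
      return 0;
    }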

csrc/cpu/attention.cpp

+2 -2
@@ -24,8 +24,8 @@ struct KernelVecType<float> {

 template <>
 struct KernelVecType<c10::Half> {
-#ifdef __powerpc64__
-  // Power architecture-specific vector types
+#if defined(__powerpc64__) || defined(__s390x__)
+  // Power and s390x architecture-specific vector types
   using q_load_vec_type = vec_op::FP32Vec8;
   using k_load_vec_type = vec_op::FP32Vec16;
   using v_load_vec_type = vec_op::FP32Vec16;
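The effect of this change: on s390x, as already on POWER, half-precision tensors are widened to fp32 vectors at load time, so the attention kernel computes in fp32. A stripped-down sketch of the trait pattern (the struct definitions are stand-ins, not the real vec_op classes):

    // Simplified mirror of the KernelVecType dispatch; FP32Vec8/FP32Vec16
    // and Half are stand-ins for vec_op types and c10::Half.
    #include <cstdint>

    struct FP32Vec8  { float v[8];  };
    struct FP32Vec16 { float v[16]; };
    struct Half      { std::uint16_t bits; };

    // Maps a tensor element type to the vector types kernels load through.
    template <typename T> struct KernelVecType;

    template <> struct KernelVecType<Half> {
      // POWER / s390x branch: fp16 storage, fp32 compute.
      using q_load_vec_type = FP32Vec8;
      using k_load_vec_type = FP32Vec16;
      using v_load_vec_type = FP32Vec16;
    };

    static_assert(sizeof(KernelVecType<Half>::k_load_vec_type) ==
                  16 * sizeof(float), "16 fp32 lanes per key vector");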

csrc/cpu/cpu_types.hpp

+3
@@ -10,6 +10,9 @@
 #elif defined(__aarch64__)
 // arm implementation
 #include "cpu_types_arm.hpp"
+#elif defined(__s390x__)
+// s390 implementation
+#include "cpu_types_vxe.hpp"
 #else
 #warning "unsupported vLLM cpu implementation"
 #endif
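A quick way to confirm which implementation header this chain would select on a given build machine (a hypothetical probe using the same predefined macros; branches not shown in this hunk are summarized generically):

    // arch_probe.cpp (hypothetical, not part of this commit)
    #include <cstdio>

    int main() {
    #if defined(__s390x__)
      std::puts("s390x -> cpu_types_vxe.hpp");
    #elif defined(__aarch64__)
      std::puts("aarch64 -> cpu_types_arm.hpp");
    #else
      std::puts("another branch of cpu_types.hpp (x86/POWER) or unsupported");
    #endif
      return 0;
    }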
