
Commit af37388

add support for s390x (#349)
add CPU support for the s390x architecture.

Tested models on s390x:
- TinyLlama/TinyLlama-1.1B-Chat-v1.0
- ibm-granite/granite-3.0-2b-instruct
- ibm-granite/granite-3b-code-instruct-2k
- microsoft/Phi-3-mini-4k-instruct
- mistralai/Mistral-7B-v0.3
- gpt-2
- facebook/opt-125m

Signed-off-by: Rishika Kedia <[email protected]>
Signed-off-by: Rehan Khan <[email protected]>
Co-authored-by: Rehan Khan <[email protected]>
Co-authored-by: Daniele <[email protected]>
Co-authored-by: Tarun Kumar <[email protected]>
1 parent e96dae0 commit af37388

7 files changed (+685, -8 lines)

Dockerfile.s390x

+185
@@ -0,0 +1,185 @@
# Base UBI image for s390x architecture
ARG BASE_UBI_IMAGE_TAG=9.5-1736404155
ARG PYTHON_VERSION=3.12
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base

# Install basic dependencies
ARG PYTHON_VERSION
ENV PYTHON_VERSION=${PYTHON_VERSION}

WORKDIR /workspace

ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8

# Install development utilities
RUN microdnf install -y \
    which procps findutils tar vim git gcc g++ make patch zlib-devel \
    libjpeg-turbo-devel libtiff-devel libpng-devel libwebp-devel freetype-devel harfbuzz-devel \
    openssl-devel openblas openblas-devel autoconf automake libtool cmake && \
    microdnf clean all

# Python Installation
FROM base AS python-install
ARG PYTHON_VERSION

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
ENV PYTHON_VERSION=${PYTHON_VERSION}
RUN microdnf install -y \
    python${PYTHON_VERSION}-devel python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel && \
    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel uv && microdnf clean all

FROM python-install AS pyarrow

# Build Apache Arrow
WORKDIR /tmp
RUN --mount=type=cache,target=/root/.cache/uv \
    git clone https://github.com/apache/arrow.git && \
    cd arrow/cpp && \
    mkdir release && cd release && \
    cmake -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_INSTALL_PREFIX=/usr/local \
        -DARROW_PYTHON=ON \
        -DARROW_PARQUET=ON \
        -DARROW_ORC=ON \
        -DARROW_FILESYSTEM=ON \
        -DARROW_WITH_LZ4=ON \
        -DARROW_WITH_ZSTD=ON \
        -DARROW_WITH_SNAPPY=ON \
        -DARROW_JSON=ON \
        -DARROW_CSV=ON \
        -DARROW_DATASET=ON \
        -DPROTOBUF_PROTOC_EXECUTABLE=/usr/bin/protoc \
        -DARROW_DEPENDENCY_SOURCE=BUNDLED \
        .. && \
    make -j$(nproc) && \
    make install && \
    cd ../../python && \
    export PYARROW_PARALLEL=4 && \
    export ARROW_BUILD_TYPE=release && \
    uv pip install -r requirements-build.txt && \
    python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --bundle-arrow-cpp bdist_wheel

FROM python-install AS numa-build
# Install numactl (needed for numa.h dependency)
WORKDIR /tmp
RUN curl -LO https://github.com/numactl/numactl/archive/refs/tags/v2.0.16.tar.gz && \
    tar -xvzf v2.0.16.tar.gz && \
    cd numactl-2.0.16 && \
    ./autogen.sh && \
    ./configure && \
    make

# Set include path
ENV C_INCLUDE_PATH="/usr/local/include:$C_INCLUDE_PATH"

FROM python-install AS rust
ENV CARGO_HOME=/root/.cargo
ENV RUSTUP_HOME=/root/.rustup
ENV PATH="$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH"

RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && \
    . "$CARGO_HOME/env" && \
    rustup default stable && \
    rustup show

FROM python-install AS torch-vision
# Install torchvision
ARG TORCH_VERSION=2.7.0.dev20250304
ARG TORCH_VISION_VERSION=v0.20.1
WORKDIR /tmp
RUN --mount=type=cache,target=/root/.cache/uv \
    git clone https://github.com/pytorch/vision.git && \
    cd vision && \
    git checkout $TORCH_VISION_VERSION && \
    uv pip install -v torch==${TORCH_VERSION} --extra-index-url https://download.pytorch.org/whl/nightly/cpu && \
    python setup.py bdist_wheel

# Final build stage
FROM python-install AS vllm-cpu
ARG PYTHON_VERSION

# Set correct library path for torch and numactl
ENV LD_LIBRARY_PATH="/opt/vllm/lib64/python${PYTHON_VERSION}/site-packages/torch/lib:/usr/local/lib:$LD_LIBRARY_PATH"
ENV C_INCLUDE_PATH="/usr/local/include:$C_INCLUDE_PATH"
ENV UV_LINK_MODE=copy
ENV CARGO_HOME=/root/.cargo
ENV RUSTUP_HOME=/root/.rustup
ENV PATH="$CARGO_HOME/bin:$RUSTUP_HOME/bin:$PATH"

COPY . /workspace/vllm
WORKDIR /workspace/vllm

RUN --mount=type=bind,from=numa-build,src=/tmp/numactl-2.0.16,target=/numactl \
    make -C /numactl install

# Install dependencies, including PyTorch and Apache Arrow
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=rust,source=/root/.cargo,target=/root/.cargo,rw \
    --mount=type=bind,from=rust,source=/root/.rustup,target=/root/.rustup,rw \
    --mount=type=bind,from=pyarrow,source=/tmp/arrow/python/dist,target=/tmp/arrow-wheels \
    --mount=type=bind,from=torch-vision,source=/tmp/vision/dist,target=/tmp/vision-wheels/ \
    sed -i '/^torch/d' requirements-build.txt && \
    sed -i '/^numba/d' requirements-common.txt && \
    ARROW_WHL_FILE=$(ls /tmp/arrow-wheels/pyarrow-*.whl | head -n 1) && \
    VISION_WHL_FILE=$(ls /tmp/vision-wheels/*.whl | head -n 1) && \
    uv pip install -v \
        $ARROW_WHL_FILE \
        $VISION_WHL_FILE \
        --extra-index-url https://download.pytorch.org/whl/nightly/cpu \
        --index-strategy unsafe-best-match \
        -r requirements-build.txt \
        -r requirements-cpu.txt

# Build and install vllm
RUN --mount=type=cache,target=/root/.cache/uv \
    VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \
    uv pip install "$(echo dist/*.whl)[tensorizer]"

ENV HF_HUB_OFFLINE=1 \
    HOME=/home/vllm \
    # Allow requested max length to exceed what is extracted from the
    # config.json
    # see: https://github.com/vllm-project/vllm/pull/7080
    VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
    VLLM_USAGE_SOURCE=production-docker-image \
    VLLM_WORKER_MULTIPROC_METHOD=fork \
    VLLM_NO_USAGE_STATS=1 \
    OUTLINES_CACHE_DIR=/tmp/outlines

# setup non-root user for vllm
RUN umask 002 && \
    useradd --uid 2000 --gid 0 vllm && \
    mkdir -p /home/vllm && \
    chmod g+rwx /home/vllm

COPY LICENSE /licenses/vllm.md
COPY examples/*.jinja /app/data/template/

USER 2000
WORKDIR /home/vllm

# Set the default entrypoint
ENTRYPOINT ["python", "-m", "vllm.entrypoints.openai.api_server"]


FROM vllm-cpu as vllm-grpc-adapter

USER root

ENV GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=1

RUN --mount=type=cache,target=/root/.cache/uv \
    HOME=/root uv pip install "$(echo /workspace/vllm/dist/*.whl)[tensorizer]" vllm-tgis-adapter==0.6.3

ENV GRPC_PORT=8033 \
    PORT=8000 \
    # As an optimization, vLLM disables logprobs when using spec decoding by
    # default, but this would be unexpected to users of a hosted model that
    # happens to have spec decoding
    # see: https://github.com/vllm-project/vllm/pull/6485
    DISABLE_LOGPROBS_DURING_SPEC_DECODING=false

USER 2000
ENTRYPOINT ["python", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]

cmake/cpu_extension.cmake

+10 -1
@@ -81,6 +81,7 @@ else()
     find_isa(${CPUINFO} "POWER9" POWER9_FOUND)
     find_isa(${CPUINFO} "asimd" ASIMD_FOUND) # Check for ARM NEON support
     find_isa(${CPUINFO} "bf16" ARM_BF16_FOUND) # Check for ARM BF16 support
+    find_isa(${CPUINFO} "S390" S390_FOUND)
 endif()
@@ -129,8 +130,16 @@ elseif (ASIMD_FOUND)
 elseif(APPLE_SILICON_FOUND)
     message(STATUS "Apple Silicon Detected")
     set(ENABLE_NUMA OFF)
+elseif (S390_FOUND)
+    message(STATUS "S390 detected")
+    # Check for S390 VXE support
+    list(APPEND CXX_COMPILE_FLAGS
+        "-mvx"
+        "-mzvector"
+        "-march=native"
+        "-mtune=native")
 else()
-    message(FATAL_ERROR "vLLM CPU backend requires AVX512, AVX2, Power9+ ISA or ARMv8 support.")
+    message(FATAL_ERROR "vLLM CPU backend requires AVX512, AVX2, Power9+ ISA, S390X ISA or ARMv8 support.")
 endif()

 #
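For context, -mvx and -mzvector enable the z/Architecture vector facility (VX/VXE) that the new kernels target. A minimal smoke test (a hypothetical standalone probe, not part of this commit) that the toolchain accepts these flags, assuming GCC or Clang on s390x, compiled with g++ -mvx -mzvector vxe_probe.cpp:

    // vxe_probe.cpp (hypothetical): checks that the z/Architecture
    // vector extensions enabled by -mvx -mzvector are usable.
    #include <vecintrin.h>  // z vector intrinsics, available with -mzvector
    #include <cstdio>

    int main() {
      // The __vector keyword and element-wise operators come from -mzvector.
      __vector float a = {1.0f, 2.0f, 3.0f, 4.0f};
      __vector float b = {5.0f, 6.0f, 7.0f, 8.0f};
      __vector float c = a + b;  // executes on the vector facility
      std::printf("%.1f %.1f %.1f %.1f\n", c[0], c[1], c[2], c[3]);
      return 0;
    }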

csrc/cpu/attention.cpp

+2 -2
@@ -24,8 +24,8 @@ struct KernelVecType<float> {

 template <>
 struct KernelVecType<c10::Half> {
-#ifdef __powerpc64__
-  // Power architecture-specific vector types
+#if defined(__powerpc64__) || defined(__s390x__)
+  // Power and s390x architecture-specific vector types
   using q_load_vec_type = vec_op::FP32Vec8;
   using k_load_vec_type = vec_op::FP32Vec16;
   using v_load_vec_type = vec_op::FP32Vec16;
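The effect of this change: on s390x, as already on POWER, half-precision tensors are widened to fp32 vectors at load time, so the attention kernel computes in fp32. A stripped-down sketch of the trait pattern (the struct definitions are stand-ins, not the real vec_op classes):

    // Simplified mirror of the KernelVecType dispatch; FP32Vec8/FP32Vec16
    // and Half are stand-ins for vec_op types and c10::Half.
    #include <cstdint>

    struct FP32Vec8  { float v[8];  };
    struct FP32Vec16 { float v[16]; };
    struct Half      { std::uint16_t bits; };

    // Maps a tensor element type to the vector types kernels load through.
    template <typename T> struct KernelVecType;

    template <> struct KernelVecType<Half> {
      // POWER / s390x branch: fp16 storage, fp32 compute.
      using q_load_vec_type = FP32Vec8;
      using k_load_vec_type = FP32Vec16;
      using v_load_vec_type = FP32Vec16;
    };

    static_assert(sizeof(KernelVecType<Half>::k_load_vec_type) ==
                  16 * sizeof(float), "16 fp32 lanes per key vector");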

csrc/cpu/cpu_types.hpp

+3
@@ -10,6 +10,9 @@
 #elif defined(__aarch64__)
 // arm implementation
 #include "cpu_types_arm.hpp"
+#elif defined(__s390x__)
+// s390 implementation
+#include "cpu_types_vxe.hpp"
 #else
 #warning "unsupported vLLM cpu implementation"
 #endif
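A quick way to confirm which implementation header this chain would select on a given build machine (a hypothetical probe using the same predefined macros; branches not shown in this hunk are summarized generically):

    // arch_probe.cpp (hypothetical, not part of this commit)
    #include <cstdio>

    int main() {
    #if defined(__s390x__)
      std::puts("s390x -> cpu_types_vxe.hpp");
    #elif defined(__aarch64__)
      std::puts("aarch64 -> cpu_types_arm.hpp");
    #else
      std::puts("another branch of cpu_types.hpp (x86/POWER) or unsupported");
    #endif
      return 0;
    }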
