
Commit 76b7b89

Merge branch 'main' into enable-ipex
2 parents: fdf4a93 + 5f67482


44 files changed: 1320 additions & 1124 deletions

Cargo.lock

Lines changed: 227 additions & 130 deletions
(Generated file; diff not rendered by default.)

Dockerfile

Lines changed: 16 additions & 14 deletions
@@ -1,12 +1,14 @@
 ## Global Args #################################################################
-ARG BASE_UBI_IMAGE_TAG=9.3-1361.1699548029
-ARG PROTOC_VERSION=25.0
+ARG BASE_UBI_IMAGE_TAG=9.3-1476
+ARG PROTOC_VERSION=25.1
 ARG PYTORCH_INDEX="https://download.pytorch.org/whl"
-ARG IPEX_INDEX="https://pytorch-extension.intel.com/release-whl/stable/cpu/us/"
-#ARG PYTORCH_INDEX="https://download.pytorch.org/whl/nightly"
 ARG PYTORCH_VERSION=2.1.0
+# ARG PYTORCH_INDEX="https://download.pytorch.org/whl/nightly"
+# ARG PYTORCH_VERSION=2.3.0.dev20231221
+ARG IPEX_INDEX="https://pytorch-extension.intel.com/release-whl/stable/cpu/us/"
 ARG IPEX_VERSION=2.1.0
 
+
 ## Base Layer ##################################################################
 FROM registry.access.redhat.com/ubi9/ubi:${BASE_UBI_IMAGE_TAG} as base
 WORKDIR /app
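
The reordering above groups the PyTorch and IPEX build args together. As a hedged sketch of how a later stage could consume them (hypothetical; the actual install steps live further down in this Dockerfile and may differ):

# Sketch only: install torch and IPEX from the configured indexes and pinned versions
RUN pip install --no-cache-dir torch==${PYTORCH_VERSION} --extra-index-url ${PYTORCH_INDEX} && \
    pip install --no-cache-dir intel-extension-for-pytorch==${IPEX_VERSION} --extra-index-url ${IPEX_INDEX}
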
@@ -34,7 +36,7 @@ ENV CUDA_VERSION=11.8.0 \
     NV_CUDA_COMPAT_VERSION=520.61.05-1
 
 RUN dnf config-manager \
-    --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
+    --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
     && dnf install -y \
         cuda-cudart-11-8-${NV_CUDA_CUDART_VERSION} \
         cuda-compat-11-8-${NV_CUDA_COMPAT_VERSION} \
@@ -55,7 +57,7 @@ ENV NV_NVTX_VERSION=11.8.86-1 \
     NV_LIBNCCL_PACKAGE_VERSION=2.15.5-1+cuda11.8
 
 RUN dnf config-manager \
-    --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
+    --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
     && dnf install -y \
         cuda-libraries-11-8-${NV_CUDA_LIB_VERSION} \
         cuda-nvtx-11-8-${NV_NVTX_VERSION} \
@@ -74,7 +76,7 @@ ENV NV_CUDA_CUDART_DEV_VERSION=11.8.89-1 \
     NV_LIBNCCL_DEV_PACKAGE_VERSION=2.15.5-1+cuda11.8
 
 RUN dnf config-manager \
-    --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
+    --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
     && dnf install -y \
         cuda-command-line-tools-11-8-${NV_CUDA_LIB_VERSION} \
         cuda-libraries-devel-11-8-${NV_CUDA_LIB_VERSION} \
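
All three dnf repo additions now point at the rhel9 CUDA repository, matching the ubi9 base image declared at the top of the file. A throwaway check that the repo actually resolves could look like this (a sketch, not part of the diff):

# Sketch only: confirm the cuda-rhel9 repo is visible to dnf after config-manager runs
RUN dnf repolist --enabled | grep -i cuda
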
@@ -90,7 +92,7 @@ ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"
 
 ## Rust builder ################################################################
 # Specific debian version so that compatible glibc version is used
-FROM rust:1.73-bullseye as rust-builder
+FROM rust:1.75-bullseye as rust-builder
 ARG PROTOC_VERSION
 
 ENV CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@@ -135,7 +137,7 @@ RUN dnf install -y make unzip python3.11 python3.11-pip gcc openssl-devel gcc-c+
     ln -fs /usr/bin/python3.11 /usr/bin/python3 && \
     ln -s /usr/bin/python3.11 /usr/local/bin/python && ln -s /usr/bin/pip3.11 /usr/local/bin/pip
 
-RUN pip install --upgrade pip && pip install pytest && pip install pytest-asyncio
+RUN pip install --upgrade pip --no-cache-dir && pip install pytest --no-cache-dir && pip install pytest-asyncio --no-cache-dir
 
 # CPU only
 ENV CUDA_VISIBLE_DEVICES=""
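
The added --no-cache-dir flags keep pip's wheel cache out of the image layer. An equivalent single-invocation form would be (a sketch, not what the diff does):

# Sketch only: same packages, one pip call, still without caching
RUN pip install --no-cache-dir --upgrade pip pytest pytest-asyncio
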
@@ -161,7 +163,7 @@ RUN cd server && \
     make gen-server && \
     pip install ".[accelerate]" --no-cache-dir
 
-# Patch codegen model changes into transformers 4.34
+# Patch codegen model changes into transformers 4.35
 RUN cp server/transformers_patch/modeling_codegen.py ${SITE_PACKAGES}/transformers/models/codegen/modeling_codegen.py
 
 # Install router
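
The comment now matches the transformers 4.35 target of the codegen patch. A hypothetical sanity check after the cp step, to confirm the patched module still imports:

# Sketch only: fail the build early if the patched codegen module is broken
RUN python -c "import transformers; from transformers.models.codegen import modeling_codegen; print(transformers.__version__)"
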
@@ -183,7 +185,7 @@ ARG IPEX_VERSION
 RUN dnf install -y unzip git ninja-build && dnf clean all
 
 RUN cd ~ && \
-    curl -L -O https://repo.anaconda.com/miniconda/Miniconda3-py311_23.9.0-0-Linux-x86_64.sh && \
+    curl -L -O https://repo.anaconda.com/miniconda/Miniconda3-py311_23.10.0-1-Linux-x86_64.sh && \
     chmod +x Miniconda3-*-Linux-x86_64.sh && \
     bash ./Miniconda3-*-Linux-x86_64.sh -bf -p /opt/miniconda
 
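
If checksum verification of the newer Miniconda installer were wanted, a sketch could be (the expected SHA-256 must come from repo.anaconda.com and is deliberately left as a placeholder):

# Sketch only: <expected-sha256> is a placeholder, not a real hash
RUN echo "<expected-sha256>  Miniconda3-py311_23.10.0-1-Linux-x86_64.sh" | sha256sum -c -
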
@@ -226,15 +228,15 @@ FROM python-builder as exllama-kernels-builder
 WORKDIR /usr/src
 
 COPY server/exllama_kernels/ .
-RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
+RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX;8.9" python setup.py build
 
 ## Build transformers exllamav2 kernels ########################################
 FROM python-builder as exllamav2-kernels-builder
 
 WORKDIR /usr/src
 
 COPY server/exllamav2_kernels/ .
-RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
+RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX;8.9" python setup.py build
 
 ## Flash attention cached build image ##########################################
 FROM base as flash-att-cache
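
TORCH_CUDA_ARCH_LIST controls which GPU architectures the exllama and exllamav2 kernels are compiled for; 8.9 corresponds to Ada Lovelace devices (e.g. L4/L40). For comparison, the architectures the bundled torch wheel itself was built for can be listed like this (a sketch, not part of the diff):

# Sketch only: print the CUDA arch list baked into the installed torch build
RUN python -c "import torch; print(torch.cuda.get_arch_list())"
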
@@ -291,7 +293,7 @@ COPY proto proto
 COPY server server
 RUN cd server && make gen-server && pip install ".[accelerate, onnx-gpu, quantize]" --no-cache-dir
 
-# Patch codegen model changes into transformers 4.34.0
+# Patch codegen model changes into transformers 4.35
 RUN cp server/transformers_patch/modeling_codegen.py ${SITE_PACKAGES}/transformers/models/codegen/modeling_codegen.py
 
 # Install router
