@@ -1,12 +1,14 @@
 ## Global Args #################################################################
-ARG BASE_UBI_IMAGE_TAG=9.3-1361.1699548029
-ARG PROTOC_VERSION=25.0
+ARG BASE_UBI_IMAGE_TAG=9.3-1476
+ARG PROTOC_VERSION=25.1
 ARG PYTORCH_INDEX="https://download.pytorch.org/whl"
-ARG IPEX_INDEX="https://pytorch-extension.intel.com/release-whl/stable/cpu/us/"
-# ARG PYTORCH_INDEX="https://download.pytorch.org/whl/nightly"
 ARG PYTORCH_VERSION=2.1.0
+# ARG PYTORCH_INDEX="https://download.pytorch.org/whl/nightly"
+# ARG PYTORCH_VERSION=2.3.0.dev20231221
+ARG IPEX_INDEX="https://pytorch-extension.intel.com/release-whl/stable/cpu/us/"
 ARG IPEX_VERSION=2.1.0
 
+
 ## Base Layer ##################################################################
 FROM registry.access.redhat.com/ubi9/ubi:${BASE_UBI_IMAGE_TAG} as base
 WORKDIR /app
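
All of the build pins (UBI base tag, protoc, the PyTorch/IPEX indexes and versions) are declared as top-level `ARG`s, so they can be overridden per build without editing the Dockerfile. A minimal sketch, assuming the build runs from the repo root (the image tag is a placeholder):

```bash
# Override the pinned defaults at build time; values shown are the new pins.
docker build \
  --build-arg BASE_UBI_IMAGE_TAG=9.3-1476 \
  --build-arg PROTOC_VERSION=25.1 \
  -t text-gen-server:dev .
```
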
@@ -34,7 +36,7 @@ ENV CUDA_VERSION=11.8.0 \
     NV_CUDA_COMPAT_VERSION=520.61.05-1
 
 RUN dnf config-manager \
-       --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
+       --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
     && dnf install -y \
        cuda-cudart-11-8-${NV_CUDA_CUDART_VERSION} \
        cuda-compat-11-8-${NV_CUDA_COMPAT_VERSION} \
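
The CUDA repo URL previously pointed at the `rhel8` repository even though the base image is UBI 9; this hunk (and the two that follow, for the CUDA runtime-library and devel package sets) switches it to `rhel9`. A quick sanity check inside the image, as a sketch:

```bash
# Confirm the OS major version and that the newly added CUDA repo is active.
grep VERSION_ID /etc/os-release   # expect VERSION_ID="9.x" on UBI 9
dnf repolist | grep -i cuda       # expect a cuda-rhel9 repo id
```
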
@@ -55,7 +57,7 @@ ENV NV_NVTX_VERSION=11.8.86-1 \
     NV_LIBNCCL_PACKAGE_VERSION=2.15.5-1+cuda11.8
 
 RUN dnf config-manager \
-       --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
+       --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
     && dnf install -y \
        cuda-libraries-11-8-${NV_CUDA_LIB_VERSION} \
        cuda-nvtx-11-8-${NV_NVTX_VERSION} \
@@ -74,7 +76,7 @@ ENV NV_CUDA_CUDART_DEV_VERSION=11.8.89-1 \
     NV_LIBNCCL_DEV_PACKAGE_VERSION=2.15.5-1+cuda11.8
 
 RUN dnf config-manager \
-       --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
+       --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo \
     && dnf install -y \
        cuda-command-line-tools-11-8-${NV_CUDA_LIB_VERSION} \
        cuda-libraries-devel-11-8-${NV_CUDA_LIB_VERSION} \
@@ -90,7 +92,7 @@ ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"
 
 ## Rust builder ################################################################
 # Specific debian version so that compatible glibc version is used
-FROM rust:1.73-bullseye as rust-builder
+FROM rust:1.75-bullseye as rust-builder
 ARG PROTOC_VERSION
 
 ENV CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
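
`CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse` opts into the sparse crates.io index protocol; since Rust 1.70 this is already Cargo's default, so with the bump to `rust:1.75-bullseye` the variable is redundant but harmless. It can also be set per invocation:

```bash
# One-off equivalent of the ENV line (a no-op on Rust >= 1.70).
CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse cargo fetch
```
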
@@ -135,7 +137,7 @@ RUN dnf install -y make unzip python3.11 python3.11-pip gcc openssl-devel gcc-c+
     ln -fs /usr/bin/python3.11 /usr/bin/python3 && \
     ln -s /usr/bin/python3.11 /usr/local/bin/python && ln -s /usr/bin/pip3.11 /usr/local/bin/pip
 
-RUN pip install --upgrade pip && pip install pytest && pip install pytest-asyncio
+RUN pip install --upgrade pip --no-cache-dir && pip install pytest --no-cache-dir && pip install pytest-asyncio --no-cache-dir
 
 # CPU only
 ENV CUDA_VISIBLE_DEVICES=""
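
`--no-cache-dir` stops pip from writing its download/wheel cache into the layer, which only adds image size since the cache is never reused in a single-shot build. The three installs could also be collapsed into one invocation; a possible consolidation, not what the Dockerfile currently does:

```bash
# Same packages, one pip run, no cache left in the layer.
pip install --no-cache-dir --upgrade pip pytest pytest-asyncio
```
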
@@ -161,7 +163,7 @@ RUN cd server && \
     make gen-server && \
     pip install ".[accelerate]" --no-cache-dir
 
-# Patch codegen model changes into transformers 4.34
+# Patch codegen model changes into transformers 4.35
 RUN cp server/transformers_patch/modeling_codegen.py ${SITE_PACKAGES}/transformers/models/codegen/modeling_codegen.py
 
 # Install router
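
The comment bump (4.34 to 4.35) tracks the pinned transformers release, since the patch overwrites a file inside the installed package and must match its layout. `${SITE_PACKAGES}` is defined elsewhere in this Dockerfile; a portable way to resolve it, as a sketch:

```bash
# Resolve the active interpreter's site-packages directory, then apply the patch.
SITE_PACKAGES=$(python -c "import sysconfig; print(sysconfig.get_paths()['purelib'])")
cp server/transformers_patch/modeling_codegen.py \
   "${SITE_PACKAGES}/transformers/models/codegen/modeling_codegen.py"
```
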
@@ -183,7 +185,7 @@ ARG IPEX_VERSION
 RUN dnf install -y unzip git ninja-build && dnf clean all
 
 RUN cd ~ && \
-    curl -L -O https://repo.anaconda.com/miniconda/Miniconda3-py311_23.9.0-0-Linux-x86_64.sh && \
+    curl -L -O https://repo.anaconda.com/miniconda/Miniconda3-py311_23.10.0-1-Linux-x86_64.sh && \
     chmod +x Miniconda3-*-Linux-x86_64.sh && \
     bash ./Miniconda3-*-Linux-x86_64.sh -bf -p /opt/miniconda
 
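
The installer flags are `-b` for batch (non-interactive) mode, `-f` to tolerate an existing prefix, and `-p` to set the install prefix. Since the installer is fetched over the network and executed, verifying it first is a reasonable hardening step; a sketch, where `<EXPECTED_SHA256>` is a placeholder for the hash published on repo.anaconda.com, not a real value:

```bash
# Fail the build if the downloaded installer does not match the published hash.
echo "<EXPECTED_SHA256>  Miniconda3-py311_23.10.0-1-Linux-x86_64.sh" | sha256sum -c -
```
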
@@ -226,15 +228,15 @@ FROM python-builder as exllama-kernels-builder
 WORKDIR /usr/src
 
 COPY server/exllama_kernels/ .
-RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
+RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX;8.9" python setup.py build
 
 ## Build transformers exllamav2 kernels ########################################
 FROM python-builder as exllamav2-kernels-builder
 
 WORKDIR /usr/src
 
 COPY server/exllamav2_kernels/ .
-RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
+RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX;8.9" python setup.py build
 
 ## Flash attention cached build image ##########################################
 FROM base as flash-att-cache
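
Adding `8.9` to `TORCH_CUDA_ARCH_LIST` compiles the exllama and exllamav2 kernels with native code for Ada Lovelace GPUs (e.g. L4, L40, RTX 40-series) alongside Ampere (`8.0` for A100, `8.6` for A10/RTX 30-series); `+PTX` additionally embeds PTX so newer architectures can still JIT-compile. To check what a given host actually needs:

```bash
# Print each GPU's compute capability (works on reasonably recent drivers).
nvidia-smi --query-gpu=name,compute_cap --format=csv
```
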
@@ -291,7 +293,7 @@ COPY proto proto
 COPY server server
 RUN cd server && make gen-server && pip install ".[accelerate, onnx-gpu, quantize]" --no-cache-dir
 
-# Patch codegen model changes into transformers 4.34.0
+# Patch codegen model changes into transformers 4.35
 RUN cp server/transformers_patch/modeling_codegen.py ${SITE_PACKAGES}/transformers/models/codegen/modeling_codegen.py
 
 # Install router