@@ -57,6 +57,22 @@ ENV VLLM_INSTALL_PUNICA_KERNELS=1
57
57
RUN python3 setup.py build_ext --inplace
58
58
# ################### EXTENSION Build IMAGE ####################
59
59
60
+ # ################### FLASH_ATTENTION Build IMAGE ####################
61
+ FROM dev AS flash-attn-builder
62
+ # max jobs used for build (override with --build-arg max_jobs=N)
63
+ ARG max_jobs=2
64
+ ENV MAX_JOBS=${max_jobs}
65
+ # flash attention version (override with --build-arg flash_attn_version=...)
66
+ ARG flash_attn_version=v2.5.6
67
+ ENV FLASH_ATTN_VERSION=${flash_attn_version}
68
+
69
+ WORKDIR /usr/src/flash-attention-v2
70
+
71
+ # Download the wheel, or build it if a pre-compiled release doesn't exist.
72
+ RUN pip --verbose wheel "flash-attn==${FLASH_ATTN_VERSION}" \
73
+ --no-build-isolation --no-deps --no-cache-dir
74
+
75
+ # ################### FLASH_ATTENTION Build IMAGE ####################
60
76
61
77
# ################### TEST IMAGE ####################
62
78
# image to run unit testing suite
@@ -68,6 +84,9 @@ WORKDIR /vllm-workspace
68
84
# ADD is used to preserve directory structure
69
85
ADD . /vllm-workspace/
70
86
COPY --from=build /workspace/vllm/*.so /vllm-workspace/vllm/
87
+ # Install flash attention (from pre-built wheel)
88
+ RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \
89
+ pip install /usr/src/flash-attention-v2/*.whl --no-cache-dir
71
90
# ignore build dependencies installation because we are using pre-compiled extensions
72
91
RUN rm pyproject.toml
73
92
RUN --mount=type=cache,target=/root/.cache/pip VLLM_USE_PRECOMPILED=1 pip install . --verbose
@@ -88,6 +107,11 @@ WORKDIR /workspace
88
107
COPY requirements.txt requirements.txt
89
108
RUN --mount=type=cache,target=/root/.cache/pip \
90
109
pip install -r requirements.txt
110
+
111
+ # Install flash attention (from pre-built wheel)
112
+ RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \
113
+ pip install /usr/src/flash-attention-v2/*.whl --no-cache-dir
114
+
91
115
# ################### RUNTIME BASE IMAGE ####################
92
116
93
117
0 commit comments