Skip to content

Commit daa5725

Browse files
committed
whisper.cpp: add latest
1 parent a41766f commit daa5725

File tree

3 files changed

+96
-0
lines changed

3 files changed

+96
-0
lines changed

.github/workflows/docker-build.yml

+31
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ on:
1111
- psql-cli/**/*
1212
- llama.cpp-base/**/*
1313
- llama.cpp/**/*
14+
- whisper.cpp/**/*
1415
workflow_dispatch: # enables manual runs
1516

1617
jobs:
@@ -129,3 +130,33 @@ jobs:
129130
cd $IMAGE
130131
docker build --pull -t "${{ env.CI_REGISTRY_PATH }}/${{ env.IMAGE }}" .
131132
docker push "${{ env.CI_REGISTRY_PATH }}/${{ env.IMAGE }}"
133+
134+
# Builds the whisper.cpp image from ./whisper.cpp and pushes it to Docker Hub.
# Runs only after the `validate` job has succeeded.
build_whisper:
  name: Build whisper.cpp Image
  runs-on: ubuntu-latest
  needs: validate

  services:
    docker:
      image: docker:20.10.16
      options: --privileged

  env:
    IMAGE: whisper.cpp
    CI_REGISTRY_PATH: index.docker.io/ahoylabs
    CI_REGISTRY_USER: ${{ secrets.CI_REGISTRY_USER }}
    CI_REGISTRY_PASSWORD: ${{ secrets.CI_REGISTRY_PASSWORD }}

  steps:
    - name: Check out repository
      uses: actions/checkout@v4

    - name: Log in to Docker Hub
      # Feed the password on stdin instead of `-p`, so it never appears in the
      # process argument list and the secret is not spliced into the script text.
      # The credentials come from the job-level `env` block above.
      run: printf '%s' "$CI_REGISTRY_PASSWORD" | docker login -u "$CI_REGISTRY_USER" --password-stdin

    - name: Build whisper.cpp Docker image
      run: |
        cd "$IMAGE"
        docker build --pull -t "${{ env.CI_REGISTRY_PATH }}/${{ env.IMAGE }}" .
        docker push "${{ env.CI_REGISTRY_PATH }}/${{ env.IMAGE }}"
162+

whisper.cpp/Dockerfile

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# whisper.cpp server image.
#
# The server binary and the model download script are copied out of the
# upstream whisper.cpp CUDA image into a slim CUDA "base" image; only the
# extra runtime packages that are needed get installed on top.

ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
# We use a slightly older version for greater compatibility.
ARG CUDA_VERSION=12.3.2
# CUDA build image
ARG CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# CUDA runtime image
ARG CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
# CUDA base image (excludes cublas)
ARG CUDA_BASE_CONTAINER=nvidia/cuda:${CUDA_VERSION}-base-ubuntu${UBUNTU_VERSION}

FROM ghcr.io/ggerganov/whisper.cpp:main-cuda AS whisper

FROM ${CUDA_BASE_CONTAINER} AS runtime

COPY --from=whisper /app/server /app/models/download-ggml-model.sh /app/

# Helper scripts that install / remove apt packages and clean up in the same
# layer. DEBIAN_FRONTEND must be exported, otherwise it is only a shell
# variable and apt-get never sees it; "$@" keeps each argument intact.
RUN /bin/echo -e '#!/bin/bash\nexport DEBIAN_FRONTEND=noninteractive\napt-get update && apt-get install -y "$@" --no-install-recommends && apt-get clean autoclean && apt-get autoremove --yes && rm -rf /var/lib/apt/lists/*' \
    > /usr/local/sbin/apt_install_clean.sh && \
    chmod a+x /usr/local/sbin/apt_install_clean.sh
RUN /bin/echo -e '#!/bin/bash\nexport DEBIAN_FRONTEND=noninteractive\napt-get update && apt-get remove -y "$@" && apt-get clean autoclean && apt-get autoremove --yes && rm -rf /var/lib/apt/lists/*' \
    > /usr/local/sbin/apt_remove_clean.sh && \
    chmod a+x /usr/local/sbin/apt_remove_clean.sh

# we need just CUDA and CUBLAS
# this saves ~1GB vs the -runtime image
# NOTE: the libcublas package suffix (12-3) must follow the major.minor of
# CUDA_VERSION above; update both together.
RUN /usr/local/sbin/apt_install_clean.sh libcublas-12-3 curl wget ffmpeg

# Defaults read by /entry.sh; override at `docker run` time as needed.
ENV MODEL="small-q5_1"
ENV PORT="8200"
ENV ADDITIONAL_ARGS="--host 0.0.0.0 --convert --inference-path /whisper"

# mount volume here
ENV WORKSPACE="/workspace"

COPY entry.sh /
ENTRYPOINT [ "/entry.sh" ]

whisper.cpp/entry.sh

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/bin/sh

# Entrypoint: make sure the requested ggml model is present in the workspace,
# create the static health-check file, then start the whisper.cpp server.
#
# we expect the following env variables
#   MODEL="small-q5_1"
#   PORT="8200"
#   ADDITIONAL_ARGS="--host 0.0.0.0 --convert --inference-path /whisper"
#
# also handle create /public and populate

# set WORKSPACE to "/" if $WORKSPACE not a directory/doesn't exist
if [ ! -d "$WORKSPACE" ]; then
    WORKSPACE="/"
fi

# Bail out instead of silently continuing in the wrong directory.
cd "$WORKSPACE" || exit 1

# Absolute workspace path without a trailing slash ("/" becomes ""), so the
# paths built below never start with "//".
BASE_DIR="${PWD%/}"
MODELS_DIR="$BASE_DIR/models"
mkdir -p "$MODELS_DIR"

# Download into the same directory the server is pointed at below, so a model
# stored on a mounted workspace is found and reused on the next start.
if [ ! -f "$MODELS_DIR/ggml-${MODEL}.bin" ]; then
    /app/download-ggml-model.sh "$MODEL" "$MODELS_DIR" || exit 1
fi

# this is the path we use for health checks
mkdir -p "$BASE_DIR/public/v1"
echo "alive" > "$BASE_DIR/public/v1/models"

# exec replaces the shell so the server receives signals (e.g. SIGTERM from
# `docker stop`) directly. ADDITIONAL_ARGS is intentionally left unquoted so
# it splits into separate command-line arguments.
# shellcheck disable=SC2086
exec /app/server -m "$MODELS_DIR/ggml-${MODEL}.bin" --port "$PORT" $ADDITIONAL_ARGS --public "$BASE_DIR/public"

0 commit comments

Comments
 (0)