Skip to content

Commit 5d14a7f

Browse files
authored
Merge branch 'main' into feature/get-trace-id-from-req-headers
2 parents e4d7a67 + 84ab88d commit 5d14a7f

File tree

445 files changed

+81126
-10795
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

445 files changed

+81126
-10795
lines changed

.github/workflows/build.yaml

+144-23
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@ on:
66
hardware:
77
type: string
88
description: Hardware
9-
# options:
10-
# - cuda
11-
# - rocm
12-
# - intel
9+
# options:
10+
# - cuda
11+
# - cuda-trtllm
12+
# - rocm
13+
# - intel
1314
required: true
1415
release-tests:
1516
description: "Run release integration tests"
@@ -24,22 +25,34 @@ jobs:
2425
docker_volume: ${{ steps.final.outputs.docker_volume }}
2526
docker_devices: ${{ steps.final.outputs.docker_devices }}
2627
runs_on: ${{ steps.final.outputs.runs_on }}
27-
label: ${{ steps.final.outputs.label }}
28+
label_extension: ${{ steps.final.outputs.label_extension }}
2829
extra_pytest: ${{ steps.final.outputs.extra_pytest }}
2930
concurrency:
3031
group: ${{ github.workflow }}-build-and-push-image-${{ inputs.hardware }}-${{ github.head_ref || github.run_id }}
3132
cancel-in-progress: true
3233
runs-on:
33-
group: aws-highmemory-32-plus-priv
34+
group: aws-highmemory-64-plus-priv
3435
permissions:
3536
contents: write
3637
packages: write
38+
id-token: write
3739
steps:
3840
- name: Checkout repository
3941
uses: actions/checkout@v4
4042
- name: Inject slug/short variables
4143
uses: rlespinasse/[email protected]
42-
- name: Construct harware variables
44+
# Re-export the Actions cache URL and runtime token into the job environment
# so that later steps can read them through the `env` context. A variable that
# is unset in the script's process environment is exported as an empty string.
- name: Inject required variables for sccache to interact with Github Actions Cache
  uses: actions/github-script@v7
  with:
    script: |
      for (const key of ['ACTIONS_CACHE_URL', 'ACTIONS_RUNTIME_TOKEN']) {
        core.exportVariable(key, process.env[key] || '');
      }
50+
51+
# Read the pinned TensorRT-LLM revision out of the backend's CMake file and
# publish it as TENSORRT_LLM_VERSION for the steps that follow.
- name: Extract TensorRT-LLM version
  run: |
    # NOTE(review): assumes the pin is a full 40-character git commit SHA
    # (hexadecimal) - confirm against backends/trtllm/cmake/trtllm.cmake.
    # `head -n 1` keeps a single match so that $GITHUB_ENV never receives a
    # multi-line value.
    trtllm_version="$(grep -oP '[a-f0-9]{40}' "${GITHUB_WORKSPACE}/backends/trtllm/cmake/trtllm.cmake" | head -n 1)"
    echo "TENSORRT_LLM_VERSION=${trtllm_version}" >> "$GITHUB_ENV"
    # Values appended to $GITHUB_ENV only become visible in *later* steps, so
    # print the shell variable here; `env.TENSORRT_LLM_VERSION` would still be
    # empty inside this step.
    echo "TensorRT-LLM version: ${trtllm_version}"
55+
- name: Construct hardware variables
4356
shell: bash
4457
run: |
4558
case ${{ inputs.hardware }} in
@@ -51,15 +64,34 @@ jobs:
5164
export runs_on="aws-g6-12xl-plus-priv-cache"
5265
export platform=""
5366
export extra_pytest=""
67+
export target=""
68+
;;
69+
cuda-trtllm)
70+
export dockerfile="Dockerfile_trtllm"
71+
export label_extension="-trtllm"
72+
export docker_volume="/mnt/cache"
73+
export docker_devices=""
74+
export runs_on="ubuntu-latest"
75+
export platform=""
76+
export extra_pytest=""
77+
if [[ "${GITHUB_REF}" == refs/tags/* ]]; then
78+
export build_type="release";
79+
export target="";
80+
else
81+
export build_type="dev";
82+
export target="ci-runtime";
83+
fi
5484
;;
5585
rocm)
5686
export dockerfile="Dockerfile_amd"
5787
export label_extension="-rocm"
5888
export docker_devices="/dev/kfd,/dev/dri"
5989
export docker_volume="/mnt"
60-
export runs_on="amd-gpu-runners"
90+
# This runner was deactivated.
91+
export runs_on="ubuntu-latest"
6192
export platform=""
6293
export extra_pytest="-k test_flash_gemma_gptq_load"
94+
export target=""
6395
;;
6496
intel-xpu)
6597
export dockerfile="Dockerfile_intel"
@@ -69,6 +101,7 @@ jobs:
69101
export runs_on="ubuntu-latest"
70102
export platform="xpu"
71103
export extra_pytest=""
104+
export target=""
72105
;;
73106
intel-cpu)
74107
export dockerfile="Dockerfile_intel"
@@ -79,7 +112,27 @@ jobs:
79112
export runs_on="aws-highmemory-32-plus-priv"
80113
export platform="cpu"
81114
export extra_pytest="-k test_flash_gemma_simple"
115+
export target=""
82116
;;
117+
neuron)
118+
export dockerfile="Dockerfile.neuron"
119+
export label_extension="-neuron"
120+
export docker_devices="/dev/neuron0"
121+
export docker_volume="/mnt/cache"
122+
export runs_on="aws-inf2-8xlarge"
123+
export platform="cpu"
124+
export extra_pytest="--neuron"
125+
export target=""
126+
;;
127+
gaudi)
128+
export dockerfile="Dockerfile_gaudi"
129+
export label_extension="-gaudi"
130+
export docker_volume="/mnt/cache"
131+
export docker_devices=""
132+
export runs_on="ubuntu-latest"
133+
export platform=""
134+
export extra_pytest=""
135+
export target=""
83136
esac
84137
echo $dockerfile
85138
echo "Dockerfile=${dockerfile}"
@@ -88,19 +141,22 @@ jobs:
88141
echo $runs_on
89142
echo $platform
90143
echo "DOCKERFILE=${dockerfile}" >> $GITHUB_ENV
91-
echo "LABEL=${label_extension}" >> $GITHUB_ENV
144+
echo "LABEL_EXTENSION=${label_extension}" >> $GITHUB_ENV
92145
echo "PLATFORM=${platform}" >> $GITHUB_ENV
93146
echo "DOCKER_VOLUME=${docker_volume}" >> $GITHUB_ENV
94147
echo "DOCKER_DEVICES=${docker_devices}" >> $GITHUB_ENV
95148
echo "RUNS_ON=${runs_on}" >> $GITHUB_ENV
96149
echo "EXTRA_PYTEST=${extra_pytest}" >> $GITHUB_ENV
97150
echo REGISTRY_MIRROR=$REGISTRY_MIRROR >> $GITHUB_ENV
151+
echo "TARGET=${target}" >> $GITHUB_ENV
152+
echo "BUILD_TYPE=${build_type}" >> $GITHUB_ENV
98153
- name: Initialize Docker Buildx
99154
uses: docker/setup-buildx-action@v3
100155
with:
101156
install: true
102157
buildkitd-config: /tmp/buildkitd.toml
103158
- name: Login to internal Container Registry
159+
if: github.event_name != 'pull_request'
104160
uses: docker/login-action@v3
105161
with:
106162
username: ${{ secrets.REGISTRY_USERNAME }}
@@ -113,6 +169,12 @@ jobs:
113169
registry: ghcr.io
114170
username: ${{ github.actor }}
115171
password: ${{ secrets.GITHUB_TOKEN }}
172+
- name: Login to Docker Hub Container Registry
173+
uses: docker/login-action@v3
174+
with:
175+
registry: docker.io
176+
username: ${{ secrets.DOCKERHUB_USERNAME }}
177+
password: ${{ secrets.DOCKERHUB_PASSWORD }}
116178
- name: Login to Azure Container Registry
117179
if: github.event_name != 'pull_request'
118180
uses: docker/login-action@v3
@@ -127,9 +189,9 @@ jobs:
127189
uses: docker/metadata-action@v5
128190
with:
129191
images: |
130-
registry.internal.huggingface.tech/api-inference/community/text-generation-inference
192+
docker.io/huggingface/text-generation-inference-ci
131193
tags: |
132-
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
194+
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL_EXTENSION }}
133195
# If main, release or tag
134196
- name: Extract metadata (tags, labels) for Docker
135197
if: ${{ github.event_name != 'pull_request' }}
@@ -143,10 +205,10 @@ jobs:
143205
ghcr.io/huggingface/text-generation-inference
144206
db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
145207
tags: |
146-
type=semver,pattern={{version}}${{ env.LABEL }}
147-
type=semver,pattern={{major}}.{{minor}}${{ env.LABEL }}
148-
type=raw,value=latest${{ env.LABEL }},enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
149-
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
208+
type=semver,pattern={{version}}${{ env.LABEL_EXTENSION }}
209+
type=semver,pattern={{major}}.{{minor}}${{ env.LABEL_EXTENSION }}
210+
type=raw,value=latest${{ env.LABEL_EXTENSION }},enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
211+
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL_EXTENSION }}
150212
- name: Build and push Docker image
151213
id: build-and-push
152214
uses: docker/build-push-action@v4
@@ -157,27 +219,66 @@ jobs:
157219
platforms: 'linux/amd64'
158220
build-args: |
159221
GIT_SHA=${{ env.GITHUB_SHA }}
160-
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
222+
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL_EXTENSION }}
161223
PLATFORM=${{ env.PLATFORM }}
224+
build_type=${{ env.BUILD_TYPE }}
225+
sccache_gha_enabled=on
226+
actions_cache_url=${{ env.ACTIONS_CACHE_URL }}
227+
actions_runtime_token=${{ env.ACTIONS_RUNTIME_TOKEN }}
228+
target: ${{ env.TARGET }}
162229
tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
163230
labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
164-
cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
165-
cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
231+
cache-from: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL_EXTENSION }},mode=max,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
232+
cache-to: type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL_EXTENSION }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
166233
# Write the values that the job exposes through `steps.final.outputs.*`.
- name: Final
  id: final
  run: |
    # Pull requests point at the Docker Hub CI repository; every other event
    # points at the ghcr.io repository. Both use the same sha-based tag.
    image_tag="sha-${{ env.GITHUB_SHA_SHORT}}${{ env.LABEL_EXTENSION }}"
    case "${{ github.event_name }}" in
      pull_request)
        echo "docker_image=docker.io/huggingface/text-generation-inference-ci:${image_tag}" >> "$GITHUB_OUTPUT"
        ;;
      *)
        echo "docker_image=ghcr.io/huggingface/text-generation-inference:${image_tag}" >> "$GITHUB_OUTPUT"
        ;;
    esac
    # Remaining outputs are copied verbatim from the job environment.
    {
      echo "docker_devices=${{ env.DOCKER_DEVICES }}"
      echo "docker_volume=${{ env.DOCKER_VOLUME }}"
      echo "runs_on=${{ env.RUNS_ON }}"
      echo "label_extension=${{ env.LABEL_EXTENSION }}"
      echo "extra_pytest=${{ env.EXTRA_PYTEST }}"
    } >> "$GITHUB_OUTPUT"
175-
integration_tests:
247+
# Pulls the image produced by build-and-push and runs the Neuron model export
# script. Only scheduled when the build's label extension is '-neuron'.
precompile_neuron_models:
  concurrency:
    group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label_extension }}-${{ github.head_ref || github.run_id }}
    cancel-in-progress: true
  needs: build-and-push
  if: needs.build-and-push.outputs.label_extension == '-neuron'
  runs-on:
    group: ${{ needs.build-and-push.outputs.runs_on }}
  env:
    # The previous conditional expression yielded '--release' on both of its
    # branches, so the value is spelled out directly. It is not referenced by
    # any step visible in this job.
    PYTEST_FLAGS: '--release'
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
    - name: Inject slug/short variables
      # NOTE(review): the action reference below was mangled by e-mail
      # obfuscation when this page was captured (presumably
      # rlespinasse/github-slug-action at a pinned version) - restore it from
      # the repository before use.
      uses: rlespinasse/[email protected]
    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: "3.11"
    - name: Install
      run: |
        make install-integration-tests
    - name: Export neuron models
      # The image name and the token are handed over as environment variables
      # instead of being interpolated into the script text, so their contents
      # are never parsed as shell code.
      env:
        DOCKER_IMAGE: ${{ needs.build-and-push.outputs.docker_image }}
        HF_TOKEN: ${{ secrets.HF_TOKEN_NEURON }}
      run: |
        echo "${DOCKER_IMAGE}"
        docker pull "${DOCKER_IMAGE}"
        python integration-tests/fixtures/neuron/export_models.py
276+
integration_tests:
277+
concurrency:
278+
group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label_extension }}-${{ github.head_ref || github.run_id }}
279+
cancel-in-progress: true
280+
needs: [precompile_neuron_models, build-and-push]
281+
if: ${{ always() && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') && needs.build-and-push.outputs.runs_on != 'ubuntu-latest' }}
181282
runs-on:
182283
group: ${{ needs.build-and-push.outputs.runs_on }}
183284
env:
@@ -204,3 +305,23 @@ jobs:
204305
echo $DOCKER_IMAGE
205306
docker pull $DOCKER_IMAGE
206307
pytest -s -vv integration-tests ${PYTEST_FLAGS} ${EXTRA_PYTEST}
308+
309+
# Runs the TensorRT-LLM backend test binary inside the image produced by
# build-and-push. Only scheduled when the build's label extension is '-trtllm'.
backend_trtllm_cxx_tests:
  needs: build-and-push
  if: needs.build-and-push.outputs.label_extension == '-trtllm'
  concurrency:
    group: ${{ github.workflow }}-${{ github.job }}-trtllm-${{ github.head_ref || github.run_id }}
    cancel-in-progress: true
  runs-on:
    group: aws-g6-12xl-plus-priv-cache
  container:
    image: ${{ needs.build-and-push.outputs.docker_image }}
    credentials:
      username: ${{ secrets.DOCKERHUB_USERNAME }}
      password: ${{ secrets.DOCKERHUB_PASSWORD }}
    options: --gpus all --shm-size=8g

  steps:
    - name: Run C++/CUDA tests
      # The former guard compared `env.LABEL_EXTENSION` with 'ci-runtime'.
      # That variable is only written to $GITHUB_ENV inside build-and-push and
      # is therefore undefined in this job, and 'ci-runtime' is the Docker
      # build *target*, not a label extension - so the step could never run.
      # build-and-push selects the 'ci-runtime' target for every ref that is
      # not a tag, which is the condition mirrored here.
      # NOTE(review): presumably the test binary only ships in the
      # 'ci-runtime' target - confirm against Dockerfile_trtllm.
      if: ${{ !startsWith(github.ref, 'refs/tags/') }}
      run: /usr/local/tgi/bin/tgi_trtllm_backend_tests

.github/workflows/ci_build.yaml

+4-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ on:
2020
- "Dockerfile"
2121
- "Dockerfile_amd"
2222
- "Dockerfile_intel"
23+
- "Dockerfile.neuron"
24+
- "Dockerfile_gaudi"
2325
branches:
2426
- "main"
2527
workflow_dispatch:
@@ -37,11 +39,12 @@ jobs:
3739
# fail-fast is true by default
3840
fail-fast: false
3941
matrix:
40-
hardware: ["cuda", "rocm", "intel-xpu", "intel-cpu"]
42+
hardware: ["cuda", "cuda-trtllm", "rocm", "intel-xpu", "intel-cpu", "neuron", "gaudi"]
4143
uses: ./.github/workflows/build.yaml # calls the one above ^
4244
permissions:
4345
contents: write
4446
packages: write
47+
id-token: write
4548
with:
4649
hardware: ${{ matrix.hardware }}
4750
# https://github.com/actions/runner/issues/2206

.github/workflows/nix_build.yaml

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Builds the `dockerImage` flake output with Nix and copies the resulting
# archive to the internal container registry.
name: Nix Build Docker image
on:
  pull_request:
  push:
    branches: ['main']
    tags: ['v*']

# One run per workflow and PR head ref (or per run id when there is no head
# ref); a newer run cancels the one still in progress.
concurrency:
  group: nix-image-${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  build_nix_image:
    runs-on:
      group: aws-highmemory-32-plus-priv
    steps:
      - uses: actions/checkout@v4

      - uses: cachix/install-nix-action@v27
        with:
          nix_path: nixpkgs=channel:nixos-unstable

      - uses: cachix/cachix-action@v14
        env:
          USER: github_runner
        with:
          name: text-generation-inference
          # Cachix auth token (presumably what grants write access to the
          # cache - confirm).
          authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'

      # Leaves the built image archive behind the ./result symlink, which the
      # last step reads.
      - name: Build
        run: nix build .#dockerImage

      - name: Initialize Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          buildkitd-config: /tmp/buildkitd.toml
          install: true

      # NOTE(review): the action reference below was mangled by e-mail
      # obfuscation when this page was captured (presumably
      # rlespinasse/github-slug-action at a pinned version) - restore it from
      # the repository before use.
      - name: Inject slug/short variables
        uses: rlespinasse/[email protected]

      - name: Login to internal Container Registry
        # Guard left disabled by the author:
        # if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: registry.internal.huggingface.tech
          username: ${{ secrets.REGISTRY_USERNAME }}
          password: ${{ secrets.REGISTRY_PASSWORD }}

      - name: Push to docker
        run: |
          # Pull requests are tagged by short commit sha, everything else by
          # ref name.
          case "${{ github.event_name }}" in
            pull_request)
              export TAG=nix-sha-${{ env.GITHUB_SHA_SHORT }}
              ;;
            *)
              export TAG=${{ github.ref_name }}-nix
              ;;
          esac
          export IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:$TAG
          # skopeo is provided ad hoc by nix-shell; it copies the archive
          # behind ./result to the registry with zstd compression.
          nix-shell -p skopeo --command "skopeo --insecure-policy copy docker-archive:$(readlink -f ./result) docker://$IMAGE --dest-compress-format zstd"

.github/workflows/nix_tests.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ on:
77
- "proto/**"
88
- "router/**"
99
- "launcher/**"
10+
- "backends/**"
1011
- "Cargo.lock"
1112
- "rust-toolchain.toml"
1213
concurrency:

0 commit comments

Comments
 (0)