6
6
hardware :
7
7
type : string
8
8
description : Hardware
9
- # options:
10
- # - cuda
11
- # - rocm
12
- # - intel
9
+ # options:
10
+ # - cuda
11
+ # - cuda-trtllm
12
+ # - rocm
13
+ # - intel
13
14
required : true
14
15
release-tests :
15
16
description : "Run release integration tests"
@@ -24,22 +25,34 @@ jobs:
24
25
docker_volume : ${{ steps.final.outputs.docker_volume }}
25
26
docker_devices : ${{ steps.final.outputs.docker_devices }}
26
27
runs_on : ${{ steps.final.outputs.runs_on }}
27
- label : ${{ steps.final.outputs.label }}
28
+ label_extension : ${{ steps.final.outputs.label_extension }}
28
29
extra_pytest : ${{ steps.final.outputs.extra_pytest }}
29
30
concurrency :
30
31
group : ${{ github.workflow }}-build-and-push-image-${{ inputs.hardware }}-${{ github.head_ref || github.run_id }}
31
32
cancel-in-progress : true
32
33
runs-on :
33
- group : aws-highmemory-32-plus-priv
34
+ group : aws-highmemory-64-plus-priv
34
35
permissions :
35
36
contents : write
36
37
packages : write
38
+ id-token : write
37
39
steps :
38
40
- name : Checkout repository
39
41
uses : actions/checkout@v4
40
42
- name : Inject slug/short variables
41
43
uses :
rlespinasse/github-slug-action@v4.4.1
42
- - name : Construct harware variables
44
+ - name : Inject required variables for sccache to interact with Github Actions Cache
45
+ uses : actions/github-script@v7
46
+ with :
47
+ script : |
48
+ core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
49
+ core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
50
+
51
+ - name : Extract TensorRT-LLM version
52
+ run : |
53
+ echo "TENSORRT_LLM_VERSION=$(grep -oP '([a-z,0-9]{40})' $GITHUB_WORKSPACE/backends/trtllm/cmake/trtllm.cmake)" >> $GITHUB_ENV
54
+ echo "TensorRT-LLM version: ${{ env.TENSORRT_LLM_VERSION }}"
55
+ - name : Construct hardware variables
43
56
shell : bash
44
57
run : |
45
58
case ${{ inputs.hardware }} in
@@ -51,15 +64,34 @@ jobs:
51
64
export runs_on="aws-g6-12xl-plus-priv-cache"
52
65
export platform=""
53
66
export extra_pytest=""
67
+ export target=""
68
+ ;;
69
+ cuda-trtllm)
70
+ export dockerfile="Dockerfile_trtllm"
71
+ export label_extension="-trtllm"
72
+ export docker_volume="/mnt/cache"
73
+ export docker_devices=""
74
+ export runs_on="ubuntu-latest"
75
+ export platform=""
76
+ export extra_pytest=""
77
+ if [[ "${GITHUB_REF}" == refs/tags/* ]]; then
78
+ export build_type="release";
79
+ export target="";
80
+ else
81
+ export build_type="dev";
82
+ export target="ci-runtime";
83
+ fi
54
84
;;
55
85
rocm)
56
86
export dockerfile="Dockerfile_amd"
57
87
export label_extension="-rocm"
58
88
export docker_devices="/dev/kfd,/dev/dri"
59
89
export docker_volume="/mnt"
60
- export runs_on="amd-gpu-runners"
90
+ # This runner was deactivated.
91
+ export runs_on="ubuntu-latest"
61
92
export platform=""
62
93
export extra_pytest="-k test_flash_gemma_gptq_load"
94
+ export target=""
63
95
;;
64
96
intel-xpu)
65
97
export dockerfile="Dockerfile_intel"
69
101
export runs_on="ubuntu-latest"
70
102
export platform="xpu"
71
103
export extra_pytest=""
104
+ export target=""
72
105
;;
73
106
intel-cpu)
74
107
export dockerfile="Dockerfile_intel"
@@ -79,7 +112,27 @@ jobs:
79
112
export runs_on="aws-highmemory-32-plus-priv"
80
113
export platform="cpu"
81
114
export extra_pytest="-k test_flash_gemma_simple"
115
+ export target=""
82
116
;;
117
+ neuron)
118
+ export dockerfile="Dockerfile.neuron"
119
+ export label_extension="-neuron"
120
+ export docker_devices="/dev/neuron0"
121
+ export docker_volume="/mnt/cache"
122
+ export runs_on="aws-inf2-8xlarge"
123
+ export platform="cpu"
124
+ export extra_pytest="--neuron"
125
+ export target=""
126
+ ;;
127
+ gaudi)
128
+ export dockerfile="Dockerfile_gaudi"
129
+ export label_extension="-gaudi"
130
+ export docker_volume="/mnt/cache"
131
+ export docker_devices=""
132
+ export runs_on="ubuntu-latest"
133
+ export platform=""
134
+ export extra_pytest=""
135
+ export target=""
83
136
esac
84
137
echo $dockerfile
85
138
echo "Dockerfile=${dockerfile}"
@@ -88,19 +141,22 @@ jobs:
88
141
echo $runs_on
89
142
echo $platform
90
143
echo "DOCKERFILE=${dockerfile}" >> $GITHUB_ENV
91
- echo "LABEL=${label_extension}" >> $GITHUB_ENV
144
+ echo "LABEL_EXTENSION=${label_extension}" >> $GITHUB_ENV
92
145
echo "PLATFORM=${platform}" >> $GITHUB_ENV
93
146
echo "DOCKER_VOLUME=${docker_volume}" >> $GITHUB_ENV
94
147
echo "DOCKER_DEVICES=${docker_devices}" >> $GITHUB_ENV
95
148
echo "RUNS_ON=${runs_on}" >> $GITHUB_ENV
96
149
echo "EXTRA_PYTEST=${extra_pytest}" >> $GITHUB_ENV
97
150
echo REGISTRY_MIRROR=$REGISTRY_MIRROR >> $GITHUB_ENV
151
+ echo "TARGET=${target}" >> $GITHUB_ENV
152
+ echo "BUILD_TYPE=${build_type}" >> $GITHUB_ENV
98
153
- name : Initialize Docker Buildx
99
154
uses : docker/setup-buildx-action@v3
100
155
with :
101
156
install : true
102
157
buildkitd-config : /tmp/buildkitd.toml
103
158
- name : Login to internal Container Registry
159
+ if : github.event_name != 'pull_request'
104
160
uses : docker/login-action@v3
105
161
with :
106
162
username : ${{ secrets.REGISTRY_USERNAME }}
@@ -113,6 +169,12 @@ jobs:
113
169
registry : ghcr.io
114
170
username : ${{ github.actor }}
115
171
password : ${{ secrets.GITHUB_TOKEN }}
172
+ - name : Login to Docker Hub Container Registry
173
+ uses : docker/login-action@v3
174
+ with :
175
+ registry : docker.io
176
+ username : ${{ secrets.DOCKERHUB_USERNAME }}
177
+ password : ${{ secrets.DOCKERHUB_PASSWORD }}
116
178
- name : Login to Azure Container Registry
117
179
if : github.event_name != 'pull_request'
118
180
uses : docker/login-action@v3
@@ -127,9 +189,9 @@ jobs:
127
189
uses : docker/metadata-action@v5
128
190
with :
129
191
images : |
130
- registry.internal.huggingface.tech/api-inference/community/text-generation-inference
192
+ docker.io/huggingface/text-generation-inference-ci
131
193
tags : |
132
- type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
194
+ type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL_EXTENSION }}
133
195
# If main, release or tag
134
196
- name : Extract metadata (tags, labels) for Docker
135
197
if : ${{ github.event_name != 'pull_request' }}
@@ -143,10 +205,10 @@ jobs:
143
205
ghcr.io/huggingface/text-generation-inference
144
206
db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
145
207
tags : |
146
- type=semver,pattern={{version}}${{ env.LABEL }}
147
- type=semver,pattern={{major}}.{{minor}}${{ env.LABEL }}
148
- type=raw,value=latest${{ env.LABEL }},enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
149
- type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
208
+ type=semver,pattern={{version}}${{ env.LABEL_EXTENSION }}
209
+ type=semver,pattern={{major}}.{{minor}}${{ env.LABEL_EXTENSION }}
210
+ type=raw,value=latest${{ env.LABEL_EXTENSION }},enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
211
+ type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL_EXTENSION }}
150
212
- name : Build and push Docker image
151
213
id : build-and-push
152
214
uses : docker/build-push-action@v4
@@ -157,27 +219,66 @@ jobs:
157
219
platforms : 'linux/amd64'
158
220
build-args : |
159
221
GIT_SHA=${{ env.GITHUB_SHA }}
160
- DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
222
+ DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL_EXTENSION }}
161
223
PLATFORM=${{ env.PLATFORM }}
224
+ build_type=${{ env.BUILD_TYPE }}
225
+ sccache_gha_enabled=on
226
+ actions_cache_url=${{ env.ACTIONS_CACHE_URL }}
227
+ actions_runtime_token=${{ env.ACTIONS_RUNTIME_TOKEN }}
228
+ target : ${{ env.TARGET }}
162
229
tags : ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
163
230
labels : ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
164
- cache-from : type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
165
- cache-to : type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
231
+ cache-from : type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL_EXTENSION }},mode=max,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
232
+ cache-to : type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL_EXTENSION }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
166
233
- name : Final
167
234
id : final
168
235
run : |
169
- echo "docker_image=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT}}${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
236
+
237
+ if [ "${{ github.event_name }}" = "pull_request" ]; then
238
+ echo "docker_image=docker.io/huggingface/text-generation-inference-ci:sha-${{ env.GITHUB_SHA_SHORT}}${{ env.LABEL_EXTENSION }}" >> "$GITHUB_OUTPUT"
239
+ else
240
+ echo "docker_image=ghcr.io/huggingface/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT}}${{ env.LABEL_EXTENSION }}" >> "$GITHUB_OUTPUT"
241
+ fi
170
242
echo "docker_devices=${{ env.DOCKER_DEVICES }}" >> "$GITHUB_OUTPUT"
171
243
echo "docker_volume=${{ env.DOCKER_VOLUME }}" >> "$GITHUB_OUTPUT"
172
244
echo "runs_on=${{ env.RUNS_ON }}" >> "$GITHUB_OUTPUT"
173
- echo "label=${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
245
+ echo "label_extension=${{ env.LABEL_EXTENSION }}" >> "$GITHUB_OUTPUT"
174
246
echo "extra_pytest=${{ env.EXTRA_PYTEST }}" >> "$GITHUB_OUTPUT"
175
- integration_tests :
247
+ precompile_neuron_models :
176
248
concurrency :
177
- group : ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }}
249
+ group : ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label_extension }}-${{ github.head_ref || github.run_id }}
178
250
cancel-in-progress : true
179
251
needs : build-and-push
180
- if : needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
252
+ if : needs.build-and-push.outputs.label_extension == '-neuron'
253
+ runs-on :
254
+ group : ${{ needs.build-and-push.outputs.runs_on }}
255
+ env :
256
+ PYTEST_FLAGS : ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '--release' }}
257
+ steps :
258
+ - name : Checkout repository
259
+ uses : actions/checkout@v4
260
+ - name : Inject slug/short variables
261
+ uses :
rlespinasse/github-slug-action@v4.4.1
262
+ - name : Set up Python
263
+ uses : actions/setup-python@v4
264
+ with :
265
python-version : "3.11"
266
+ - name : Install
267
+ run : |
268
+ make install-integration-tests
269
+ - name : Export neuron models
270
+ run : |
271
+ export DOCKER_IMAGE=${{ needs.build-and-push.outputs.docker_image }}
272
+ echo $DOCKER_IMAGE
273
+ docker pull $DOCKER_IMAGE
274
+ export HF_TOKEN=${{ secrets.HF_TOKEN_NEURON }}
275
+ python integration-tests/fixtures/neuron/export_models.py
276
+ integration_tests :
277
+ concurrency :
278
+ group : ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label_extension }}-${{ github.head_ref || github.run_id }}
279
+ cancel-in-progress : true
280
+ needs : [precompile_neuron_models, build-and-push]
281
+ if : ${{ always() && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') && needs.build-and-push.outputs.runs_on != 'ubuntu-latest' }}
181
282
runs-on :
182
283
group : ${{ needs.build-and-push.outputs.runs_on }}
183
284
env :
@@ -204,3 +305,23 @@ jobs:
204
305
echo $DOCKER_IMAGE
205
306
docker pull $DOCKER_IMAGE
206
307
pytest -s -vv integration-tests ${PYTEST_FLAGS} ${EXTRA_PYTEST}
308
+
309
+ backend_trtllm_cxx_tests :
310
+ needs : build-and-push
311
+ if : needs.build-and-push.outputs.label_extension == '-trtllm'
312
+ concurrency :
313
+ group : ${{ github.workflow }}-${{ github.job }}-trtllm-${{ github.head_ref || github.run_id }}
314
+ cancel-in-progress : true
315
+ runs-on :
316
+ group : aws-g6-12xl-plus-priv-cache
317
+ container :
318
+ image : ${{ needs.build-and-push.outputs.docker_image }}
319
+ credentials :
320
+ username : ${{ secrets.DOCKERHUB_USERNAME }}
321
+ password : ${{ secrets.DOCKERHUB_PASSWORD }}
322
+ options : --gpus all --shm-size=8g
323
+
324
+ steps :
325
+ - name : Run C++/CUDA tests
326
+ if : ${{ env.LABEL_EXTENSION == 'ci-runtime' }}
327
+ run : /usr/local/tgi/bin/tgi_trtllm_backend_tests
0 commit comments