66 hardware :
77 type : string
88 description : Hardware
9- # options:
10- # - cuda
11- # - rocm
12- # - intel
9+ # options:
10+ # - cuda
11+ # - cuda-trtllm
12+ # - rocm
13+ # - intel
1314 required : true
1415 release-tests :
1516 description : "Run release integration tests"
@@ -24,22 +25,34 @@ jobs:
2425 docker_volume : ${{ steps.final.outputs.docker_volume }}
2526 docker_devices : ${{ steps.final.outputs.docker_devices }}
2627 runs_on : ${{ steps.final.outputs.runs_on }}
27- label : ${{ steps.final.outputs.label }}
28+ label_extension : ${{ steps.final.outputs.label_extension }}
2829 extra_pytest : ${{ steps.final.outputs.extra_pytest }}
2930 concurrency :
3031 group : ${{ github.workflow }}-build-and-push-image-${{ inputs.hardware }}-${{ github.head_ref || github.run_id }}
3132 cancel-in-progress : true
3233 runs-on :
33- group : aws-highmemory-32-plus-priv
34+ group : aws-highmemory-64-plus-priv
3435 permissions :
3536 contents : write
3637 packages : write
38+ id-token : write
3739 steps :
3840 - name : Checkout repository
3941 uses : actions/checkout@v4
4042 - name : Inject slug/short variables
4143 uses : rlespinasse/github-slug-action@v4.4.1
42- - name : Construct harware variables
44+ - name : Inject required variables for sccache to interact with Github Actions Cache
45+ uses : actions/github-script@v7
46+ with :
47+ script : |
48+ core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
49+ core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
50+
51+ - name : Extract TensorRT-LLM version
52+ run : |
53+ echo "TENSORRT_LLM_VERSION=$(grep -oP '([a-z,0-9]{40})' $GITHUB_WORKSPACE/backends/trtllm/cmake/trtllm.cmake)" >> $GITHUB_ENV
54+ echo "TensorRT-LLM version: ${{ env.TENSORRT_LLM_VERSION }}"
55+ - name : Construct hardware variables
4356 shell : bash
4457 run : |
4558 case ${{ inputs.hardware }} in
@@ -51,15 +64,34 @@ jobs:
5164 export runs_on="aws-g6-12xl-plus-priv-cache"
5265 export platform=""
5366 export extra_pytest=""
67+ export target=""
68+ ;;
69+ cuda-trtllm)
70+ export dockerfile="Dockerfile_trtllm"
71+ export label_extension="-trtllm"
72+ export docker_volume="/mnt/cache"
73+ export docker_devices=""
74+ export runs_on="ubuntu-latest"
75+ export platform=""
76+ export extra_pytest=""
77+ if [[ "${GITHUB_REF}" == refs/tags/* ]]; then
78+ export build_type="release";
79+ export target="";
80+ else
81+ export build_type="dev";
82+ export target="ci-runtime";
83+ fi
5484 ;;
5585 rocm)
5686 export dockerfile="Dockerfile_amd"
5787 export label_extension="-rocm"
5888 export docker_devices="/dev/kfd,/dev/dri"
5989 export docker_volume="/mnt"
60- export runs_on="amd-gpu-runners"
90+ # This runner was deactivated.
91+ export runs_on="ubuntu-latest"
6192 export platform=""
6293 export extra_pytest="-k test_flash_gemma_gptq_load"
94+ export target=""
6395 ;;
6496 intel-xpu)
6597 export dockerfile="Dockerfile_intel"
69101 export runs_on="ubuntu-latest"
70102 export platform="xpu"
71103 export extra_pytest=""
104+ export target=""
72105 ;;
73106 intel-cpu)
74107 export dockerfile="Dockerfile_intel"
@@ -79,7 +112,27 @@ jobs:
79112 export runs_on="aws-highmemory-32-plus-priv"
80113 export platform="cpu"
81114 export extra_pytest="-k test_flash_gemma_simple"
115+ export target=""
82116 ;;
117+ neuron)
118+ export dockerfile="Dockerfile.neuron"
119+ export label_extension="-neuron"
120+ export docker_devices="/dev/neuron0"
121+ export docker_volume="/mnt/cache"
122+ export runs_on="aws-inf2-8xlarge"
123+ export platform="cpu"
124+ export extra_pytest="--neuron"
125+ export target=""
126+ ;;
127+ gaudi)
128+ export dockerfile="Dockerfile_gaudi"
129+ export label_extension="-gaudi"
130+ export docker_volume="/mnt/cache"
131+ export docker_devices=""
132+ export runs_on="ubuntu-latest"
133+ export platform=""
134+ export extra_pytest=""
135+ export target=""
83136 esac
84137 echo $dockerfile
85138 echo "Dockerfile=${dockerfile}"
@@ -88,19 +141,22 @@ jobs:
88141 echo $runs_on
89142 echo $platform
90143 echo "DOCKERFILE=${dockerfile}" >> $GITHUB_ENV
91- echo "LABEL=${label_extension}" >> $GITHUB_ENV
144+ echo "LABEL_EXTENSION=${label_extension}" >> $GITHUB_ENV
92145 echo "PLATFORM=${platform}" >> $GITHUB_ENV
93146 echo "DOCKER_VOLUME=${docker_volume}" >> $GITHUB_ENV
94147 echo "DOCKER_DEVICES=${docker_devices}" >> $GITHUB_ENV
95148 echo "RUNS_ON=${runs_on}" >> $GITHUB_ENV
96149 echo "EXTRA_PYTEST=${extra_pytest}" >> $GITHUB_ENV
97150 echo REGISTRY_MIRROR=$REGISTRY_MIRROR >> $GITHUB_ENV
151+ echo "TARGET=${target}" >> $GITHUB_ENV
152+ echo "BUILD_TYPE=${build_type}" >> $GITHUB_ENV
98153 - name : Initialize Docker Buildx
99154 uses : docker/setup-buildx-action@v3
100155 with :
101156 install : true
102157 buildkitd-config : /tmp/buildkitd.toml
103158 - name : Login to internal Container Registry
159+ if : github.event_name != 'pull_request'
104160 uses : docker/login-action@v3
105161 with :
106162 username : ${{ secrets.REGISTRY_USERNAME }}
@@ -113,6 +169,12 @@ jobs:
113169 registry : ghcr.io
114170 username : ${{ github.actor }}
115171 password : ${{ secrets.GITHUB_TOKEN }}
172+ - name : Login to Docker Hub Container Registry
173+ uses : docker/login-action@v3
174+ with :
175+ registry : docker.io
176+ username : ${{ secrets.DOCKERHUB_USERNAME }}
177+ password : ${{ secrets.DOCKERHUB_PASSWORD }}
116178 - name : Login to Azure Container Registry
117179 if : github.event_name != 'pull_request'
118180 uses : docker/login-action@v3
@@ -127,9 +189,9 @@ jobs:
127189 uses : docker/metadata-action@v5
128190 with :
129191 images : |
130- registry.internal.huggingface.tech/api-inference/community/text-generation-inference
192+ docker.io/huggingface/text-generation-inference-ci
131193 tags : |
132- type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
194+ type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL_EXTENSION }}
133195 # If main, release or tag
134196 - name : Extract metadata (tags, labels) for Docker
135197 if : ${{ github.event_name != 'pull_request' }}
@@ -143,10 +205,10 @@ jobs:
143205 ghcr.io/huggingface/text-generation-inference
144206 db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
145207 tags : |
146- type=semver,pattern={{version}}${{ env.LABEL }}
147- type=semver,pattern={{major}}.{{minor}}${{ env.LABEL }}
148- type=raw,value=latest${{ env.LABEL }},enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
149- type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
208+ type=semver,pattern={{version}}${{ env.LABEL_EXTENSION }}
209+ type=semver,pattern={{major}}.{{minor}}${{ env.LABEL_EXTENSION }}
210+ type=raw,value=latest${{ env.LABEL_EXTENSION }},enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
211+ type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL_EXTENSION }}
150212 - name : Build and push Docker image
151213 id : build-and-push
152214 uses : docker/build-push-action@v4
@@ -157,27 +219,66 @@ jobs:
157219 platforms : 'linux/amd64'
158220 build-args : |
159221 GIT_SHA=${{ env.GITHUB_SHA }}
160- DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
222+ DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL_EXTENSION }}
161223 PLATFORM=${{ env.PLATFORM }}
224+ build_type=${{ env.BUILD_TYPE }}
225+ sccache_gha_enabled=on
226+ actions_cache_url=${{ env.ACTIONS_CACHE_URL }}
227+ actions_runtime_token=${{ env.ACTIONS_RUNTIME_TOKEN }}
228+ target : ${{ env.TARGET }}
162229 tags : ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
163230 labels : ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
164- cache-from : type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
165- cache-to : type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
231+ cache-from : type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL_EXTENSION }},mode=max,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
232+ cache-to : type=s3,region=us-east-1,bucket=ci-docker-buildx-cache,name=text-generation-inference-cache${{ env.LABEL_EXTENSION }},mode=min,access_key_id=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_ACCESS_KEY_ID }},secret_access_key=${{ secrets.S3_CI_DOCKER_BUILDX_CACHE_SECRET_ACCESS_KEY }},mode=min
166233 - name : Final
167234 id : final
168235 run : |
169- echo "docker_image=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT}}${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
236+
237+ if [ "${{ github.event_name }}" = "pull_request" ]; then
238+ echo "docker_image=docker.io/huggingface/text-generation-inference-ci:sha-${{ env.GITHUB_SHA_SHORT}}${{ env.LABEL_EXTENSION }}" >> "$GITHUB_OUTPUT"
239+ else
240+ echo "docker_image=ghcr.io/huggingface/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT}}${{ env.LABEL_EXTENSION }}" >> "$GITHUB_OUTPUT"
241+ fi
170242 echo "docker_devices=${{ env.DOCKER_DEVICES }}" >> "$GITHUB_OUTPUT"
171243 echo "docker_volume=${{ env.DOCKER_VOLUME }}" >> "$GITHUB_OUTPUT"
172244 echo "runs_on=${{ env.RUNS_ON }}" >> "$GITHUB_OUTPUT"
173- echo "label=${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
245+ echo "label_extension=${{ env.LABEL_EXTENSION }}" >> "$GITHUB_OUTPUT"
174246 echo "extra_pytest=${{ env.EXTRA_PYTEST }}" >> "$GITHUB_OUTPUT"
175- integration_tests :
247+ precompile_neuron_models :
176248 concurrency :
177- group : ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }}
249+ group : ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label_extension }}-${{ github.head_ref || github.run_id }}
178250 cancel-in-progress : true
179251 needs : build-and-push
180- if : needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
252+ if : needs.build-and-push.outputs.label_extension == '-neuron'
253+ runs-on :
254+ group : ${{ needs.build-and-push.outputs.runs_on }}
255+ env :
256+ PYTEST_FLAGS : ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '--release' }}
257+ steps :
258+ - name : Checkout repository
259+ uses : actions/checkout@v4
260+ - name : Inject slug/short variables
261+ uses : rlespinasse/github-slug-action@v4.4.1
262+ - name : Set up Python
263+ uses : actions/setup-python@v4
264+ with :
265+ python-version : " 3.11"
266+ - name : Install
267+ run : |
268+ make install-integration-tests
269+ - name : Export neuron models
270+ run : |
271+ export DOCKER_IMAGE=${{ needs.build-and-push.outputs.docker_image }}
272+ echo $DOCKER_IMAGE
273+ docker pull $DOCKER_IMAGE
274+ export HF_TOKEN=${{ secrets.HF_TOKEN_NEURON }}
275+ python integration-tests/fixtures/neuron/export_models.py
276+ integration_tests :
277+ concurrency :
278+ group : ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label_extension }}-${{ github.head_ref || github.run_id }}
279+ cancel-in-progress : true
280+ needs : [precompile_neuron_models, build-and-push]
281+ if : ${{ always() && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') && needs.build-and-push.outputs.runs_on != 'ubuntu-latest' }}
181282 runs-on :
182283 group : ${{ needs.build-and-push.outputs.runs_on }}
183284 env :
@@ -204,3 +305,23 @@ jobs:
204305 echo $DOCKER_IMAGE
205306 docker pull $DOCKER_IMAGE
206307 pytest -s -vv integration-tests ${PYTEST_FLAGS} ${EXTRA_PYTEST}
308+
309+ backend_trtllm_cxx_tests :
310+ needs : build-and-push
311+ if : needs.build-and-push.outputs.label_extension == '-trtllm'
312+ concurrency :
313+ group : ${{ github.workflow }}-${{ github.job }}-trtllm-${{ github.head_ref || github.run_id }}
314+ cancel-in-progress : true
315+ runs-on :
316+ group : aws-g6-12xl-plus-priv-cache
317+ container :
318+ image : ${{ needs.build-and-push.outputs.docker_image }}
319+ credentials :
320+ username : ${{ secrets.DOCKERHUB_USERNAME }}
321+ password : ${{ secrets.DOCKERHUB_PASSWORD }}
322+ options : --gpus all --shm-size=8g
323+
324+ steps :
325+ - name : Run C++/CUDA tests
326+ if : ${{ env.LABEL_EXTENSION == 'ci-runtime' }}
327+ run : /usr/local/tgi/bin/tgi_trtllm_backend_tests
0 commit comments