Skip to content

Commit b6e3ffb

Browse files
authored
Merge branch 'main' into feature/get-trace-id-from-req-headers
2 parents 14e8ca5 + ab7ccf5 commit b6e3ffb

File tree

139 files changed

+9066
-1985
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

139 files changed

+9066
-1985
lines changed

.github/workflows/build.yaml

+1
Original file line number | Diff line number | Diff line change
@@ -202,4 +202,5 @@ jobs:
202202
export EXTRA_PYTEST="${{ needs.build-and-push.outputs.extra_pytest }}"
203203
export HF_TOKEN=${{ secrets.HF_TOKEN }}
204204
echo $DOCKER_IMAGE
205+
docker pull $DOCKER_IMAGE
205206
pytest -s -vv integration-tests ${PYTEST_FLAGS} ${EXTRA_PYTEST}

.github/workflows/load_test.yaml

+24-17
Original file line number | Diff line number | Diff line change
@@ -3,41 +3,48 @@ name: Nightly load test
33
on:
44
schedule:
55
- cron: '0 0 * * 1-5'
6+
workflow_call:
7+
workflow_dispatch:
68

79
pull_request:
810
paths:
911
- ".github/workflows/load_test.yaml"
10-
branches:
11-
- 'main'
12+
13+
env:
14+
AWS_DEFAULT_REGION: us-east-1
15+
AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
16+
AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
1217

1318
jobs:
1419
load-tests:
1520
concurrency:
1621
group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
1722
cancel-in-progress: true
1823
runs-on:
19-
group: aws-g5-12xlarge
24+
group: aws-g6-12xl-plus-priv-cache
2025
env:
2126
DOCKER_VOLUME: /cache
2227
steps:
2328
- name: Checkout repository
2429
uses: actions/checkout@v3
2530

26-
- name: Install k6
27-
run: |
28-
curl https://github.com/grafana/k6/releases/download/v0.44.0/k6-v0.44.0-linux-amd64.tar.gz -L | tar xvz --strip-components 1
29-
30-
- name: Start starcoder
31-
run: |
32-
docker run --name tgi-starcoder --rm --gpus all -p 3000:80 -v /mnt/cache:/data -e HF_TOKEN=${{ secrets.HF_TOKEN }} --pull always -d ghcr.io/huggingface/text-generation-inference:latest --model-id bigcode/starcoder --num-shard 2 --max-batch-total-tokens 32768
33-
sleep 10
34-
wget --timeout 10 --retry-on-http-error --waitretry=1 --tries=240 http://localhost:3000/health
31+
- name: Install Python 3.11
32+
uses: actions/setup-python@v2
33+
with:
34+
python-version: 3.11
3535

36-
- name: Run k6
36+
- name: Install poetry
3737
run: |
38-
./k6 run load_tests/starcoder_load.js
38+
curl -sSL https://install.python-poetry.org | python3 -
39+
export PATH="$HOME/.local/bin:$PATH"
40+
poetry --version
3941
40-
- name: Stop starcoder
41-
if: ${{ always() }}
42+
- name: Run bench test
4243
run: |
43-
docker stop tgi-starcoder || true
44+
export PATH="$HOME/.local/bin:$PATH"
45+
cd load_tests
46+
poetry install
47+
poetry run python benchmarks.py --sha ${{ github.sha }} --results-file "s3://text-generation-inference-ci/benchmarks/ci/${{ github.sha }}.parquet"
48+
shell: bash
49+
env:
50+
HF_TOKEN: ${{ secrets.HF_TOKEN_BENCHMARK }}

.github/workflows/nix_cache.yaml

+34
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,34 @@
1+
name: "Cache devshells"
2+
on:
3+
pull_request:
4+
paths:
5+
- "flake.nix"
6+
- "flake.lock"
7+
- "nix/**"
8+
concurrency:
9+
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
10+
cancel-in-progress: true
11+
12+
jobs:
13+
tests:
14+
runs-on:
15+
group: aws-highmemory-32-plus-priv
16+
steps:
17+
- uses: actions/checkout@v4
18+
- uses: cachix/install-nix-action@v27
19+
with:
20+
nix_path: nixpkgs=channel:nixos-unstable
21+
- uses: cachix/cachix-action@v14
22+
with:
23+
name: text-generation-inference
24+
# If you chose signing key for write access
25+
authToken: "${{ secrets.CACHIX_AUTH_TOKEN }}"
26+
env:
27+
USER: github_runner
28+
- name: Build impure devshell
29+
run: nix build .\#devShells.x86_64-linux.impure
30+
- name: Build impure devshell (CUDA dev)
31+
run: nix build .\#devShells.x86_64-linux.impureWithCuda
32+
# Pure shell dependencies are covered by Nix tests.
33+
# - name: Build pure devshell
34+
# run: nix build .\#devShells.x86_64-linux.pure

.gitignore

+2
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@ router/tokenizer.json
55

66
backends/v2/src/client/pb
77
backends/v3/src/client/pb
8+
backends/client/src/v2/pb
9+
backends/client/src/v3/pb
810

911
# ROCm auto-generated files
1012
*.hip

0 commit comments

Comments
 (0)