# Build and push docker nightly (workflow definition as shown for run #2259)

# Workflow that builds the djl-serving docker images and pushes them to ECR.
name: Build and push docker nightly
on:
  # Manual trigger.
  workflow_dispatch:
    inputs:
      mode:
        description: 'release/nightly/temp, default is nightly'
        required: true
        default: 'nightly'
      # Consumed by the job matrices: a value starting with '[' is parsed as a
      # JSON array; anything else is wrapped in '[...]' and then parsed, so it
      # must itself be valid JSON array content (e.g. "cpu" including quotes).
      arch:
        description: 'which images to build [ cpu, cpu-full, pytorch-inf2, pytorch-gpu, tensorrt-llm, lmi, aarch64]'
        type: string
        required: false
        default: '["cpu", "cpu-full", "pytorch-inf2", "pytorch-gpu", "tensorrt-llm", "lmi", "aarch64"]'
  # Reusable-workflow trigger; takes the same inputs as the manual trigger and
  # additionally exposes the DJL version resolved by the nightly-build job.
  workflow_call:
    inputs:
      mode:
        description: 'release/nightly/temp, default is nightly'
        type: string
        required: true
        default: 'nightly'
      arch:
        description: 'which images to build [ cpu, cpu-full, pytorch-inf2, pytorch-gpu, tensorrt-llm, lmi, aarch64]'
        type: string
        required: false
        default: '["cpu", "cpu-full", "pytorch-inf2", "pytorch-gpu", "tensorrt-llm", "lmi", "aarch64"]'
    outputs:
      djl_version:
        description: "djl version"
        value: ${{ jobs.nightly-build.outputs.djl_version }}
# id-token: write lets jobs request an OIDC token, which the
# "Configure AWS Credentials" step (role-to-assume) relies on.
permissions:
  id-token: write
  contents: read
env:
  # ECR repository the built images are re-tagged for and pushed to.
  AWS_ECR_REPO: "185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp"
  # Repository name of the locally built image that is used as the source
  # when re-tagging for ECR.
  DOCKER_HUB_REPO: "deepjavalibrary/djl-serving"
jobs:
  # Starts one runner instance per requested arch from the scheduler host and
  # publishes each instance id as a job output named instance_id_<arch>.
  create-runners:
    runs-on: [self-hosted, scheduler]
    strategy:
      matrix:
        # A value starting with '[' is parsed as a JSON array; anything else
        # is wrapped in '[...]' first and then parsed.
        arch: ${{ startsWith(inputs.arch, '[') && fromJson(inputs.arch) || fromJson(format('[{0}]', inputs.arch)) }}
    # Each matrix leg only fills in the output that matches its own arch.
    outputs:
      instance_id_cpu: ${{ steps.create_cpu.outputs.instance_id_cpu }}
      instance_id_cpu-full: ${{ steps.create_cpu.outputs.instance_id_cpu-full }}
      instance_id_pytorch-inf2: ${{ steps.create_cpu.outputs.instance_id_pytorch-inf2 }}
      instance_id_pytorch-gpu: ${{ steps.create_cpu.outputs.instance_id_pytorch-gpu }}
      instance_id_tensorrt-llm: ${{ steps.create_cpu.outputs.instance_id_tensorrt-llm }}
      instance_id_lmi: ${{ steps.create_cpu.outputs.instance_id_lmi }}
      instance_id_aarch64: ${{ steps.create_cpu.outputs.instance_id_aarch64 }}
    steps:
      - name: Create new instance
        id: create_cpu
        env:
          # Handed over via the environment instead of being interpolated into
          # the script text, so caller-supplied values cannot inject commands.
          RUNNER_PAT: ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}
          MATRIX_ARCH: ${{ matrix.arch }}
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          # curl runs in its own assignment: the default `bash -e` shell has no
          # pipefail, so inside a pipeline a --fail error would go unnoticed.
          response=$(curl -X POST -H "Authorization: token ${RUNNER_PAT}" \
            https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
            --fail)
          # The PAT is not needed past this point; keep it out of child processes.
          unset RUNNER_PAT
          token=$(echo "$response" | jq -r '.token')
          instance_type=action_cpu
          if [ "$MATRIX_ARCH" == "aarch64" ]; then
            instance_type=action_graviton
          fi
          ./start_instance.sh "$instance_type" "$token" djl-serving
          # Re-publish the instance_id line found in $GITHUB_OUTPUT under an
          # arch-specific name so the matrix legs do not overwrite each other.
          instance_id=$(grep "^instance_id=" "$GITHUB_OUTPUT" | cut -d'=' -f2)
          echo "instance_id_${MATRIX_ARCH}=$instance_id" >> "$GITHUB_OUTPUT"

  # Builds the image for each requested arch on a runner labelled for this
  # run and pushes it to the ECR repo under three tags.
  nightly-build:
    needs: create-runners
    timeout-minutes: 120
    strategy:
      fail-fast: false
      matrix:
        arch: ${{ startsWith(inputs.arch, '[') && fromJson(inputs.arch) || fromJson(format('[{0}]', inputs.arch)) }}
    # The run-specific labels restrict the job to runners carrying the labels
    # of this exact run and commit.
    runs-on:
      - self-hosted
      - ${{ matrix.arch != 'aarch64' && 'cpu' || 'aarch64' }}
      - RUN_ID-${{ github.run_id }}
      - RUN_NUMBER-${{ github.run_number }}
      - SHA-${{ github.sha }}
    outputs:
      djl_version: ${{ steps.get-versions.outputs.DJL_VERSION }}
    steps:
      - name: Clean disk space
        run: |
          sudo rm -rf \
            /usr/share/dotnet /usr/local/lib/android /opt/ghc \
            /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup \
            $AGENT_TOOLSDIRECTORY
      - uses: actions/checkout@v4
      - name: Set up Python3
        uses: actions/setup-python@v5
        with:
          python-version: '3.10.x'
      - name: Install pip dependencies
        run: |
          python -m pip install --upgrade pip
          pip install awscli
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
          aws-region: us-east-1
      - name: Set up JDK 17
        uses: actions/setup-java@v4
        with:
          distribution: 'corretto'
          java-version: 17
      - uses: actions/cache@v4
        with:
          path: ~/.gradle/caches
          key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }}
      - name: Set env vars for container build and tag
        id: get-versions
        env:
          # Caller-controlled input; read from the environment rather than
          # being pasted into the script.
          BUILD_MODE: ${{ inputs.mode }}
        run: |
          # Versions are read from the gradle version catalog.
          DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)
          SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml)
          echo "DJL_VERSION=$DJL_VERSION" >> "$GITHUB_ENV"
          echo "DJL_VERSION=$DJL_VERSION" >> "$GITHUB_OUTPUT"
          echo "SERVING_VERSION=$SERVING_VERSION" >> "$GITHUB_ENV"
          if [[ "$BUILD_MODE" != "release" ]]; then
            # nightly/temp: build the serving package locally as a snapshot and
            # mark the image tags with the -nightly suffix.
            export NIGHTLY="-nightly"
            echo "NIGHTLY=$NIGHTLY" >> "$GITHUB_ENV"
            ./gradlew --refresh-dependencies :serving:dockerDeb -Psnapshot
            export BUILD_VERSION_ARG_DJL="${DJL_VERSION}-SNAPSHOT"
            export BUILD_VERSION_ARG_SERVING="${SERVING_VERSION}-SNAPSHOT"
          else
            # release: drop any locally built distribution and build against
            # the plain (non-SNAPSHOT) versions.
            rm -rf serving/docker/distributions
            export BUILD_VERSION_ARG_DJL="${DJL_VERSION}"
            export BUILD_VERSION_ARG_SERVING="${SERVING_VERSION}"
          fi
          echo "BUILD_VERSION_ARG_DJL=$BUILD_VERSION_ARG_DJL" >> "$GITHUB_ENV"
          echo "BUILD_VERSION_ARG_SERVING=$BUILD_VERSION_ARG_SERVING" >> "$GITHUB_ENV"
      - name: Build docker image
        env:
          MATRIX_ARCH: ${{ matrix.arch }}
        run: |
          # SERVING_VERSION and BUILD_VERSION_ARG_* come from $GITHUB_ENV,
          # written by the previous step.
          export RELEASE_VERSION="${SERVING_VERSION}-"
          cd serving/docker
          docker compose build --no-cache \
            --build-arg djl_version="${BUILD_VERSION_ARG_DJL}" \
            --build-arg djl_serving_version="${BUILD_VERSION_ARG_SERVING}" \
            "${MATRIX_ARCH}"
      - name: Tag and push temp image to ECR repo
        working-directory: serving/docker
        env:
          MATRIX_ARCH: ${{ matrix.arch }}
        run: |
          # The region is the 4th dot-separated field of the ECR host name.
          ECR_REGION=$(echo "${AWS_ECR_REPO}" | awk -F. '{print $4}')
          aws ecr get-login-password --region "$ECR_REGION" | docker login --username AWS --password-stdin "${AWS_ECR_REPO}"
          # NIGHTLY is only set for non-release builds; it expands to nothing otherwise.
          imageSuffix="${SERVING_VERSION}-${MATRIX_ARCH}${NIGHTLY:-}"
          localImage="${DOCKER_HUB_REPO}:${imageSuffix}"
          baseDockerTag="${AWS_ECR_REPO}:${imageSuffix}"
          tempRunIdTag="${baseDockerTag}-${GITHUB_RUN_ID}"
          tempCommitTag="${baseDockerTag}-${GITHUB_SHA}"
          docker tag "$localImage" "$baseDockerTag"
          docker tag "$localImage" "$tempRunIdTag"
          docker tag "$localImage" "$tempCommitTag"
          time docker push --all-tags "${AWS_ECR_REPO}"

  # Always runs, even when earlier jobs failed or were cancelled, and stops
  # every instance whose id was recorded by create-runners.
  stop-runners:
    if: always()
    runs-on: [self-hosted, scheduler]
    needs: [nightly-build, create-runners]
    env:
      runner_output: ${{ toJson(needs.create-runners.outputs) }}
    steps:
      - name: Stop all instances
        continue-on-error: true
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          failed=0
          for key in $(echo "$runner_output" | jq -r 'keys[]'); do
            instance_id=$(echo "$runner_output" | jq -r --arg key "$key" '.[$key]')
            echo "Key: $key, instance_id: $instance_id"
            # Outputs of archs that were not built carry no instance id.
            if [ -z "$instance_id" ] || [ "$instance_id" = "null" ]; then
              echo "No instance recorded for $key, skipping"
              continue
            fi
            # Keep going on failure so one bad instance does not leave the
            # remaining ones running; the failure is reported at the end.
            ./stop_instance.sh "$instance_id" || { echo "Failed to stop $instance_id"; failed=1; }
          done
          exit "$failed"