From d87235b82434bf9d534a05ecdebd8027ef9a118f Mon Sep 17 00:00:00 2001 From: Brandy Nguyen Date: Mon, 14 Oct 2024 22:42:00 -0700 Subject: [PATCH] Decouple djl and serving in dockerfiles and gradle and add serving publish to repo --- .github/workflows/docker-nightly-publish.yml | 61 ++++++++++---- .github/workflows/serving-publish.yml | 84 +++++++++++++++++++ .../main/kotlin/ai/djl/javaBase.gradle.kts | 2 +- gradle/libs.versions.toml | 1 + serving/docker/Dockerfile | 11 +-- serving/docker/aarch64.Dockerfile | 12 +-- serving/docker/lmi.Dockerfile | 10 ++- serving/docker/pytorch-gpu.Dockerfile | 10 ++- serving/docker/pytorch-inf2.Dockerfile | 12 +-- serving/docker/scripts/install_djl_serving.sh | 11 ++- serving/docker/tensorrt-llm.Dockerfile | 8 +- 11 files changed, 173 insertions(+), 49 deletions(-) create mode 100644 .github/workflows/serving-publish.yml diff --git a/.github/workflows/docker-nightly-publish.yml b/.github/workflows/docker-nightly-publish.yml index 8a536f0c93..a52898eef1 100644 --- a/.github/workflows/docker-nightly-publish.yml +++ b/.github/workflows/docker-nightly-publish.yml @@ -58,6 +58,13 @@ jobs: with: path: ~/.gradle/caches key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }} + - name: Extract DJL and DJL Serving versions from TOML + id: get-versions + run: | + DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) + SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) + echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV + echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV - name: Build serving package for nightly if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} run: | @@ -66,17 +73,21 @@ jobs: if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} working-directory: serving/docker run: | - DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) export NIGHTLY="-nightly" - docker compose build --no-cache --build-arg djl_version=${DJL_VERSION}-SNAPSHOT ${{ matrix.arch }} + docker compose build --no-cache \ + --build-arg djl_version=${{ env.DJL_VERSION }} \ + --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \ + ${{ matrix.arch }} docker compose push ${{ matrix.arch }} - name: Build and push temp image if: ${{ inputs.mode == 'temp' }} working-directory: serving/docker run: | - DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) export NIGHTLY="-nightly" - docker compose build --no-cache --build-arg djl_version=${DJL_VERSION}-SNAPSHOT ${{ matrix.arch }} + docker compose build --no-cache \ + --build-arg djl_version=${{ env.DJL_VERSION }} \ + --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \ + ${{ matrix.arch }} repo="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo tempTag="$repo:${{ matrix.arch }}-${GITHUB_SHA}" @@ -86,17 +97,18 @@ jobs: if: ${{ inputs.mode == 'release' }} working-directory: serving/docker run: | - DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) - export BASE_RELEASE_VERSION="${DJL_VERSION}" - export RELEASE_VERSION="${DJL_VERSION}-" - docker compose build --no-cache --build-arg djl_version=${DJL_VERSION} ${{ matrix.arch }} + export BASE_RELEASE_VERSION="${{ env.SERVING_VERSION }}" + export RELEASE_VERSION="${{ env.SERVING_VERSION }}-" + docker compose build --no-cache \ + --build-arg djl_version=${{ env.DJL_VERSION }} \ + --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \ + ${{ matrix.arch }} docker compose push ${{ matrix.arch }} - name: Retag image for release if: ${{ matrix.arch == 'cpu' && inputs.mode == 'release' }} working-directory: serving/docker run: | - DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) - docker tag deepjavalibrary/djl-serving:${DJL_VERSION} deepjavalibrary/djl-serving:latest + docker tag deepjavalibrary/djl-serving:${{ env.SERVING_VERSION }} deepjavalibrary/djl-serving:latest docker push deepjavalibrary/djl-serving:latest create-runner: @@ -147,6 +159,13 @@ jobs: with: path: ~/.gradle/caches key: ${{ runner.os }}-gradle-${{ hashFiles('**/*.gradle*') }} + - name: Extract DJL and DJL Serving versions from TOML + id: get-versions + run: | + DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) + SERVING_VERSION=$(awk -F '=' '/serving / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) + echo "DJL_VERSION=$DJL_VERSION" >> $GITHUB_ENV + echo "SERVING_VERSION=$SERVING_VERSION" >> $GITHUB_ENV - name: Build serving package for nightly if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} run: | @@ -155,17 +174,21 @@ jobs: if: ${{ inputs.mode == '' || inputs.mode == 'nightly' }} working-directory: serving/docker run: | - DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) export NIGHTLY="-nightly" - docker compose build --no-cache --build-arg djl_version=${DJL_VERSION}-SNAPSHOT aarch64 + docker compose build --no-cache \ + --build-arg djl_version=${{ env.DJL_VERSION }} \ + --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \ + aarch64 docker compose push aarch64 - name: Build and push temp image if: ${{ inputs.mode == 'temp' }} working-directory: serving/docker run: | - DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) export NIGHTLY="-nightly" - docker compose build --no-cache --build-arg djl_version=${DJL_VERSION}-SNAPSHOT aarch64 + docker compose build --no-cache \ + --build-arg djl_version=${{ env.DJL_VERSION }} \ + --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \ + aarch64 repo="185921645874.dkr.ecr.us-east-1.amazonaws.com/djl-ci-temp" aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $repo tempTag="$repo:aarch64-${GITHUB_SHA}" @@ -175,10 +198,12 @@ jobs: if: ${{ inputs.mode == 'release' }} working-directory: serving/docker run: | - DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' ../../gradle/libs.versions.toml) - export BASE_RELEASE_VERSION="${DJL_VERSION}" - export RELEASE_VERSION="${DJL_VERSION}-" - docker compose build --no-cache --build-arg djl_version=${DJL_VERSION} aarch64 + export BASE_RELEASE_VERSION="${{ env.SERVING_VERSION }}" + export RELEASE_VERSION="${{ env.SERVING_VERSION }}-" + docker compose build --no-cache \ + --build-arg djl_version=${{ env.DJL_VERSION }} \ + --build-arg djl_serving_version=${{ env.SERVING_VERSION }} \ + aarch64 docker compose push aarch64 diff --git a/.github/workflows/serving-publish.yml b/.github/workflows/serving-publish.yml new file mode 100644 index 0000000000..f60681d5b9 --- /dev/null +++ b/.github/workflows/serving-publish.yml @@ -0,0 +1,84 @@ +name: Serving publish +on: + schedule: + - cron: '0 12 * * *' + push: + branches: + - decouple-djl # Trigger on push events to the decouple-djl branch + workflow_dispatch: + inputs: + mode: + description: 'staging/snapshot, default is snapshot' + required: true + default: 'snapshot' + repo-id: + description: 'staging repository id for djl api packages' + required: false + default: '' +permissions: + id-token: write + contents: read +jobs: + publish: + if: github.repository == 'deepjavalibrary/djl-serving' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up JDK 17 + uses: actions/setup-java@v4 + with: + distribution: 'corretto' + java-version: 17 + # Enable gradle cache: https://github.com/actions/cache/blob/master/examples.md#java---gradle + - uses: actions/cache@v4 + with: + path: ~/.gradle/caches + key: ${{ runner.os }}-gradle-${{ hashFiles('*/build.gradle.kts', 'engines/**/build.gradle.kts', 'extensions/**/build.gradle.kts') }} + restore-keys: | + ${{ runner.os }}-gradle- + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: arn:aws:iam::425969335547:role/djl-ci-publish-role + aws-region: us-east-2 + - name: Copy serving snapshot artifacts to S3 + if: ${{ github.event.inputs.mode == '' || github.event.inputs.mode == 'snapshot' }} + run: | + ./gradlew :serving:createDeb -Psnapshot + DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) + aws s3 cp serving/build/distributions/*.deb s3://djl-ai/publish/djl-serving/ + aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/djl-serving/djl-serving_${DJL_VERSION}*" + - name: Copy serving release artifacts to S3 + if: ${{ github.event.inputs.mode == 'staging' }} + run: | + ./gradlew :serving:dZ :serving:createDeb -Pstaging + DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) + aws s3 cp serving/build/distributions/*.deb s3://djl-ai/publish/djl-serving/ + aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/djl-serving/djl-serving_${DJL_VERSION}*" + if [[ $(aws s3 ls s3://djl-ai/publish/djl-serving/serving-$DJL_VERSION.tar | wc -l) -eq 0 ]]; \ + then aws s3 cp serving/build/distributions/*.tar s3://djl-ai/publish/djl-serving/; \ + else echo serving tarball published already!; fi + aws s3 cp serving/build/distributions/*.zip s3://djl-ai/publish/djl-serving/ + aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/djl-serving/serving-${DJL_VERSION}*" + - name: Copy benchmark release artifacts to S3 + if: ${{ github.event.inputs.mode == 'staging' }} + run: | + ./gradlew :benchmark:dZ :benchmark:createDeb -Pstaging + DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) + aws s3 cp benchmark/build/distributions/*.tar s3://djl-ai/publish/djl-bench/${DJL_VERSION}/ + aws s3 cp benchmark/build/distributions/*.deb s3://djl-ai/publish/djl-bench/${DJL_VERSION}/ + aws s3 cp benchmark/build/distributions/*.zip s3://djl-ai/publish/djl-bench/${DJL_VERSION}/ + aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/djl-bench/${DJL_VERSION}/*" + - name: Copy awscurl snapshot artifacts to S3 + if: ${{ github.event.inputs.mode == '' || github.event.inputs.mode == 'snapshot' }} + run: | + ./gradlew :awscurl:jar + aws s3 cp awscurl/build/awscurl s3://djl-ai/publish/awscurl/ + aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/awscurl/awscurl" + - name: Copy awscurl staging artifacts to S3 + if: ${{ github.event.inputs.mode == 'staging' }} + run: | + ./gradlew :awscurl:jar -Pstaging + DJL_VERSION=$(awk -F '=' '/djl / {gsub(/ ?"/, "", $2); print $2}' gradle/libs.versions.toml) + aws s3 cp awscurl/build/awscurl s3://djl-ai/publish/awscurl/${DJL_VERSION}/ + aws cloudfront create-invalidation --distribution-id E371VB8JQ6NRVY --paths "/awscurl/${DJL_VERSION}/*" diff --git a/buildSrc/src/main/kotlin/ai/djl/javaBase.gradle.kts b/buildSrc/src/main/kotlin/ai/djl/javaBase.gradle.kts index 94054badda..7b6d65a5a1 100644 --- a/buildSrc/src/main/kotlin/ai/djl/javaBase.gradle.kts +++ b/buildSrc/src/main/kotlin/ai/djl/javaBase.gradle.kts @@ -13,7 +13,7 @@ plugins { val libs = the() var servingVersion: String? = System.getenv("DJL_VERSION") val stagingRepo: String? = System.getenv("DJL_STAGING") -servingVersion = if (servingVersion == null) libs.versions.djl.get() else servingVersion +servingVersion = if (servingVersion == null) libs.versions.serving.get() else servingVersion if (!project.hasProperty("staging")) { servingVersion += "-SNAPSHOT" } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 6e076ead2b..3f99b22ee4 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -3,6 +3,7 @@ format.version = "1.1" [versions] djl = "0.30.0" +serving = "0.30.0" onnxruntime = "1.19.0" commonsCli = "1.9.0" commonsCodec = "1.17.1" diff --git a/serving/docker/Dockerfile b/serving/docker/Dockerfile index b855c36cc3..77efe23e18 100644 --- a/serving/docker/Dockerfile +++ b/serving/docker/Dockerfile @@ -10,7 +10,8 @@ # BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for # the specific language governing permissions and limitations under the License. FROM ubuntu:22.04 AS base -ARG djl_version=0.30.0~SNAPSHOT +ARG djl_version +ARG djl_serving_version COPY scripts scripts/ RUN mkdir -p /opt/djl/conf && \ @@ -20,10 +21,10 @@ COPY config.properties /opt/djl/conf/ COPY distribution[s]/ ./ RUN mv *.deb djl-serving_all.deb || true -RUN scripts/install_djl_serving.sh $djl_version && \ +RUN scripts/install_djl_serving.sh $djl_version $djl_serving_version && \ scripts/install_s5cmd.sh x64 && \ mkdir -p /opt/djl/bin && cp scripts/telemetry.sh /opt/djl/bin && \ - echo "${djl_version} cpu" > /opt/djl/bin/telemetry && \ + echo "${djl_serving_version} cpu" > /opt/djl/bin/telemetry && \ scripts/security_patch.sh cpu && \ apt-get clean -y && rm -rf /var/lib/apt/lists/* && \ rm -rf scripts @@ -55,7 +56,7 @@ LABEL dlc_major_version="1" LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.cpu="true" LABEL com.amazonaws.sagemaker.capabilities.multi-models="true" LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" -LABEL djl-version=$djl_version +LABEL djl-version=$djl_serving_version FROM base AS cpu-full @@ -64,7 +65,7 @@ ARG torch_version=2.4.0 COPY scripts scripts/ RUN scripts/install_python.sh && \ - scripts/install_djl_serving.sh $djl_version $torch_version && \ + scripts/install_djl_serving.sh $djl_version $djl_serving_version $torch_version && \ djl-serving -i ai.djl.pytorch:pytorch-native-cpu:$torch_version:linux-x86_64 && \ djl-serving -i ai.djl.onnxruntime:onnxruntime-engine:$djl_version && \ djl-serving -i com.microsoft.onnxruntime:onnxruntime:1.18.0 && \ diff --git a/serving/docker/aarch64.Dockerfile b/serving/docker/aarch64.Dockerfile index 4051932aea..327de9302c 100644 --- a/serving/docker/aarch64.Dockerfile +++ b/serving/docker/aarch64.Dockerfile @@ -10,7 +10,8 @@ # BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for # the specific language governing permissions and limitations under the License. FROM arm64v8/ubuntu:22.04 -ARG djl_version=0.30.0~SNAPSHOT +ARG djl_version +ARG djl_serving_version ARG torch_version=2.3.1 EXPOSE 8080 @@ -38,13 +39,13 @@ COPY config.properties /opt/djl/conf/ COPY distribution[s]/ ./ RUN mv *.deb djl-serving_all.deb || true -RUN scripts/install_djl_serving.sh $djl_version && \ - scripts/install_djl_serving.sh $djl_version $torch_version && \ - scripts/install_djl_serving.sh $djl_version $torch_version && \ +RUN scripts/install_djl_serving.sh $djl_version $djl_serving_version && \ + scripts/install_djl_serving.sh $djl_version $djl_serving_version $torch_version && \ + scripts/install_djl_serving.sh $djl_version $djl_serving_version $torch_version && \ scripts/install_s5cmd.sh aarch64 && \ djl-serving -i ai.djl.pytorch:pytorch-native-cpu-precxx11:$torch_version:linux-aarch64 && \ mkdir -p /opt/djl/bin && cp scripts/telemetry.sh /opt/djl/bin && \ - echo "${djl_version} aarch" > /opt/djl/bin/telemetry && \ + echo "${djl_serving_version} aarch" > /opt/djl/bin/telemetry && \ rm -f /usr/local/djl-serving-*/lib/mxnet-* && \ rm -f /usr/local/djl-serving-*/lib/tensorflow-* && \ rm -f /usr/local/djl-serving-*/lib/tensorrt-* && \ @@ -61,4 +62,5 @@ LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-30-0.aarch64="true LABEL com.amazonaws.sagemaker.capabilities.multi-models="true" LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" LABEL djl-version=$djl_version +LABEL djl-serving-version=$djl_serving_version LABEL torch-version=$torch_version diff --git a/serving/docker/lmi.Dockerfile b/serving/docker/lmi.Dockerfile index 434e8723a6..06b7619a0e 100644 --- a/serving/docker/lmi.Dockerfile +++ b/serving/docker/lmi.Dockerfile @@ -12,7 +12,8 @@ ARG version=12.4.1-devel-ubuntu22.04 FROM nvidia/cuda:$version ARG cuda_version=cu124 -ARG djl_version=0.30.0~SNAPSHOT +ARG djl_version +ARG djl_serving_version # Base Deps ARG python_version=3.10 ARG torch_version=2.4.0 @@ -88,14 +89,14 @@ RUN mv *.deb djl-serving_all.deb || true RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio-dev libopenmpi-dev g++ \ && scripts/install_openssh.sh \ - && scripts/install_djl_serving.sh $djl_version \ - && scripts/install_djl_serving.sh $djl_version ${torch_version} \ + && scripts/install_djl_serving.sh $djl_version $djl_serving_version \ + && scripts/install_djl_serving.sh $djl_version $djl_serving_version ${torch_version} \ && djl-serving -i ai.djl.onnxruntime:onnxruntime-engine:$djl_version \ && djl-serving -i com.microsoft.onnxruntime:onnxruntime_gpu:$onnx_version \ && scripts/install_python.sh ${python_version} \ && scripts/install_s5cmd.sh x64 \ && mkdir -p /opt/djl/bin && cp scripts/telemetry.sh /opt/djl/bin \ - && echo "${djl_version} lmi" > /opt/djl/bin/telemetry \ + && echo "${djl_serving_version} lmi" > /opt/djl/bin/telemetry \ && pip3 cache purge \ && apt-get clean -y && rm -rf /var/lib/apt/lists/* @@ -132,6 +133,7 @@ LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-30-0.lmi="true" LABEL com.amazonaws.sagemaker.capabilities.multi-models="true" LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" LABEL djl-version=$djl_version +LABEL djl-serving-version=$djl_serving_version LABEL cuda-version=$cuda_version # To use the 535 CUDA driver, CUDA 12.4 can work on this one too LABEL com.amazonaws.sagemaker.inference.cuda.verified_versions=12.4 diff --git a/serving/docker/pytorch-gpu.Dockerfile b/serving/docker/pytorch-gpu.Dockerfile index 5f0c428d84..2080528683 100644 --- a/serving/docker/pytorch-gpu.Dockerfile +++ b/serving/docker/pytorch-gpu.Dockerfile @@ -13,7 +13,8 @@ ARG version=12.4.1-devel-ubuntu22.04 FROM nvidia/cuda:$version as base -ARG djl_version=0.30.0~SNAPSHOT +ARG djl_version +ARG djl_serving_version ARG cuda_version=cu124 ARG torch_version=2.4.0 ARG torch_vision_version=0.19.0 @@ -52,8 +53,8 @@ RUN mv *.deb djl-serving_all.deb || true COPY scripts scripts/ SHELL ["/bin/bash", "-c"] RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh && \ - scripts/install_djl_serving.sh $djl_version && \ - scripts/install_djl_serving.sh $djl_version ${torch_version} && \ + scripts/install_djl_serving.sh $djl_version $djl_serving_version && \ + scripts/install_djl_serving.sh $djl_version $djl_serving_version ${torch_version} && \ djl-serving -i ai.djl.onnxruntime:onnxruntime-engine:$djl_version && \ djl-serving -i com.microsoft.onnxruntime:onnxruntime_gpu:$onnx_version && \ scripts/install_python.sh ${python_version} && \ @@ -62,7 +63,7 @@ RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh && \ scripts/patch_oss_dlc.sh python && \ scripts/security_patch.sh pytorch-gpu && \ mkdir -p /opt/djl/bin && cp scripts/telemetry.sh /opt/djl/bin && \ - echo "${djl_version} pytorchgpu" > /opt/djl/bin/telemetry && \ + echo "${djl_serving_version} pytorchgpu" > /opt/djl/bin/telemetry && \ useradd -m -d /home/djl djl && \ chown -R djl:djl /opt/djl && \ rm -rf scripts && pip3 cache purge && \ @@ -81,6 +82,7 @@ LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-30-0.pytorch-cu124 LABEL com.amazonaws.sagemaker.capabilities.multi-models="true" LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" LABEL djl-version=$djl_version +LABEL djl-serving-version=$djl_serving_version LABEL cuda-version=$cuda_version LABEL torch-version=$torch_version # To use the 535 CUDA driver diff --git a/serving/docker/pytorch-inf2.Dockerfile b/serving/docker/pytorch-inf2.Dockerfile index 18f2f51d38..e31bab2aac 100644 --- a/serving/docker/pytorch-inf2.Dockerfile +++ b/serving/docker/pytorch-inf2.Dockerfile @@ -10,7 +10,8 @@ # BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for # the specific language governing permissions and limitations under the License. FROM ubuntu:22.04 -ARG djl_version=0.30.0~SNAPSHOT +ARG djl_version +ARG djl_serving_version ARG torch_version=2.1.2 ARG torchvision_version=0.16.2 ARG python_version=3.10 @@ -69,16 +70,16 @@ RUN mkdir -p /opt/djl/conf && \ COPY config.properties /opt/djl/conf/ COPY partition /opt/djl/partition RUN mkdir -p /opt/djl/bin && cp scripts/telemetry.sh /opt/djl/bin && \ - echo "${djl_version} inf2" > /opt/djl/bin/telemetry && \ + echo "${djl_serving_version} inf2" > /opt/djl/bin/telemetry && \ scripts/install_python.sh && \ - scripts/install_djl_serving.sh $djl_version && \ - scripts/install_djl_serving.sh $djl_version ${torch_version} && \ + scripts/install_djl_serving.sh $djl_version $djl_serving_version && \ + scripts/install_djl_serving.sh $djl_version $djl_serving_version ${torch_version} && \ scripts/install_inferentia2.sh && \ pip install accelerate==${accelerate_version} safetensors torchvision==${torchvision_version} \ neuronx-cc==${neuronx_cc_version} torch-neuronx==${torch_neuronx_version} transformers-neuronx==${transformers_neuronx_version} \ torch_xla==${torch_xla_version} neuronx-cc-stubs==${neuronx_cc_stubs_version} \ neuronx_distributed==${neuronx_distributed_version} protobuf sentencepiece jinja2 \ - diffusers==${diffusers_version} opencv-contrib-python-headless Pillow --extra-index-url=https://pip.repos.neuron.amazonaws.com \ + diffusers==${diffusers_version} opencv-contrib-python-headless Pillow --extra-index-url=https://pip.repos.neuron.amazonaws.com \ pydantic==${pydantic_version} optimum optimum-neuron==${optimum_neuron_version} tiktoken blobfile && \ pip install transformers==${transformers_version} ${vllm_wheel} && \ echo y | pip uninstall triton && \ @@ -96,4 +97,5 @@ LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-30-0.inf2="true" LABEL com.amazonaws.sagemaker.capabilities.multi-models="true" LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" LABEL djl-version=$djl_version +LABEL djl-serving-version=$djl_serving_version LABEL neuronsdk-version=$neuronsdk_version diff --git a/serving/docker/scripts/install_djl_serving.sh b/serving/docker/scripts/install_djl_serving.sh index 1ac6bb5ed5..3c1af537f3 100755 --- a/serving/docker/scripts/install_djl_serving.sh +++ b/serving/docker/scripts/install_djl_serving.sh @@ -3,7 +3,10 @@ set -ex DJL_VERSION=$1 -PYTORCH_JNI=$2 +DJL_SERVING_VERSION=$2 +PYTORCH_JNI=$3 + +S3_BUCKET_URL="s3://djl-ai/publish/djl-serving" if [ -z "$PYTORCH_JNI" ]; then # install Java @@ -18,15 +21,15 @@ if [ -z "$PYTORCH_JNI" ]; then ca-certificates \ fontconfig \ vim - # add corretto https://docs.aws.amazon.com/corretto/latest/corretto-17-ug/generic-linux-install.html +# add corretto https://docs.aws.amazon.com/corretto/latest/corretto-17-ug/generic-linux-install.html wget -O - https://apt.corretto.aws/corretto.key | gpg --dearmor -o /usr/share/keyrings/corretto-keyring.gpg && echo "deb [signed-by=/usr/share/keyrings/corretto-keyring.gpg] https://apt.corretto.aws stable main" | tee /etc/apt/sources.list.d/corretto.list apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ java-17-amazon-corretto-jdk - # install DJLServing + # install DJLServing package if [ ! -f djl-serving_all.deb ]; then - curl "https://publish.djl.ai/djl-serving/djl-serving_${DJL_VERSION//-/\~}-1_all.deb" -f -o djl-serving_all.deb + curl "https://publish.djl.ai/djl-serving/djl-serving_${DJL_SERVING_VERSION//-/\~}-1_all.deb" -f -o djl-serving_all.deb fi dpkg -i djl-serving_all.deb rm djl-serving_all.deb diff --git a/serving/docker/tensorrt-llm.Dockerfile b/serving/docker/tensorrt-llm.Dockerfile index 8ada4e7636..e809adc47a 100644 --- a/serving/docker/tensorrt-llm.Dockerfile +++ b/serving/docker/tensorrt-llm.Dockerfile @@ -14,7 +14,8 @@ FROM nvidia/cuda:$version ARG cuda_version=cu125 ARG python_version=3.10 ARG TORCH_VERSION=2.4.0 -ARG djl_version=0.30.0~SNAPSHOT +ARG djl_version +ARG djl_serving_version ARG transformers_version=4.44.2 ARG accelerate_version=0.32.1 ARG tensorrtlibs_version=10.1.0 @@ -98,12 +99,12 @@ RUN pip install ${triton_toolkit_wheel} ${trtllm_toolkit_wheel} && \ apt-get clean -y && rm -rf /var/lib/apt/lists/* # Final steps -RUN scripts/install_djl_serving.sh $djl_version && \ +RUN scripts/install_djl_serving.sh $djl_version $djl_serving_version && \ scripts/install_s5cmd.sh x64 && \ scripts/security_patch.sh trtllm && \ scripts/patch_oss_dlc.sh python && \ mkdir -p /opt/djl/bin && cp scripts/telemetry.sh /opt/djl/bin && \ - echo "${djl_version} tensorrtllm" > /opt/djl/bin/telemetry && \ + echo "${djl_serving_version} tensorrtllm" > /opt/djl/bin/telemetry && \ useradd -m -d /home/djl djl && \ chown -R djl:djl /opt/djl && \ rm -rf scripts && \ @@ -121,6 +122,7 @@ LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-30-0.tensorrtllm=" LABEL com.amazonaws.sagemaker.capabilities.multi-models="true" LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true" LABEL djl-version=$djl_version +LABEL djl-serving-version=$djl_serving_version LABEL trtllm-version=$trtllm_version LABEL cuda-version=$cuda_version # To use the 535 CUDA driver