Skip to content

Commit 75cd88b

Browse files
yf711 authored and guschmue committed
Update range of gpu arch (#23309)
### Description
<!-- Describe your changes. -->
* Remove deprecated GPU architectures to control NuGet/Python package size (the latest TRT supports sm75 Turing and newer architectures).
* Add 90 to support the Blackwell series in the next release (86 and 89 are not included, as adding them would rapidly increase package size).

| arch_range | Python-cuda12 | Nuget-cuda12 |
| -------------- | ------------------------------------------------------------ | ---------------------------------- |
| 60;61;70;75;80 | Linux: 279MB Win: 267MB | Linux: 247MB Win: 235MB |
| 75;80 | Linux: 174MB Win: 162MB | Linux: 168MB Win: 156MB |
| **75;80;90** | **Linux: 299MB Win: 277MB** | **Linux: 294MB Win: 271MB** |
| 75;80;86;89 | [Linux: MB Win: 390MB](https://aiinfra.visualstudio.com/Lotus/_build/results?buildId=647457&view=results) | Linux: 416MB Win: 383MB |
| 75;80;86;89;90 | [Linux: MB Win: 505MB](https://aiinfra.visualstudio.com/Lotus/_build/results?buildId=646536&view=results) | Linux: 541MB Win: 498MB |

### Motivation and Context
<!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. -->
Callout: While adding sm90 support, the CUDA 11.8 + cuDNN 8 build will be dropped in the coming ORT release, as that build has issues with Blackwell (mentioned in the comments) and demand for CUDA 11 is minor, according to the internal ort-cuda11 repo.
1 parent 2f17aa2 commit 75cd88b

8 files changed

+15
-11
lines changed

dockerfiles/Dockerfile.cuda

+3-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ ARG OS=ubuntu24.04
1212
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-${OS}
1313
ARG CUDA_VERSION
1414
ARG CUDNN_VERSION
15-
ARG CMAKE_CUDA_ARCHITECTURES="61;70;75;80;86;90"
15+
# Adjust as needed
16+
# Check your CUDA arch: https://developer.nvidia.com/cuda-gpus
17+
ARG CMAKE_CUDA_ARCHITECTURES="75;80;90"
1618

1719
ENV DEBIAN_FRONTEND=noninteractive
1820

dockerfiles/Dockerfile.tensorrt

+3-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ FROM nvcr.io/nvidia/tensorrt:${TRT_CONTAINER_VERSION}-py3
1010

1111
ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
1212
ARG ONNXRUNTIME_BRANCH=main
13-
ARG CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80
13+
# Adjust as needed
14+
# Check your CUDA arch: https://developer.nvidia.com/cuda-gpus
15+
ARG CMAKE_CUDA_ARCHITECTURES=75;80;90
1416

1517
RUN apt-get update &&\
1618
apt-get install -y sudo git bash unattended-upgrades

tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ stages:
5050
msbuildPlatform: x64
5151
packageName: x64-cuda
5252
CudaVersion: ${{ parameters.CudaVersion }}
53-
buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80"
53+
buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75;80;90"
5454
runTests: ${{ parameters.RunOnnxRuntimeTests }}
5555
buildJava: ${{ parameters.buildJava }}
5656
java_artifact_id: onnxruntime_gpu
@@ -68,7 +68,7 @@ stages:
6868
msbuildPlatform: x64
6969
CudaVersion: ${{ parameters.CudaVersion }}
7070
packageName: x64-tensorrt
71-
buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80"
71+
buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75;80;90"
7272
runTests: ${{ parameters.RunOnnxRuntimeTests }}
7373
buildJava: ${{ parameters.buildJava }}
7474
java_artifact_id: onnxruntime_gpu

tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ stages:
5656
PYTHON_VERSION: ${{ python_version }}
5757
EP_NAME: gpu
5858
CudaVersion: ${{ parameters.cuda_version }}
59-
EP_BUILD_FLAGS: --enable_lto --cuda_home=$(Agent.TempDirectory)\v${{ parameters.cuda_version }} --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80"
59+
EP_BUILD_FLAGS: --enable_lto --cuda_home=$(Agent.TempDirectory)\v${{ parameters.cuda_version }} --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75;80;90"
6060
use_tensorrt: True
6161

6262
- ${{ if eq(parameters.enable_linux_cuda, true) }}:

tools/ci_build/github/azure-pipelines/templates/py-packaging-selectable-stage.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ stages:
319319
--build_wheel \
320320
--enable_onnx_tests --use_tensorrt --cuda_version=11.8 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.8 --cudnn_home=/usr/local/cuda-11.8 \
321321
${{ parameters.gpu_build_py_parameters }} \
322-
--cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-11/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80'
322+
--cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-11/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=75;80;90'
323323
workingDirectory: $(Build.SourcesDirectory)
324324

325325
- task: CmdLine@2
@@ -349,7 +349,7 @@ stages:
349349
--build_wheel \
350350
--enable_onnx_tests --use_tensorrt --cuda_version=11.8 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.8 --cudnn_home=/usr/local/cuda-11.8 \
351351
${{ parameters.gpu_build_py_parameters }} --ctest_path '' \
352-
--cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-11/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80'
352+
--cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-11/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=75;80;90'
353353
354354
- task: CopyFiles@2
355355
displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)'
@@ -379,7 +379,7 @@ stages:
379379
- template: common-variables.yml
380380
CUDA_VERSION: '11.8'
381381
buildArch: x64
382-
EpBuildFlags: --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda11 }}" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
382+
EpBuildFlags: --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda11 }}" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75;80;90"
383383
EnvSetupScript: setup_env_gpu.bat
384384
EP_NAME: gpu
385385
VSGenerator: 'Visual Studio 17 2022'

tools/ci_build/github/linux/build_cuda_c_api_package.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
set -e -x
33
docker run --rm --volume \
44
$BUILD_SOURCESDIRECTORY:/onnxruntime_src --volume $BUILD_BINARIESDIRECTORY:/build -e NIGHTLY_BUILD onnxruntimecuda${CUDA_VERSION_MAJOR}build \
5-
/bin/bash -c "/usr/bin/python3.12 /onnxruntime_src/tools/ci_build/build.py --enable_lto --build_java --build_nodejs --build_dir /build --config Release --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --use_cuda --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr/local/cuda-$CUDA_VERSION --skip_tests --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80' && cd /build/Release && make install DESTDIR=/build/installed"
5+
/bin/bash -c "/usr/bin/python3.12 /onnxruntime_src/tools/ci_build/build.py --enable_lto --build_java --build_nodejs --build_dir /build --config Release --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --use_cuda --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr/local/cuda-$CUDA_VERSION --skip_tests --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=75;80;90' && cd /build/Release && make install DESTDIR=/build/installed"

tools/ci_build/github/linux/build_linux_python_package.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ fi
7070
if [ "$BUILD_DEVICE" == "GPU" ]; then
7171
SHORT_CUDA_VERSION=$(echo $CUDA_VERSION | sed 's/\([[:digit:]]\+\.[[:digit:]]\+\)\.[[:digit:]]\+/\1/')
7272
#Enable CUDA and TRT EPs.
73-
BUILD_ARGS+=("--use_cuda" "--use_tensorrt" "--cuda_version=$SHORT_CUDA_VERSION" "--tensorrt_home=/usr" "--cuda_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--cudnn_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--cmake_extra_defines" "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80")
73+
BUILD_ARGS+=("--use_cuda" "--use_tensorrt" "--cuda_version=$SHORT_CUDA_VERSION" "--tensorrt_home=/usr" "--cuda_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--cudnn_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--cmake_extra_defines" "CMAKE_CUDA_ARCHITECTURES=75;80;90")
7474
fi
7575

7676
if [ "$BUILD_DEVICE" == "NPU" ]; then

tools/ci_build/github/linux/build_tensorrt_c_api_package.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@ set -e -x
33
mkdir -p $HOME/.onnx
44
docker run --rm --volume /data/onnx:/data/onnx:ro --volume $BUILD_SOURCESDIRECTORY:/onnxruntime_src --volume $BUILD_BINARIESDIRECTORY:/build \
55
--volume /data/models:/build/models:ro --volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecuda${CUDA_VERSION_MAJOR}xtrt86build \
6-
/bin/bash -c "/usr/bin/python3.12 /onnxruntime_src/tools/ci_build/build.py --build_dir /build --config Release --skip_tests --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --build_java --build_nodejs --use_tensorrt --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr --tensorrt_home=/usr --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80' && cd /build/Release && make install DESTDIR=/build/installed"
6+
/bin/bash -c "/usr/bin/python3.12 /onnxruntime_src/tools/ci_build/build.py --build_dir /build --config Release --skip_tests --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --build_java --build_nodejs --use_tensorrt --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr --tensorrt_home=/usr --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=75;80;90' && cd /build/Release && make install DESTDIR=/build/installed"

0 commit comments

Comments
 (0)