This repository was archived by the owner on Sep 25, 2023. It is now read-only.

Commit c0657ef

Build CUDA 11.8 and Python 3.10 Packages (#533)
This PR updates `cusignal` to build against branch [cuda-118](https://github.com/rapidsai/shared-action-workflows/compare/cuda-118) of the `shared-action-workflows` repository, which contains updates for CUDA 11.8 and Python 3.10 packages. It also enables `sm_90` support in builds using CUDA 11.8+ and includes some minor file renames.

Authors:
- Bradley Dice (https://github.com/bdice)

Approvers:
- Ray Douglass (https://github.com/raydouglass)

URL: #533
1 parent eedd6e8 commit c0657ef
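The `sm_90` gate described in the commit message turns on the toolkit's major and minor version numbers. A minimal, hypothetical sketch of splitting a version string into those parts (the value below is hard-coded for illustration, not read from `nvcc`):

```shell
# Hypothetical helper: split a CUDA toolkit version into the MAJOR/MINOR
# pair that a build script can compare against (e.g. sm_90 only on 11.8+).
CUDA_VERSION="11.8.0"
NVCC_MAJOR=$(echo "$CUDA_VERSION" | cut -d. -f1)
NVCC_MINOR=$(echo "$CUDA_VERSION" | cut -d. -f2)
echo "$NVCC_MAJOR.$NVCC_MINOR"   # 11.8
```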

File tree

9 files changed: +67 −50 lines changed


.github/workflows/build.yaml

+2-2
```diff
@@ -28,7 +28,7 @@ concurrency:
 jobs:
   python-build:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-matrix-build.yaml@main
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@cuda-118
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
@@ -37,7 +37,7 @@ jobs:
   upload-conda:
     needs: [python-build]
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@main
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-upload-packages.yaml@cuda-118
     with:
       build_type: ${{ inputs.build_type || 'branch' }}
       branch: ${{ inputs.branch }}
```

.github/workflows/pr.yaml

+5-5
```diff
@@ -17,26 +17,26 @@ jobs:
       - conda-python-tests
       - conda-notebook-tests
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@main
+    uses: rapidsai/shared-action-workflows/.github/workflows/pr-builder.yaml@cuda-118
   checks:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@main
+    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@cuda-118
   conda-python-build:
     needs: checks
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-matrix-build.yaml@main
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-build.yaml@cuda-118
     with:
       build_type: pull-request
   conda-python-tests:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@main
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-118
     with:
       build_type: pull-request
   conda-notebook-tests:
     needs: conda-python-build
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@main
+    uses: rapidsai/shared-action-workflows/.github/workflows/custom-job.yaml@cuda-118
     with:
       build_type: pull-request
       node_type: "gpu-latest-1"
```

.github/workflows/test.yaml

+1-1
```diff
@@ -16,7 +16,7 @@ on:
 jobs:
   conda-python-tests:
     secrets: inherit
-    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@main
+    uses: rapidsai/shared-action-workflows/.github/workflows/conda-python-tests.yaml@cuda-118
     with:
       build_type: nightly
       branch: ${{ inputs.branch }}
```

README.md

+33-33
Most hunks in this file only strip trailing whitespace: the NumPy/CuPy `linspace` examples, the `CUPY_NVCC_GENERATE_CODE` note, the WSL paragraph, the pytest section, and the entire `UpFirDn2d` benchmark table are unchanged apart from that. The substantive edits:

````diff
@@ -132,9 +132,9 @@ cuSignal can be installed with ([Miniconda](https://docs.conda.io/en/latest/mini
 conda install -c rapidsai -c conda-forge -c nvidia \
     cusignal

-# To specify a certain CUDA or Python version (e.g. 11.5 and 3.8, respectively)
+# To specify a certain CUDA or Python version (e.g. 11.8 and 3.8, respectively)
 conda install -c rapidsai -c conda-forge -c nvidia \
-    cusignal python=3.8 cudatoolkit=11.5
+    cusignal python=3.8 cudatoolkit=11.8
 ```
@@ -143,9 +143,9 @@ For the nightly verison of `cusignal`, which includes pre-release features:
 conda install -c rapidsai-nightly -c conda-forge -c nvidia \
     cusignal

-# To specify a certain CUDA or Python version (e.g. 11.5 and 3.8, respectively)
+# To specify a certain CUDA or Python version (e.g. 11.8 and 3.8, respectively)
 conda install -c rapidsai-nightly -c conda-forge -c nvidia \
-    cusignal python=3.8 cudatoolkit=11.5
+    cusignal python=3.8 cudatoolkit=11.8
 ```
@@ -268,9 +268,9 @@ Since the Jetson platform is based on the arm chipset, we need to use an aarch64
 ### Source, Windows OS

 We have confirmed that cuSignal successfully builds and runs on Windows by using [CUDA on WSL](https://docs.nvidia.com/cuda/wsl-user-guide/index.html). Please follow the instructions in the link to install WSL 2 and the associated CUDA drivers. You can then proceed to follow the cuSignal source build instructions, below.

-1. Download and install [Andaconda](https://www.anaconda.com/distribution/) for Windows. In an Anaconda Prompt, navigate to your checkout of cuSignal.
+1. Download and install [Anaconda](https://www.anaconda.com/distribution/) for Windows. In an Anaconda Prompt, navigate to your checkout of cuSignal.
@@ -287,7 +287,7 @@ We have confirmed that cuSignal successfully builds and runs on Windows by using
 pip install cupy-cudaXXX
 ```

-Where XXX is the version of the CUDA toolkit you have installed. 11.5, for example is `cupy-cuda115`. See the [CuPy Documentation](https://docs-cupy.chainer.org/en/stable/install.html#install-cupy) for information on getting Windows wheels for other versions of CUDA.
+Where XXX is the version of the CUDA toolkit you have installed. 11.5, for example is `cupy-cuda115`. See the [CuPy Documentation](https://docs-cupy.chainer.org/en/stable/install.html#install-cupy) for information on getting wheels for other versions of CUDA.
````

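The README's Jetson note says to concatenate multiple `arch=...` strings in `CUPY_NVC​C_GENERATE_CODE` with semicolons. A sketch of building that value for two assumed targets (Jetson Nano as `sm_53`, Xavier as `sm_72`; check your device's actual compute capability):

```shell
# Build a semicolon-separated CUPY_NVCC_GENERATE_CODE value for several
# compute capabilities. 53 and 72 are illustrative Jetson values.
archs="53 72"
code=""
for cc in $archs; do
  part="arch=compute_${cc},code=sm_${cc}"
  if [ -z "$code" ]; then code="$part"; else code="$code;$part"; fi
done
export CUPY_NVCC_GENERATE_CODE="$code"
echo "$CUPY_NVCC_GENERATE_CODE"
```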
build.sh

+8-3
```diff
@@ -132,7 +132,7 @@ RETURN_ALL(){
                --generate-code arch=compute_62,code=sm_62 \
                --generate-code arch=compute_70,code=sm_70 \
                --generate-code arch=compute_72,code=sm_72"
-
+
     if [ "$NVCC_MAJOR" -lt 11 ]; then
         GPU_ARCH="${GPU_ARCH} --generate-code arch=compute_75,code=[sm_75,compute_75]"
         echo -e "\t including: CUDA 10.X - {50,52,53,60,61,62,70,72,75}"
@@ -141,10 +141,15 @@ RETURN_ALL(){
         if [ "$NVCC_MINOR" -eq 0 ]; then
             GPU_ARCH="${GPU_ARCH} --generate-code arch=compute_80,code=[sm_80,compute_80]"
             echo -e "\t including: CUDA 11.0 - {50,52,53,60,61,62,70,72,75,80}"
-        else
+        elif [ "$NVCC_MINOR" -lt 8 ]; then
             GPU_ARCH="${GPU_ARCH} --generate-code arch=compute_80,code=sm_80 \
                --generate-code arch=compute_86,code=[sm_86,compute_86]"
             echo -e "\t including: CUDA 11.1+ - {50,52,53,60,61,62,70,72,75,80,86}"
+        else
+            GPU_ARCH="${GPU_ARCH} --generate-code arch=compute_80,code=sm_80 \
+               --generate-code arch=compute_86,code=[sm_86,compute_86] \
+               --generate-code arch=compute_90,code=[sm_90,compute_90]"
+            echo -e "\t including: CUDA 11.8+ - {50,52,53,60,61,62,70,72,75,80,86,90}"
         fi
     fi
 }
@@ -178,7 +183,7 @@ if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
         GPU_ARCH="${GPU_ARCH} --generate-code arch=compute_${MAJOR}${MINOR},code=sm_${MAJOR}${MINOR}"
     done
 fi
-
+
 else
     RETURN_ALL
 fi
```

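The `elif`/`else` restructure in `build.sh` boils down to a minor-version gate: only CUDA 11.8+ gains the `sm_90` (Hopper) target. A minimal sketch of that same logic, with the version hard-coded for illustration instead of parsed from `nvcc --version`:

```shell
# Sketch of the CUDA 11.8+ gate: append sm_90 only when the toolkit
# is new enough to know about it. Values are stand-ins.
NVCC_MAJOR=11
NVCC_MINOR=8
GPU_ARCH="--generate-code arch=compute_80,code=sm_80"
if [ "$NVCC_MAJOR" -ge 11 ] && [ "$NVCC_MINOR" -ge 8 ]; then
  GPU_ARCH="$GPU_ARCH --generate-code arch=compute_90,code=[sm_90,compute_90]"
fi
echo "$GPU_ARCH"
```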
ci/test_notebooks.sh

+1-1
```diff
@@ -9,7 +9,7 @@ rapids-logger "Generate notebook testing dependencies"
 rapids-dependency-file-generator \
   --output conda \
   --file_key test_notebooks \
-  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee env.yaml
+  --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml

 rapids-mamba-retry env create --force -f env.yaml -n test
```

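The `--matrix` argument in `ci/test_notebooks.sh` relies on POSIX parameter expansion: `${RAPIDS_CUDA_VERSION%.*}` removes the shortest trailing `.*`, reducing a full version to major.minor. A sketch with stand-in values (`arch=$(arch)` is omitted so the output is machine-independent):

```shell
# ${VAR%.*} strips the shortest suffix matching ".*": 11.8.0 -> 11.8.
RAPIDS_CUDA_VERSION="11.8.0"
RAPIDS_PY_VERSION="3.10"
matrix="cuda=${RAPIDS_CUDA_VERSION%.*};py=${RAPIDS_PY_VERSION}"
echo "$matrix"   # cuda=11.8;py=3.10
```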
conda/environments/all_cuda-115_arch-x86_64.yaml → conda/environments/all_cuda-118_arch-x86_64.yaml

+3-3
```diff
@@ -6,7 +6,7 @@ channels:
 - conda-forge
 - nvidia
 dependencies:
-- cudatoolkit=11.5
+- cudatoolkit=11.8
 - cupy >=10,<12.0.0a0
 - ipython
 - matplotlib-base
@@ -18,7 +18,7 @@ dependencies:
 - pytest-benchmark
 - pytest-cov
 - pytest-xdist
-- python>=3.8,<3.10
+- python>=3.8,<3.11
 - pytorch <=1.12.1
 - scipy >=1.6.0
-name: all_cuda-115_arch-x86_64
+name: all_cuda-118_arch-x86_64
```

conda/recipes/cusignal/meta.yaml

+4
```diff
@@ -16,6 +16,10 @@ build:
   noarch: python

 requirements:
+  build:
+    - {{ compiler('c') }}
+    - {{ compiler('cxx') }}
+    - {{ compiler('cuda') }} {{ cuda_version }}
   host:
     - python
     - setuptools
```

dependencies.yaml

+10-2
```diff
@@ -3,7 +3,7 @@ files:
   all:
     output: conda
     matrix:
-      cuda: ["11.5"]
+      cuda: ["11.8"]
       arch: [x86_64]
     includes:
       - checks
@@ -51,6 +51,10 @@ dependencies:
               cuda: "11.5"
             packages:
               - cudatoolkit=11.5
+          - matrix:
+              cuda: "11.8"
+            packages:
+              - cudatoolkit=11.8
   checks:
     common:
       - output_types: [conda, requirements]
@@ -68,9 +72,13 @@ dependencies:
               py: "3.9"
             packages:
               - python=3.9
+          - matrix:
+              py: "3.10"
+            packages:
+              - python=3.10
           - matrix:
             packages:
-              - python>=3.8,<3.10
+              - python>=3.8,<3.11
   run:
     common:
       - output_types: [conda, requirements]
```
