IntelPython · vlad-perevezentsev · Jun 4, 2025 · Jun 5, 2025 · Jun 5, 2025 · Jun 5, 2025
@@ -25,9 +25,15 @@ option(DPCTL_GENERATE_COVERAGE_FOR_PYBIND11_EXTENSIONS
     "Build dpctl pybind11 offloading extensions with coverage instrumentation"
     OFF
 )
-option(DPCTL_TARGET_CUDA
-    "Build DPCTL to target CUDA devices"
-    OFF
+string(CONCAT _desc_target_cuda  # help text for the DPCTL_TARGET_CUDA cache entry, joined from the pieces below
+    "Build DPCTL to target CUDA device. "
+    "Set to a truthy value (e.g., ON, TRUE) to use default architecture (sm_50), "
+    "or to a specific architecture like sm_80."
+)
+set(DPCTL_TARGET_CUDA  # string-valued cache entry (no longer a boolean option); empty by default
+    ""
+    CACHE STRING
+    "${_desc_target_cuda}"
 )
 set(DPCTL_TARGET_HIP
     ""
@@ -51,15 +57,24 @@ set(_dpctl_sycl_target_compile_options)
 set(_dpctl_sycl_target_link_options)
 
 set(_dpctl_sycl_targets)
+set(_dpctl_cuda_arch)
 set(_dpctl_amd_targets)
+
 if ("x${DPCTL_SYCL_TARGETS}" STREQUAL "x")
-   if (DPCTL_TARGET_CUDA)
-      set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
-   else()
-      if (DEFINED ENV{DPCTL_TARGET_CUDA})
-          set(_dpctl_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
+   if (DPCTL_TARGET_CUDA)  # CMake false constants (unset, "", OFF, 0, NO, FALSE, N) leave CUDA disabled instead of erroring
+      if(DPCTL_TARGET_CUDA MATCHES "^sm_")  # explicit architecture requested, e.g. sm_80
+         set(_dpctl_cuda_arch ${DPCTL_TARGET_CUDA})
+      elseif(DPCTL_TARGET_CUDA MATCHES "^(ON|TRUE|YES|Y|1)$")  # plain "enable": fall back to the default architecture
+         set(_dpctl_cuda_arch "sm_50")
+      else()
+         message(FATAL_ERROR
+            "Invalid value for DPCTL_TARGET_CUDA: \"${DPCTL_TARGET_CUDA}\". "
+            "Expected 'ON', 'TRUE', 'YES', 'Y', '1', or a CUDA architecture like 'sm_80'."
+         )
       endif()
+      set(_dpctl_sycl_targets "nvidia_gpu_${_dpctl_cuda_arch},spir64-unknown-unknown")
    endif()
+
    if (NOT "x${DPCTL_TARGET_HIP}" STREQUAL "x")
       set(_dpctl_amd_targets ${DPCTL_TARGET_HIP})
       if(_dpctl_sycl_targets)

@@ -159,13 +159,33 @@ The following plugins from CodePlay are supported:
 .. _codeplay_nv_plugin: https://developer.codeplay.com/products/oneapi/nvidia/
 .. _codeplay_amd_plugin: https://developer.codeplay.com/products/oneapi/amd/
 
-``dpctl`` can be built for CUDA devices as follows:
+``dpctl`` can be built for CUDA devices using the ``DPCTL_TARGET_CUDA`` CMake option,
+which accepts a specific compute architecture string:
+
+.. code-block:: bash
+
+    python scripts/build_locally.py --verbose --cmake-opts="-DDPCTL_TARGET_CUDA=sm_80"
+
+To use the default architecture (``sm_50``), set ``DPCTL_TARGET_CUDA`` to one of
+``ON``, ``TRUE``, ``YES``, ``Y``, or ``1`` (the match is case-sensitive, so use uppercase):
 
 .. code-block:: bash
 
     python scripts/build_locally.py --verbose --cmake-opts="-DDPCTL_TARGET_CUDA=ON"
 
-And for AMD devices
+Note that kernels are built for ``sm_50`` by default, allowing them to work on a wider
+range of architectures, but limiting the usage of more recent CUDA features.
+
+For reference, compute architecture strings like ``sm_80`` correspond to specific
+CUDA Compute Capabilities (e.g., Compute Capability 8.0 corresponds to ``sm_80``).
+A complete mapping between NVIDIA GPU models and their respective
+Compute Capabilities can be found in the official
+`CUDA GPU Compute Capability <https://developer.nvidia.com/cuda-gpus>`_ documentation.
+
+The full list of SYCL alias targets is given in the
+`DPC++ Compiler User Manual <https://intel.github.io/llvm/UsersManual.html>`_.
+
+To build for AMD devices, use:
 
 .. code-block:: bash