Skip to content

Enable dpnp build on AMD GPU #2302

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 67 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -75,27 +75,73 @@ option(DPNP_USE_ONEMKL_INTERFACES
"Build DPNP with oneMKL Interfaces"
OFF
)
set(HIP_TARGETS "" CACHE STRING "HIP architecture for target")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume there is no support for multiple values:

Suggested change
set(HIP_TARGETS "" CACHE STRING "HIP architecture for target")
set(HIP_TARGET "" CACHE STRING "HIP architecture for target")

Copy link
Collaborator

@ndgrigorian ndgrigorian Apr 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At some point, it was clear in docs that only one architecture was supported at a time, but now it isn't as clear and should be tested

Also, there is new information in the extension guide

The compiler driver also offers alias targets for each target+architecture pair to make the command line shorter and easier to understand for humans. Thanks to the aliases, the -Xsycl-target-backend flags no longer need to be specified.

It shows that the command

icpx -fsycl -fsycl-targets=spir64_gen,amdgcn-amd-amdhsa,nvptx64-nvidia-cuda \
        -Xsycl-target-backend=spir64_gen '-device pvc' \
        -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=gfx1030 \
        -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=sm_80 \
        -o sycl-app sycl-app.cpp

is equivalent to

icpx -fsycl -fsycl-targets=intel_gpu_pvc,amd_gpu_gfx1030,nvidia_gpu_sm_80 \
        -o sycl-app sycl-app.cpp

so maybe both dpctl and dpnp can simplify by removing the need for -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=[X] completely

list of aliases:
https://intel.github.io/llvm/UsersManual.html

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aliases list seems to claim only one alias is supported at a time. So probably only one architecture at once is possible? That would be my guess


set(_dpnp_sycl_targets)
set(_dpnp_amd_targets)
set(_use_onemkl_interfaces OFF)
set(_use_onemkl_interfaces_cuda OFF)
set(_use_onemkl_interfaces_hip OFF)

set(_dpnp_sycl_target_compile_options)
set(_dpnp_sycl_target_link_options)
set(_dpnp_sycl_mkl_target_compile_options)
set(_dpnp_sycl_mkl_target_link_options)

if ("x${DPNP_SYCL_TARGETS}" STREQUAL "x")
if(DPNP_TARGET_CUDA)
set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
set(_use_onemkl_interfaces_cuda ON)
else()
if(DEFINED ENV{DPNP_TARGET_CUDA})
set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
set(_use_onemkl_interfaces_cuda ON)
endif()
endif()
if(DPNP_TARGET_CUDA)
set(_dpnp_sycl_targets "nvptx64-nvidia-cuda,spir64-unknown-unknown")
set(_use_onemkl_interfaces_cuda ON)
endif()

# Enable the HIP (AMD GPU) backend when an architecture is given via HIP_TARGETS.
if (NOT "x${HIP_TARGETS}" STREQUAL "x")
    set(_dpnp_amd_targets ${HIP_TARGETS})
    set(_use_onemkl_interfaces_hip ON)

    if ("x${_dpnp_sycl_targets}" STREQUAL "x")
        # No other offload target has been selected yet: build for AMD
        # plus the default SPIR-V target.
        set(_dpnp_sycl_targets "amdgcn-amd-amdhsa,spir64-unknown-unknown")
    else()
        # Targets were already selected above (e.g. CUDA + SPIR-V):
        # prepend AMD and keep the existing list intact.
        set(_dpnp_sycl_targets "amdgcn-amd-amdhsa,${_dpnp_sycl_targets}")
    endif()
endif()
else()
set(_dpnp_sycl_targets ${DPNP_SYCL_TARGETS})
set(_dpnp_sycl_targets ${DPNP_SYCL_TARGETS})

if (DPNP_SYCL_TARGETS MATCHES "nvptx64-nvidia-cuda")
set(_use_onemkl_interfaces_cuda ON)
endif()

if (DPNP_SYCL_TARGETS MATCHES "amdgcn-amd-amdhsa")
set(_use_onemkl_interfaces_hip ON)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need here something similar to above?

        set(_dpnp_sycl_targets "amdgcn-amd-amdhsa,${_dpnp_sycl_targets}")

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think if we set DPNP_SYCL_TARGETS via --cmake_opts we expect them to be the right target e.g. amdgcn-amd-amdhsa or nvptx64-nvidia-cuda


# A HIP build needs the GPU architecture; stop configuring when it is absent.
if ("x${HIP_TARGETS}" STREQUAL "x")
    message(FATAL_ERROR "HIP_TARGETS must be specified when using HIP backend")
endif()
# Reached only when HIP_TARGETS is non-empty (FATAL_ERROR aborts processing).
set(_dpnp_amd_targets ${HIP_TARGETS})
endif()
endif()

if(_dpnp_sycl_targets)
if (_dpnp_sycl_targets)
message(STATUS "Compiling for -fsycl-targets=${_dpnp_sycl_targets}")
list(APPEND _dpnp_sycl_target_compile_options -fsycl-targets=${_dpnp_sycl_targets})
list(APPEND _dpnp_sycl_target_link_options -fsycl-targets=${_dpnp_sycl_targets})
list(APPEND _dpnp_sycl_mkl_target_compile_options -fsycl-targets=${_dpnp_sycl_targets})
list(APPEND _dpnp_sycl_mkl_target_link_options -fsycl-targets=${_dpnp_sycl_targets})
if(_dpnp_amd_targets)
    # Pass the AMD architecture to the HIP backend only. The explicit
    # "=amdgcn-amd-amdhsa" triple is used for both option sets because
    # -fsycl-targets lists more than one target here, and a bare
    # -Xsycl-target-backend does not say which target the option belongs to.
    set(_dpnp_hip_compile_options -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${_dpnp_amd_targets})
    set(_dpnp_hip_mkl_compile_options -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=${_dpnp_amd_targets})

    # Append HIP-specific flags for general SYCL modules
    list(APPEND _dpnp_sycl_target_compile_options ${_dpnp_hip_compile_options})
    list(APPEND _dpnp_sycl_target_link_options ${_dpnp_hip_compile_options})

    # Append HIP-specific flags for MKL modules
    # (the MKL modules are additionally compiled with -fsycl-unnamed-lambda)
    list(APPEND _dpnp_sycl_mkl_target_compile_options -fsycl-unnamed-lambda ${_dpnp_hip_mkl_compile_options})
    list(APPEND _dpnp_sycl_mkl_target_link_options ${_dpnp_hip_mkl_compile_options})
endif()
endif()

set(_use_onemkl_interfaces OFF)
if(DPNP_USE_ONEMKL_INTERFACES)
set(_use_onemkl_interfaces ON)
else()
@@ -107,13 +153,20 @@ endif()
if(_use_onemkl_interfaces)
set(BUILD_FUNCTIONAL_TESTS False)
set(BUILD_EXAMPLES False)
set(ENABLE_MKLGPU_BACKEND True)
set(ENABLE_MKLCPU_BACKEND True)

if(_use_onemkl_interfaces_cuda)
set(ENABLE_CUBLAS_BACKEND True)
set(ENABLE_CUSOLVER_BACKEND True)
set(ENABLE_CUFFT_BACKEND True)
# set(ENABLE_CURAND_BACKEND True)
set(ENABLE_MKLGPU_BACKEND True)
set(ENABLE_MKLCPU_BACKEND True)
endif()
# When the HIP flavour of the oneMKL interfaces is requested, switch on the
# rocBLAS, rocSOLVER and rocFFT backend variables.
if(_use_onemkl_interfaces_hip)
set(ENABLE_ROCBLAS_BACKEND True)
set(ENABLE_ROCSOLVER_BACKEND True)
set(ENABLE_ROCFFT_BACKEND True)
# NOTE(review): the rocRAND backend is left disabled here; the reason is not
# visible in this file - confirm before enabling.
# set(ENABLE_ROCRAND_BACKEND True)
endif()

if(DPNP_ONEMKL_INTERFACES_DIR)
36 changes: 35 additions & 1 deletion doc/quick_start_guide.rst
Original file line number Diff line number Diff line change
@@ -130,18 +130,52 @@ Building ``dpnp`` for these targets requires that these CodePlay plugins be inst
installation layout of compatible version. The following plugins from CodePlay are supported:

- `oneAPI for NVIDIA(R) GPUs <codeplay_nv_plugin_>`_
- `oneAPI for AMD GPUs <codeplay_amd_plugin_>`_

.. _codeplay_nv_plugin: https://developer.codeplay.com/products/oneapi/nvidia/
.. _codeplay_amd_plugin: https://developer.codeplay.com/products/oneapi/amd/

Building ``dpnp`` also requires `building Data Parallel Control Library for custom SYCL targets.
<https://intelpython.github.io/dpctl/latest/beginners_guides/installation.html#building-for-custom-sycl-targets>`_

Build ``dpnp`` as follows:
``dpnp`` can be built for CUDA devices as follows:

.. code-block:: bash

python scripts/build_locally.py --target=cuda

And for AMD devices:

.. code-block:: bash

python scripts/build_locally.py --target-hip=<arch>

Note that the *oneAPI for AMD GPUs* plugin requires the architecture be specified and only
one architecture can be specified at a time.

To determine the architecture code (``<arch>``) for your AMD GPU, run:

.. code-block:: bash

rocminfo | grep 'Name: *gfx.*'

This will print names like ``gfx90a``, ``gfx1030``, etc.
You can then use one of them as the argument to ``--target-hip``.

For example:

.. code-block:: bash

python scripts/build_locally.py --target-hip=gfx90a


It is, however, possible to build for Intel devices, CUDA devices, and an AMD device
architecture all at once:

.. code-block:: bash

python scripts/build_locally.py --target=cuda --target-hip=gfx90a


Testing
=======

4 changes: 2 additions & 2 deletions dpnp/backend/extensions/blas/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -40,12 +40,12 @@ if(_dpnp_sycl_targets)
target_compile_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_mkl_target_compile_options}
)
target_link_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_mkl_target_link_options}
)
endif()

4 changes: 2 additions & 2 deletions dpnp/backend/extensions/fft/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -37,12 +37,12 @@ if(_dpnp_sycl_targets)
target_compile_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_mkl_target_compile_options}
)
target_link_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_mkl_target_link_options}
)
endif()

4 changes: 2 additions & 2 deletions dpnp/backend/extensions/indexing/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -38,12 +38,12 @@ if(_dpnp_sycl_targets)
target_compile_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_target_compile_options}
)
target_link_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_target_link_options}
)
endif()

4 changes: 2 additions & 2 deletions dpnp/backend/extensions/lapack/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -57,12 +57,12 @@ if(_dpnp_sycl_targets)
target_compile_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_mkl_target_compile_options}
)
target_link_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_mkl_target_link_options}
)
endif()

4 changes: 2 additions & 2 deletions dpnp/backend/extensions/statistics/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -43,12 +43,12 @@ if(_dpnp_sycl_targets)
target_compile_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_target_compile_options}
)
target_link_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_target_link_options}
)
endif()

4 changes: 2 additions & 2 deletions dpnp/backend/extensions/ufunc/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -78,12 +78,12 @@ if(_dpnp_sycl_targets)
target_compile_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_target_compile_options}
)
target_link_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_target_link_options}
)
endif()

4 changes: 2 additions & 2 deletions dpnp/backend/extensions/window/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -38,12 +38,12 @@ if(_dpnp_sycl_targets)
target_compile_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_target_compile_options}
)
target_link_options(
${python_module_name}
PRIVATE
-fsycl-targets=${_dpnp_sycl_targets}
${_dpnp_sycl_target_link_options}
)
endif()

23 changes: 23 additions & 0 deletions scripts/build_locally.py
Original file line number Diff line number Diff line change
@@ -39,6 +39,7 @@ def run(
verbose=False,
cmake_opts="",
target="intel",
target_hip=None,
onemkl_interfaces=False,
onemkl_interfaces_dir=None,
):
@@ -97,13 +98,27 @@ def run(
if "DPL_ROOT" in os.environ:
os.environ["DPL_ROOT_HINT"] = os.environ["DPL_ROOT"]

if not target.strip():
target = "intel"

if target == "cuda":
cmake_args += [
"-DDPNP_TARGET_CUDA=ON",
]
# Always builds using oneMKL interfaces for the cuda target
onemkl_interfaces = True

if target_hip is not None:
if not target_hip.strip():
raise ValueError(
"--target-hip requires an architecture (e.g., gfx90a)"
)
cmake_args += [
f"-DHIP_TARGETS={target_hip}",
]
# Always builds using oneMKL interfaces for the hip target
onemkl_interfaces = True

if onemkl_interfaces:
cmake_args += [
"-DDPNP_USE_ONEMKL_INTERFACES=ON",
@@ -177,6 +192,13 @@ def run(
default="intel",
type=str,
)
driver.add_argument(
"--target-hip",
required=False,
help="Enable HIP target for build. "
"Must specify HIP architecture (e.g., --target-hip=gfx90a)",
type=str,
)
driver.add_argument(
"--onemkl-interfaces",
help="Build using oneMKL Interfaces",
@@ -244,6 +266,7 @@ def run(
verbose=args.verbose,
cmake_opts=args.cmake_opts,
target=args.target,
target_hip=args.target_hip,
onemkl_interfaces=args.onemkl_interfaces,
onemkl_interfaces_dir=args.onemkl_interfaces_dir,
)