75 changes: 71 additions & 4 deletions src/Base/Array4.H
@@ -6,6 +6,7 @@
#pragma once

#include "pyAMReX.H"
#include "dlpack/dlpack.h"

#include <AMReX_Array4.H>
#include <AMReX_BLassert.H>
@@ -185,6 +186,7 @@ namespace pyAMReX
*/


/*
// CPU: __array_interface__ v3
// https://numpy.org/doc/stable/reference/arrays.interface.html
.def_property_readonly("__array_interface__", [](Array4<T> const & a4) {
@@ -220,15 +222,80 @@ namespace pyAMReX
d["version"] = 3;
return d;
})
*/


// TODO: __dlpack__ __dlpack_device__
// DLPack protocol (CPU, NVIDIA GPU, AMD GPU, Intel GPU, etc.)
// DLPack v1.1 protocol (CPU, NVIDIA GPU, AMD GPU, Intel GPU, etc.)
// https://dmlc.github.io/dlpack/latest/
// https://data-apis.org/array-api/latest/design_topics/data_interchange.html
// https://github.com/data-apis/consortium-feedback/issues/1
// https://github.com/dmlc/dlpack/blob/master/include/dlpack/dlpack.h
// https://docs.cupy.dev/en/stable/user_guide/interoperability.html#dlpack-data-exchange-protocol
.def("__dlpack__", [](
Array4<T> const &a4,
/* TODO: Handle keyword arguments */
[[maybe_unused]] std::optional<py::handle> stream = std::nullopt,
[[maybe_unused]] std::optional<std::tuple<int, int>> max_version = std::nullopt,
[[maybe_unused]] std::optional<std::tuple<DLDeviceType, int32_t>> dl_device = std::nullopt,
[[maybe_unused]] std::optional<bool> copy = std::nullopt

)
{
// Allocate shape/strides arrays
constexpr int ndim = 4;
auto const len = length(a4);

// Construct DLManagedTensorVersioned (DLPack 1.1 standard)
auto *dl_mgt_tensor = new DLManagedTensorVersioned;
dl_mgt_tensor->version = DLPackVersion{};
dl_mgt_tensor->flags = 0; // No special flags
dl_mgt_tensor->dl_tensor.data = const_cast<void*>(static_cast<const void*>(a4.dataPtr()));
dl_mgt_tensor->dl_tensor.device = dlpack::detect_device_from_pointer(a4.dataPtr());
dl_mgt_tensor->dl_tensor.ndim = ndim;
dl_mgt_tensor->dl_tensor.dtype = dlpack::get_dlpack_dtype<T>();
dl_mgt_tensor->dl_tensor.shape = new int64_t[ndim]{a4.nComp(), len.z, len.y, len.x};
dl_mgt_tensor->dl_tensor.strides = new int64_t[ndim]{a4.nstride, a4.kstride, a4.jstride, 1};
dl_mgt_tensor->dl_tensor.byte_offset = 0;
dl_mgt_tensor->manager_ctx = nullptr; // TODO: we can increase/decrease the Python ref counter of the producer here
dl_mgt_tensor->deleter = [](DLManagedTensorVersioned *self) {
delete[] self->dl_tensor.shape;
delete[] self->dl_tensor.strides;
delete self;
};
// Return as Python capsule
return py::capsule(
dl_mgt_tensor,
"dltensor_versioned",
/*[](void* ptr) {
auto* tensor = static_cast<DLManagedTensorVersioned*>(ptr);
tensor->deleter(tensor);
}*/
[](PyObject *capsule)
{
if (PyCapsule_IsValid(capsule, "used_dltensor_versioned")) {
return; /* Do nothing if the capsule has been consumed. */
}
auto *p = static_cast<DLManagedTensorVersioned*>(
PyCapsule_GetPointer(capsule, "dltensor_versioned"));
if (p && p->deleter)
p->deleter(p);
}
);
},
py::arg("stream") = py::none(),
py::arg("max_version") = py::none(),
py::arg("dl_device") = py::none(),
py::arg("copy") = py::none(),
R"doc(
DLPack protocol for zero-copy tensor exchange.
See https://dmlc.github.io/dlpack/latest/ for details.
)doc"
)
.def("__dlpack_device__", [](Array4<T> const &a4) {
DLDevice device = dlpack::detect_device_from_pointer(a4.dataPtr());
return std::make_tuple(static_cast<int32_t>(device.device_type), device.device_id);
}, R"doc(
DLPack device info (device_type, device_id).
)doc")


.def("to_host", [](Array4<T> const & a4) {
// py::tuple to std::vector
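With these bindings, any DLPack-aware library can ingest an Array4 through the standard protocol hooks. A minimal consumer-side sketch, assuming a CPU build, an existing Array4 named arr, and a NumPy version recent enough to accept DLPack v1.x versioned capsules:

import numpy as np

# __dlpack_device__ reports where the data lives as (device_type, device_id);
# on a CPU build this is (kDLCPU == 1, 0).
device_type, device_id = arr.__dlpack_device__()

# np.from_dlpack calls arr.__dlpack__() and wraps the returned capsule in a
# zero-copy ndarray; the exported shape is (ncomp, nz, ny, nx) with the
# strides taken from the Array4 (nstride, kstride, jstride, 1).
view = np.from_dlpack(arr)
print(view.shape, view.strides)
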
1 change: 1 addition & 0 deletions src/CMakeLists.txt
@@ -2,6 +2,7 @@
add_subdirectory(AmrCore)
add_subdirectory(Base)
#add_subdirectory(Boundary)
add_subdirectory(dlpack)
#add_subdirectory(EB)
#add_subdirectory(Extern)
#add_subdirectory(LinearSolvers)
62 changes: 56 additions & 6 deletions src/amrex/extensions/Array4.py
@@ -1,7 +1,7 @@
"""
This file is part of pyAMReX

Copyright 2023 AMReX community
Copyright 2023-2025 AMReX community
Authors: Axel Huebl
License: BSD-3-Clause-LBNL
"""
@@ -92,9 +92,52 @@ def array4_to_cupy(self, copy=False, order="F"):
raise ValueError("The order argument must be F or C.")


def array4_to_dpnp(self, copy=False, order="F"):
"""
Provide a dpnp view into an Array4.

This includes ngrow guard cells of the box.

Note on the order of indices:
By default, this is as in AMReX in Fortran contiguous order, indexing as
x,y,z. This has performance implications for use in external libraries such
as dpnp.
The order="C" option will index as z,y,x and may perform better.
https://github.com/AMReX-Codes/pyamrex/issues/55#issuecomment-1579610074

Parameters
----------
self : amrex.Array4_*
An Array4 class in pyAMReX
copy : bool, optional
Copy the data if true, otherwise create a view (default).
order : string, optional
F order (default) or C. C is faster with external libraries.

Returns
-------
dpnp.array
A dpnp n-dimensional array.

Raises
------
ImportError
Raises an exception if dpnp is not installed
"""
import dpnp as dp

if order == "F":
return dp.from_dlpack(self, copy=copy).T
elif order == "C":
return dp.from_dlpack(self, copy=copy)
else:
raise ValueError("The order argument must be F or C.")
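A short usage sketch of the new helper, assuming a SYCL-enabled build with dpnp installed and an existing MultiFab named mf:

for mfi in mf:                    # mf is an existing amr.MultiFab
    arr = mf.array(mfi)
    x_f = arr.to_dpnp()           # Fortran order: indexed as x, y, z, comp
    x_c = arr.to_dpnp(order="C")  # C order: indexed as comp, z, y, x
    x_c[...] = 1.0                # with copy=False (default) this is a view,
                                  # so writes modify the underlying AMReX data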


def array4_to_xp(self, copy=False, order="F"):
"""
Provide a NumPy or CuPy view into an Array4, depending on amr.Config.have_gpu .
Provide a NumPy, CuPy or dpnp view into an Array4, depending on amr.Config.have_gpu
and amr.Config.gpu_backend .

This function is similar to CuPy's xp naming suggestion for CPU/GPU agnostic code:
https://docs.cupy.dev/en/stable/user_guide/basic.html#how-to-write-cpu-gpu-agnostic-code
@@ -120,14 +163,20 @@ def array4_to_xp(self, copy=False, order="F"):
Returns
-------
xp.array
A NumPy or CuPy n-dimensional array.
A NumPy, CuPy or dpnp n-dimensional array.
"""
import inspect

amr = inspect.getmodule(self)
return (
self.to_cupy(copy, order) if amr.Config.have_gpu else self.to_numpy(copy, order)
)

if amr.Config.have_gpu:
if amr.Config.gpu_backend == "SYCL":
return self.to_dpnp(copy, order)
else: # if not SYCL use cupy
return self.to_cupy(copy, order)

# if no GPU, use NumPy
return self.to_numpy(copy, order)
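This dispatch keeps downstream code backend-agnostic; a minimal sketch, assuming an existing Array4 named arr:

x = arr.to_xp(order="C")   # NumPy on CPU builds, CuPy on CUDA/HIP, dpnp on SYCL
x *= 2.0                   # the same line runs on the host or on the device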


def register_Array4_extension(amr):
@@ -144,4 +193,5 @@ def register_Array4_extension(amr):
):
Array4_type.to_numpy = array4_to_numpy
Array4_type.to_cupy = array4_to_cupy
Array4_type.to_dpnp = array4_to_dpnp
Array4_type.to_xp = array4_to_xp
80 changes: 23 additions & 57 deletions src/amrex/extensions/MultiFab.py
@@ -63,50 +63,10 @@ def mf_to_numpy(self, copy=False, order="F"):
return views


def mf_to_cupy(self, copy=False, order="F"):
"""
Provide a CuPy view into a MultiFab.

This includes ngrow guard cells of each box.

Note on the order of indices:
By default, this is as in AMReX in Fortran contiguous order, indexing as
x,y,z. This has performance implications for use in external libraries such
as cupy.
The order="C" option will index as z,y,x and perform better with cupy.
https://github.com/AMReX-Codes/pyamrex/issues/55#issuecomment-1579610074

Parameters
----------
self : amrex.MultiFab
A MultiFab class in pyAMReX
copy : bool, optional
Copy the data if true, otherwise create a view (default).
order : string, optional
F order (default) or C. C is faster with external libraries.

Returns
-------
list of cupy.array
A list of CuPy n-dimensional arrays, for each local block in the
MultiFab.

Raises
------
ImportError
Raises an exception if cupy is not installed
"""
views = []
for mfi in self:
views.append(self.array(mfi).to_cupy(copy, order))

return views


def mf_to_xp(self, copy=False, order="F"):
"""
Provide a NumPy or CuPy view into a MultiFab,
depending on amr.Config.have_gpu .
Provide a NumPy, CuPy or dpnp view into a MultiFab,
depending on amr.Config.have_gpu and amr.Config.gpu_backend .

This function is similar to CuPy's xp naming suggestion for CPU/GPU agnostic code:
https://docs.cupy.dev/en/stable/user_guide/basic.html#how-to-write-cpu-gpu-agnostic-code
@@ -132,15 +92,14 @@ def mf_to_xp(self, copy=False, order="F"):
Returns
-------
list of xp.array
A list of NumPy or CuPy n-dimensional arrays, for each local block in the
MultiFab.
A list of NumPy, CuPy or dpnp n-dimensional arrays, for each local block
in the MultiFab.
"""
import inspect
views = []
for mfi in self:
views.append(self.array(mfi).to_xp(copy, order))

amr = inspect.getmodule(self)
return (
self.to_cupy(copy, order) if amr.Config.have_gpu else self.to_numpy(copy, order)
)
return views
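A small per-box access sketch enabled by this helper, assuming an existing MultiFab named mf:

for block in mf.to_xp(order="C"):   # one NumPy/CuPy/dpnp array per local box
    block[...] = 0.0                # with copy=False (default), writes go into the MultiFab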


def copy_multifab(amr, self):
@@ -490,6 +449,10 @@ def __getitem__(self, index, with_internal_ghosts=False):
Whether to include internal ghost cells. When true, data from ghost cells may be used that
overlaps valid cells.
"""
import inspect

amr = inspect.getmodule(self)

index4 = _process_index(self, index)

# Gather the data to be included in a list to be sent to other processes
Expand All @@ -503,17 +466,19 @@ def __getitem__(self, index, with_internal_ghosts=False):
device_arr = _get_field(self, mfi)
slice_arr = device_arr[block_slices]
try:
# Copy data from device to host using cupy syntax
slice_arr = slice_arr.get()
if amr.Config.gpu_backend == "SYCL":
import dpnp

slice_arr = dpnp.asnumpy(slice_arr)
else:
# Copy data from device to host using cupy syntax
slice_arr = slice_arr.get()
except AttributeError:
# Array is already a numpy array on the host
pass
datalist.append((global_slices, slice_arr))

# Gather the data from all processors
import inspect

amr = inspect.getmodule(self)
if amr.Config.have_mpi:
npes = amr.ParallelDescriptor.NProcs()
else:
@@ -604,7 +569,10 @@ def __setitem__(self, index, value):

amr = inspect.getmodule(self)
if amr.Config.have_gpu:
import cupy as xp
if amr.Config.gpu_backend == "SYCL":
import dpnp as xp
else:
import cupy as xp
else:
xp = np
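
The same backend selection can be reused in user code that needs host copies of device data; a sketch, assuming amr is the imported pyAMReX module and dev_arr is an array obtained from to_xp:

import numpy as np

if amr.Config.have_gpu:
    if amr.Config.gpu_backend == "SYCL":
        import dpnp as xp
    else:
        import cupy as xp
else:
    xp = np

# Device-to-host transfer differs per backend: dpnp provides asnumpy(),
# CuPy arrays expose .get(), and NumPy arrays already live on the host.
if xp is np:
    host_arr = np.asarray(dev_arr)
elif amr.Config.gpu_backend == "SYCL":
    host_arr = xp.asnumpy(dev_arr)
else:
    host_arr = dev_arr.get()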

@@ -653,7 +621,6 @@ def register_MultiFab_extension(amr):
amr.MultiFab.__iter__ = lambda mfab: amr.MFIter(mfab)

amr.MultiFab.to_numpy = mf_to_numpy
amr.MultiFab.to_cupy = mf_to_cupy
amr.MultiFab.to_xp = mf_to_xp

amr.MultiFab.copy = lambda self: copy_multifab(amr, self)
Expand All @@ -669,7 +636,6 @@ def register_MultiFab_extension(amr):
amr.iMultiFab.__iter__ = lambda imfab: amr.MFIter(imfab)

amr.iMultiFab.to_numpy = mf_to_numpy
amr.iMultiFab.to_cupy = mf_to_cupy
amr.iMultiFab.to_xp = mf_to_xp

amr.iMultiFab.copy = lambda self: copy_multifab(amr, self)
6 changes: 6 additions & 0 deletions src/dlpack/CMakeLists.txt
@@ -0,0 +1,6 @@
foreach(D IN LISTS AMReX_SPACEDIM)
target_sources(pyAMReX_${D}d
PRIVATE
DLPack.cpp
)
endforeach()
35 changes: 35 additions & 0 deletions src/dlpack/DLPack.cpp
@@ -0,0 +1,35 @@
#include "pyAMReX.H"

#include "dlpack.h"


void init_DLPack(py::module& m)
{
using namespace amrex;

// register types only if not already present, e.g., from another library
// that also implements DLPack bindings and exposes the types

// TODO: py::type pyDLDeviceType = py::type::of<DLDeviceType>();
bool pyDLDeviceType = false;
if (!pyDLDeviceType) {
py::native_enum<DLDeviceType>(m, "DLDeviceType", "enum.IntEnum")
.value("kDLCPU", DLDeviceType::kDLCPU)
.value("kDLCUDA", DLDeviceType::kDLCUDA)
.value("kDLCUDAHost", DLDeviceType::kDLCUDAHost)
.value("kDLOpenCL", DLDeviceType::kDLOpenCL)
.value("kDLVulkan", DLDeviceType::kDLVulkan)
.value("kDLMetal", DLDeviceType::kDLMetal)
.value("kDLVPI", DLDeviceType::kDLVPI)
.value("kDLROCM", DLDeviceType::kDLROCM)
.value("kDLROCMHost", DLDeviceType::kDLROCMHost)
.value("kDLExtDev", DLDeviceType::kDLExtDev)
.value("kDLCUDAManaged", DLDeviceType::kDLCUDAManaged)
.value("kDLOneAPI", DLDeviceType::kDLOneAPI)
.value("kDLWebGPU", DLDeviceType::kDLWebGPU)
.value("kDLHexagon", DLDeviceType::kDLHexagon)
.value("kDLMAIA", DLDeviceType::kDLMAIA)
.finalize()
;
}

}
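
On the Python side, the registered enum can be paired with __dlpack_device__; a sketch, assuming init_DLPack is called on the top-level module (so the enum is exposed as amr.DLDeviceType) and an existing Array4 named arr:

import amrex.space3d as amr   # assuming a 3D build

dev_type, dev_id = arr.__dlpack_device__()
if dev_type == amr.DLDeviceType.kDLCPU:
    print("Array4 data lives in host memory, device id", dev_id)
elif dev_type in (amr.DLDeviceType.kDLCUDA, amr.DLDeviceType.kDLROCM,
                  amr.DLDeviceType.kDLOneAPI):
    print("Array4 data lives in device memory, device id", dev_id)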