Skip to content

Commit 1db1f50

Browse files
authored
Add order parameter to to_dense (#94)
1 parent 2a0d323 commit 1db1f50

File tree

8 files changed

+145
-42
lines changed

8 files changed

+145
-42
lines changed

src/fast_array_utils/conv/__init__.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,25 +21,28 @@
2121

2222

2323
# CPU, on-disk and scipy sparse inputs: the result is always a numpy array.
@overload
def to_dense(
    x: CpuArray | DiskArray | types.sparray | types.spmatrix | types.CSDataset,
    /,
    *,
    order: Literal["K", "A", "C", "F"] = "K",
    to_cpu_memory: bool = False,
) -> NDArray[Any]: ...


# Dask inputs: stay a Dask array unless ``to_cpu_memory=True`` is passed.
@overload
def to_dense(
    x: types.DaskArray,
    /,
    *,
    order: Literal["K", "A", "C", "F"] = "K",
    to_cpu_memory: Literal[False] = False,
) -> types.DaskArray: ...
@overload
def to_dense(
    x: types.DaskArray,
    /,
    *,
    order: Literal["K", "A", "C", "F"] = "K",
    to_cpu_memory: Literal[True],
) -> NDArray[Any]: ...


# GPU inputs: stay a cupy array unless ``to_cpu_memory=True`` is passed.
@overload
def to_dense(
    x: GpuArray | types.CupySpMatrix,
    /,
    *,
    order: Literal["K", "A", "C", "F"] = "K",
    to_cpu_memory: Literal[False] = False,
) -> types.CupyArray: ...
@overload
def to_dense(
    x: GpuArray | types.CupySpMatrix,
    /,
    *,
    order: Literal["K", "A", "C", "F"] = "K",
    to_cpu_memory: Literal[True],
) -> NDArray[Any]: ...
3739

3840

3941
def to_dense(
4042
x: CpuArray | GpuArray | DiskArray | types.CSDataset | types.DaskArray | types.sparray | types.spmatrix | types.CupySpMatrix,
4143
/,
4244
*,
45+
order: Literal["K", "A", "C", "F"] = "K",
4346
to_cpu_memory: bool = False,
4447
) -> NDArray[Any] | types.DaskArray | types.CupyArray:
4548
r"""Convert x to a dense array.
@@ -52,6 +55,16 @@ def to_dense(
5255
----------
5356
x
5457
Input object to be converted.
58+
order
59+
The order of the output array: ``C`` (row-major) or ``F`` (column-major). ``K`` and ``A`` derive the order from ``x``.
60+
61+
The default matches numpy, and therefore diverges from the ``scipy.sparse`` matrices’
62+
:meth:`~scipy.sparse.csr_array.toarray`\ ’s default behavior
63+
of always returning a ``C``-contiguous array.
64+
Instead, CSC matrices become F-contiguous arrays when ``order="K"`` (the default).
65+
66+
Dask :class:`~dask.array.Array`\ s’ concatenation behavior will result in ``order``
67+
having no effect on the :func:`dask.compute` / ``to_cpu_memory=True`` result.
5568
to_cpu_memory
5669
Also load data into memory (resulting in a :class:`numpy.ndarray`).
5770
@@ -60,4 +73,4 @@ def to_dense(
6073
Dense form of ``x``
6174
6275
"""
63-
return to_dense_(x, to_cpu_memory=to_cpu_memory)
76+
return to_dense_(x, order=order, to_cpu_memory=to_cpu_memory)
Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# SPDX-License-Identifier: MPL-2.0
22
from __future__ import annotations
33

4+
import warnings
45
from functools import partial, singledispatch
56
from typing import TYPE_CHECKING, cast
67

@@ -11,7 +12,7 @@
1112

1213

1314
if TYPE_CHECKING:
14-
from typing import Any
15+
from typing import Any, Literal
1516

1617
from numpy.typing import NDArray
1718

@@ -22,40 +23,57 @@ def to_dense_(
2223
x: CpuArray | GpuArray | DiskArray | types.DaskArray | types.sparray | types.spmatrix | types.CupySpMatrix,
2324
/,
2425
*,
26+
order: Literal["K", "A", "C", "F"] = "K",
2527
to_cpu_memory: bool = False,
2628
) -> NDArray[Any] | types.CupyArray | types.DaskArray:
2729
del to_cpu_memory # it already is
28-
return np.asarray(x)
30+
return np.asarray(x, order=order)
2931

3032

3133
@to_dense_.register(types.spmatrix | types.sparray)  # type: ignore[call-overload,misc]
def _to_dense_cs(
    x: types.spmatrix | types.sparray,
    /,
    *,
    order: Literal["K", "A", "C", "F"] = "K",
    to_cpu_memory: bool = False,
) -> NDArray[Any]:
    """Densify a scipy sparse matrix/array.

    ``order`` is first resolved to a concrete ``"C"``/``"F"`` by
    :func:`sparse_order`, then handed to the package-local ``scipy.to_dense``.
    """
    from . import scipy as scipy_conv

    del to_cpu_memory  # unused: the data already lives in CPU memory
    resolved_order = sparse_order(x, order=order)
    return scipy_conv.to_dense(x, order=resolved_order)
3739

3840

3941
@to_dense_.register(types.DaskArray)
def _to_dense_dask(
    x: types.DaskArray,
    /,
    *,
    order: Literal["K", "A", "C", "F"] = "K",
    to_cpu_memory: bool = False,
) -> NDArray[Any] | types.DaskArray:
    """Densify every block of a Dask array, optionally computing the result.

    Emits a :class:`RuntimeWarning` when ``order="F"`` is requested.
    With ``to_cpu_memory=True`` the mapped array is computed and returned,
    otherwise the (lazy) mapped Dask array is returned.
    """
    from . import to_dense

    if order == "F":
        msg = f"{order=!r} will probably be ignored: Dask can not be made to emit F-contiguous arrays reliably."
        # NOTE(review): stacklevel=4 presumably points the warning at the caller
        # of the public ``to_dense`` through the dispatch layers — confirm.
        warnings.warn(msg, RuntimeWarning, stacklevel=4)

    # The same ``order``/``to_cpu_memory`` are forwarded to the per-block conversion.
    densify_block = partial(to_dense, order=order, to_cpu_memory=to_cpu_memory)
    dense = x.map_blocks(densify_block)
    return dense.compute() if to_cpu_memory else dense  # type: ignore[return-value]
4550

4651

4752
@to_dense_.register(types.CSDataset)
def _to_dense_ooc(
    x: types.CSDataset,
    /,
    *,
    order: Literal["K", "A", "C", "F"] = "K",
    to_cpu_memory: bool = False,
) -> NDArray[Any]:
    """Load an on-disk sparse dataset into memory and densify it.

    Raises
    ------
    ValueError
        If ``to_cpu_memory`` is false.
    """
    from . import to_dense

    if not to_cpu_memory:
        msg = "to_cpu_memory must be True if x is an CS{R,C}Dataset"
        raise ValueError(msg)

    # TODO(flying-sheep): why is to_memory of type Any?  # noqa: TD003
    in_memory = cast("types.CSBase", x.to_memory())
    # The order is derived from the dataset itself, not from the loaded copy.
    resolved_order = sparse_order(x, order=order)
    return to_dense(in_memory, order=resolved_order)
5661

5762

5863
@to_dense_.register(types.CupyArray | types.CupySpMatrix)  # type: ignore[call-overload,misc]
def _to_dense_cupy(
    x: GpuArray,
    /,
    *,
    order: Literal["K", "A", "C", "F"] = "K",
    to_cpu_memory: bool = False,
) -> NDArray[Any] | types.CupyArray:
    """Densify a cupy array or cupy sparse matrix, optionally copying it to the host.

    Sparse matrices are converted with ``toarray`` using the order resolved by
    :func:`sparse_order`; dense arrays go through ``cupy.asarray`` with ``order``
    passed as given.
    """
    import cupy as cu

    if isinstance(x, types.CupySpMatrix):
        dense = x.toarray(sparse_order(x, order=order))
    else:
        dense = cu.asarray(x, order=order)

    if not to_cpu_memory:
        return dense
    # NOTE(review): order="A" presumably keeps the layout chosen above when
    # copying to host memory — confirm against the cupy ``ndarray.get`` docs.
    return dense.get(order="A")
69+
70+
71+
def sparse_order(x: types.spmatrix | types.sparray | types.CupySpMatrix | types.CSDataset, /, *, order: Literal["K", "A", "C", "F"]) -> Literal["C", "F"]:
    """Resolve ``order`` to a concrete layout for a sparse input.

    Parameters
    ----------
    x
        Sparse container; its ``format`` attribute is only read for ``"K"``/``"A"``.
    order
        Requested order.

    Returns
    -------
    ``order`` unchanged unless it is ``"K"`` or ``"A"``;
    in that case ``"F"`` if ``x.format == "csc"``, otherwise ``"C"``.
    """
    if TYPE_CHECKING:
        # Narrowing for the type checker only; never executed at runtime.
        from scipy.sparse._base import _spbase

        assert isinstance(x, _spbase | types.CSDataset)

    if order not in {"K", "A"}:
        return cast("Literal['C', 'F']", order)
    return "F" if x.format == "csc" else "C"

src/fast_array_utils/types.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@
88

99

1010
__all__ = [
11+
"COOBase",
1112
"CSArray",
1213
"CSBase",
1314
"CSDataset",
1415
"CSMatrix",
1516
"CupyArray",
17+
"CupyCOOMatrix",
1618
"CupyCSCMatrix",
1719
"CupyCSMatrix",
1820
"CupyCSRMatrix",
@@ -22,30 +24,41 @@
2224
"H5Group",
2325
"ZarrArray",
2426
"ZarrGroup",
27+
"coo_array",
28+
"coo_matrix",
29+
"csc_array",
30+
"csc_matrix",
31+
"csr_array",
32+
"csr_matrix",
33+
"sparray",
34+
"spmatrix",
2535
]
2636

2737
T_co = TypeVar("T_co", covariant=True)
2838

2939

3040
# scipy sparse
3141
if TYPE_CHECKING:
32-
from scipy.sparse import csc_array, csc_matrix, csr_array, csr_matrix, sparray, spmatrix
42+
from scipy.sparse import coo_array, coo_matrix, csc_array, csc_matrix, csr_array, csr_matrix, sparray, spmatrix
3343
else:
3444
try: # cs?_array isn’t available in older scipy versions
35-
from scipy.sparse import csc_array, csr_array, sparray
45+
from scipy.sparse import coo_array, csc_array, csr_array, sparray
3646
except ImportError: # pragma: no cover
47+
coo_array = type("coo_array", (), {})
3748
csc_array = type("csc_array", (), {})
3849
csr_array = type("csr_array", (), {})
3950
sparray = type("sparray", (), {})
40-
csc_array.__module__ = csr_array.__module__ = sparray.__module__ = "scipy.sparse"
51+
coo_array.__module__ = csc_array.__module__ = csr_array.__module__ = sparray.__module__ = "scipy.sparse"
4152

4253
try: # cs?_matrix is available when scipy is installed
43-
from scipy.sparse import csc_matrix, csr_matrix, spmatrix
54+
from scipy.sparse import coo_matrix, csc_matrix, csr_matrix, spmatrix
4455
except ImportError: # pragma: no cover
56+
coo_matrix = type("coo_matrix", (), {})
4557
csc_matrix = type("csc_matrix", (), {})
4658
csr_matrix = type("csr_matrix", (), {})
4759
spmatrix = type("spmatrix", (), {})
48-
csc_matrix.__module__ = csr_matrix.__module__ = spmatrix.__module__ = "scipy.sparse"
60+
coo_matrix.__module__ = csc_matrix.__module__ = csr_matrix.__module__ = spmatrix.__module__ = "scipy.sparse"
61+
COOBase = coo_matrix | coo_array
4962
CSMatrix = csc_matrix | csr_matrix
5063
CSArray = csc_array | csr_array
5164
CSBase = CSMatrix | CSArray
@@ -54,16 +67,18 @@
5467

5568
if TYPE_CHECKING or find_spec("cupy"): # cupy always comes with cupyx
5669
from cupy import ndarray as CupyArray
70+
from cupyx.scipy.sparse import coo_matrix as CupyCOOMatrix
5771
from cupyx.scipy.sparse import csc_matrix as CupyCSCMatrix
5872
from cupyx.scipy.sparse import csr_matrix as CupyCSRMatrix
5973
from cupyx.scipy.sparse import spmatrix as CupySpMatrix
6074
else: # pragma: no cover
6175
CupyArray = type("ndarray", (), {})
6276
CupyArray.__module__ = "cupy"
77+
CupyCOOMatrix = type("coo_matrix", (), {})
6378
CupyCSCMatrix = type("csc_matrix", (), {})
6479
CupyCSRMatrix = type("csr_matrix", (), {})
6580
CupySpMatrix = type("spmatrix", (), {})
66-
CupyCSCMatrix.__module__ = CupyCSRMatrix.__module__ = CupySpMatrix.__module__ = "cupyx.scipy.sparse"
81+
CupyCOOMatrix.__module__ = CupyCSCMatrix.__module__ = CupyCSRMatrix.__module__ = CupySpMatrix.__module__ = "cupyx.scipy.sparse"
6782
CupyCSMatrix = CupyCSRMatrix | CupyCSCMatrix
6883

6984

src/testing/fast_array_utils/_array_type.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,14 @@
2222
import h5py
2323
from numpy.typing import ArrayLike, DTypeLike, NDArray
2424

25-
from fast_array_utils.types import CSBase
2625
from fast_array_utils.typing import CpuArray, DiskArray, GpuArray
2726

2827
InnerArray = CpuArray | GpuArray | DiskArray
2928
Array: TypeAlias = InnerArray | types.DaskArray | types.CSDataset
29+
ExtendedArray = Array | types.COOBase | types.CupyCOOMatrix
3030

31-
Arr = TypeVar("Arr", bound=Array, default=Array)
32-
Arr_co = TypeVar("Arr_co", bound=Array, covariant=True)
31+
Arr = TypeVar("Arr", bound=ExtendedArray, default=Array)
32+
Arr_co = TypeVar("Arr_co", bound=ExtendedArray, covariant=True)
3333

3434
Inner = TypeVar("Inner", bound="ArrayType[InnerArray, None] | None", default=Any)
3535

@@ -305,7 +305,7 @@ def _to_scipy_sparse(
305305
/,
306306
*,
307307
dtype: DTypeLike | None = None,
308-
cls: type[CSBase] | None = None,
308+
cls: type[types.CSBase] | None = None,
309309
) -> types.CSBase:
310310
"""Convert to a scipy sparse matrix/array."""
311311
if isinstance(x, types.DaskArray):

tests/conftest.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
if TYPE_CHECKING:
1414
from collections.abc import Callable
1515

16+
from fast_array_utils import types
17+
1618

1719
@pytest.fixture
1820
def dask_viz(request: pytest.FixtureRequest, cache: pytest.Cache) -> Callable[[object], None]:
@@ -41,5 +43,5 @@ def viz(obj: object) -> None:
4143

4244

4345
@pytest.fixture(scope="session", params=COO_PARAMS)
def coo_matrix_type(request: pytest.FixtureRequest) -> ArrayType[types.COOBase | types.CupyCOOMatrix]:
    """Session-scoped fixture yielding each entry of ``COO_PARAMS`` in turn."""
    param = request.param
    return cast("ArrayType[types.COOBase | types.CupyCOOMatrix]", param)

tests/test_test_utils.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@
1515
if TYPE_CHECKING:
1616
from typing import Any
1717

18-
from cupyx.scipy.sparse import coo_matrix as CupyCooMatrix
1918
from numpy.typing import DTypeLike, NDArray
20-
from scipy.sparse import coo_array, coo_matrix
2119

2220
from testing.fast_array_utils import Array, ArrayType
2321

@@ -54,7 +52,7 @@ def test_conv_other(array_type: ArrayType, other_array_type: ArrayType) -> None:
5452
@pytest.mark.array_type(skip=Flags.Dask | Flags.Disk | Flags.Gpu)
5553
def test_conv_extra(
5654
array_type: ArrayType[NDArray[np.number[Any]] | types.CSBase],
57-
coo_matrix_type: ArrayType[coo_matrix | coo_array | CupyCooMatrix],
55+
coo_matrix_type: ArrayType[types.COOBase | types.CupyCOOMatrix],
5856
) -> None:
5957
src_arr = array_type(np.arange(12).reshape(3, 4), dtype=np.float32)
6058
arr = coo_matrix_type(src_arr)

tests/test_to_dense.py

Lines changed: 61 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,46 +13,98 @@
1313

1414

1515
if TYPE_CHECKING:
16-
from typing import TypeAlias
16+
from collections.abc import Iterable
17+
from typing import Literal, TypeAlias
1718

1819
from fast_array_utils.typing import CpuArray, DiskArray, GpuArray
1920
from testing.fast_array_utils import ArrayType
2021

2122
Array: TypeAlias = CpuArray | GpuArray | DiskArray | types.CSDataset | types.DaskArray
23+
ExtendedArray: TypeAlias = Array | types.COOBase | types.CupyCOOMatrix
2224

2325

2426
WARNS_NUMBA = pytest.warns(RuntimeWarning, match="numba is not installed; falling back to slow conversion")
2527

2628

2729
@pytest.mark.parametrize("to_cpu_memory", [True, False], ids=["to_cpu_memory", "not_to_cpu_memory"])
@pytest.mark.parametrize("order", argvalues=["K", "C", "F"])  # “A” behaves like “K”
def test_to_dense(array_type: ArrayType[Array], *, order: Literal["K", "C", "F"], to_cpu_memory: bool) -> None:
    """Check result class, shape and memory order of ``to_dense`` for every array type."""
    x = array_type([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
    is_dask = array_type.cls is types.DaskArray

    # On-disk sparse datasets can only be densified into CPU memory.
    if not to_cpu_memory and array_type.cls in {types.CSCDataset, types.CSRDataset}:
        with pytest.raises(ValueError, match="to_cpu_memory must be True if x is an CS{R,C}Dataset"):
            to_dense(x, order=order, to_cpu_memory=to_cpu_memory)
        return

    if order == "F" and is_dask:
        expect_dask_warning = pytest.warns(RuntimeWarning, match="Dask can not be made to emit F-contiguous arrays")
    else:
        expect_dask_warning = nullcontext()
    if issubclass(array_type.cls, types.CSBase) and not find_spec("numba"):
        expect_numba_warning = WARNS_NUMBA
    else:
        expect_numba_warning = nullcontext()

    with expect_dask_warning, expect_numba_warning:
        arr = to_dense(x, order=order, to_cpu_memory=to_cpu_memory)

    assert_expected_cls(x, arr, to_cpu_memory=to_cpu_memory)
    assert arr.shape == (2, 3)
    # Dask is unreliable: for explicit “F”, we emit a warning (tested above), for “K” we just ignore the result
    if not is_dask or order not in {"F", "K"}:
        assert_expected_order(x, arr, order=order)
3951

4052

4153
@pytest.mark.parametrize("to_cpu_memory", [True, False], ids=["to_cpu_memory", "not_to_cpu_memory"])
@pytest.mark.parametrize("order", argvalues=["K", "C", "F"])  # “A” behaves like “K”
def test_to_dense_extra(coo_matrix_type: ArrayType[types.COOBase | types.CupyCOOMatrix], *, order: Literal["K", "C", "F"], to_cpu_memory: bool) -> None:
    """Check result class, shape and memory order of ``to_dense`` for COO inputs."""
    src_mtx = coo_matrix_type([[1, 2, 3], [4, 5, 6]], dtype=np.float32)

    if find_spec("numba"):
        expect_numba_warning = nullcontext()
    else:
        expect_numba_warning = WARNS_NUMBA
    with expect_numba_warning:
        arr = to_dense(src_mtx, order=order, to_cpu_memory=to_cpu_memory)

    assert_expected_cls(src_mtx, arr, to_cpu_memory=to_cpu_memory)
    assert arr.shape == (2, 3)
    assert_expected_order(src_mtx, arr, order=order)
4864

4965

50-
def assert_expected_cls(orig: ExtendedArray, converted: Array, *, to_cpu_memory: bool) -> None:
    """Assert that ``converted`` has the class expected for ``orig``.

    Without ``to_cpu_memory``, Dask inputs must stay Dask arrays (and their
    computed contents are checked recursively) and cupy inputs must become
    cupy arrays. Everything else must be a numpy array.
    """
    if not to_cpu_memory and isinstance(orig, types.DaskArray):
        assert isinstance(converted, types.DaskArray)
        assert_expected_cls(orig.compute(), converted.compute(), to_cpu_memory=to_cpu_memory)
    elif not to_cpu_memory and isinstance(orig, types.CupyArray | types.CupySpMatrix):
        assert isinstance(converted, types.CupyArray)
    else:
        assert isinstance(converted, np.ndarray)
75+
76+
77+
def assert_expected_order(orig: ExtendedArray, converted: Array, *, order: Literal["K", "C", "F"]) -> None:
    """Assert that ``converted`` is contiguous in an acceptable order.

    For ``order="K"`` any order reported by :func:`get_orders` for ``orig`` is
    accepted; otherwise only the requested one. Dask results are computed first.
    """
    if isinstance(converted, types.DaskArray):
        assert_expected_order(orig, converted.compute(), order=order)
        return
    if not isinstance(converted, types.CupyArray | np.ndarray):
        pytest.fail(f"Unsupported array type: {type(converted)}")

    acceptable = get_orders(orig) if order == "K" else {order}
    orders = {order_exp: converted.flags[f"{order_exp}_CONTIGUOUS"] for order_exp in acceptable}  # type: ignore[index]
    assert any(orders.values()), orders
86+
87+
88+
def get_orders(orig: ExtendedArray) -> Iterable[Literal["C", "F"]]:
    """Yield every memory order that ``orig`` counts as having.

    Numpy arrays with at most one axis of a length >1 are valid in both orders.
    So are COO sparse matrices/arrays.
    """
    if isinstance(orig, np.ndarray | types.CupyArray):
        flags = orig.flags
        if flags.c_contiguous:
            yield "C"
        if flags.f_contiguous:
            yield "F"
    elif isinstance(orig, types.CSBase | types.COOBase | types.CupyCSMatrix | types.CupyCOOMatrix | types.CSDataset):
        fmt = orig.format
        if fmt in {"csr", "coo"}:
            yield "C"
        if fmt in {"csc", "coo"}:
            yield "F"
    elif isinstance(orig, types.DaskArray):
        # Defer to whatever the computed array is.
        yield from get_orders(orig.compute())
    elif isinstance(orig, types.ZarrArray | types.H5Dataset):
        yield "C"
    else:
        pytest.fail(f"Unsupported array type: {type(orig)}")

0 commit comments

Comments
 (0)