Skip to content

Commit 585feb7

Browse files
committed
Renamed the SciPy conversion functions and made them generics.
This should make it easier to extend for classes that don't need to go through block processing, e.g., HDF5 compressed sparse matrices.
1 parent 1df3701 commit 585feb7

File tree

4 files changed

+55
-77
lines changed

4 files changed

+55
-77
lines changed

README.md

+1-3
Original file line numberDiff line numberDiff line change
@@ -192,9 +192,7 @@ delayedarray.to_sparse_array(d)
192192
Users can easily convert a 2-dimensional `SparseNdarray` to some of the common SciPy sparse matrix classes for downstream calculations.
193193

194194
```python
195-
delayedarray.to_scipy_csc_matrix(current)
196-
delayedarray.to_scipy_csr_matrix(current)
197-
delayedarray.to_scipy_coo_matrix(current)
195+
delayedarray.to_scipy_sparse_matrix(current, "csc")
198196
```
199197

200198
More simply, users can just call `numpy.array()` to realize the delayed operations into a standard NumPy array for consumption.

src/delayedarray/__init__.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from .extract_sparse_array import extract_sparse_array
3232
from .to_dense_array import to_dense_array
3333
from .to_sparse_array import to_sparse_array
34+
from .to_scipy_sparse_matrix import *
3435

3536
from .create_dask_array import create_dask_array
3637
from .is_sparse import is_sparse
@@ -40,5 +41,3 @@
4041
from .apply_over_dimension import apply_over_dimension, choose_block_size_for_1d_iteration, guess_iteration_block_size
4142
from .apply_over_blocks import apply_over_blocks, choose_block_shape_for_iteration
4243
from .wrap import wrap
43-
44-
from .to_scipy import *
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,17 @@
11
import numpy
2-
from typing import Any
2+
from functools import singledispatch
3+
from typing import Any, Literal
34
from biocutils.package_utils import is_package_installed
45

56
from .SparseNdarray import SparseNdarray
6-
from .extract_sparse_array import to_sparse_array
7+
from .to_sparse_array import to_sparse_array
78

89

910
if is_package_installed("scipy"):
1011
import scipy.sparse
1112

1213

13-
def to_scipy_csc_matrix(x: Any) -> scipy.sparse.csc_matrix:
14-
"""
15-
Convert a 2-dimensional ``DelayedArray`` or ``SparseNdarray`` into a
16-
SciPy compressed sparse column (CSC) matrix.
17-
18-
Args:
19-
x:
20-
Input matrix where :py:func:`~delayedarray.is_sparse.is_sparse`
21-
returns True and :py:func:`~delayedarray.is_sparse.is_masked`
22-
returns False.
23-
24-
Returns:
25-
A CSC matrix with the contents of ``x``.
26-
"""
27-
# One might think that we could be more memory-efficient by doing block
28-
# processing. However, there is no advantage from doing so as we eventually
29-
# need to hold all the blocks in memory before concatenation. We'd only
30-
# avoid this if we did two passes; one to collect the total size for
31-
# allocation, and another to actually fill the vectors; not good, so we
32-
# just forget about it and load it all into memory up-front.
33-
if not isinstance(x, SparseNdarray):
34-
x = to_sparse_array(x)
35-
14+
def _to_csc(x: Any) -> scipy.sparse.csc_matrix:
3615
all_indptrs = numpy.zeros(x.shape[1] + 1, dtype=numpy.uint64)
3716
if x.contents is not None:
3817
all_indices = []
@@ -53,26 +32,7 @@ def to_scipy_csc_matrix(x: Any) -> scipy.sparse.csc_matrix:
5332
return scipy.sparse.csc_matrix((all_values, all_indices, all_indptrs), shape=x.shape)
5433

5534

56-
def to_scipy_csr_matrix(x: Any) -> scipy.sparse.csr_matrix:
57-
"""
58-
Convert a 2-dimensional ``DelayedArray`` or ``SparseNdarray`` into a
59-
SciPy compressed sparse row (CSR) matrix.
60-
61-
Args:
62-
x:
63-
Input matrix where :py:func:`~delayedarray.is_sparse.is_sparse`
64-
returns True and :py:func:`~delayedarray.is_sparse.is_masked`
65-
returns False.
66-
67-
Returns:
68-
A CSR matrix with the contents of ``x``.
69-
"""
70-
# Same logic as above; block processing just ends up reading the entire
71-
# thing into memory before forming the full arrays, so we just load it
72-
# all in to start with and save ourselves the trouble.
73-
if not isinstance(x, SparseNdarray):
74-
x = to_sparse_array(x)
75-
35+
def _to_csr(x: Any) -> scipy.sparse.csr_matrix:
7636
all_indptrs = numpy.zeros(x.shape[0] + 1, dtype=numpy.uint64)
7737
if x.contents is not None:
7838
# First pass (in memory) to obtain the total sizes.
@@ -103,24 +63,7 @@ def to_scipy_csr_matrix(x: Any) -> scipy.sparse.csr_matrix:
10363
return scipy.sparse.csr_matrix((all_values, all_indices, all_indptrs), shape=x.shape)
10464

10565

106-
def to_scipy_coo_matrix(x: Any) -> scipy.sparse.coo_matrix:
107-
"""
108-
Convert a 2-dimensional ``DelayedArray`` or ``SparseNdarray`` into a
109-
SciPy sparse coordinate (COO) matrix.
110-
111-
Args:
112-
x:
113-
Input matrix where :py:func:`~delayedarray.is_sparse.is_sparse`
114-
returns True and :py:func:`~delayedarray.is_sparse.is_masked`
115-
returns False.
116-
117-
Returns:
118-
A COO matrix with the contents of ``x``.
119-
"""
120-
# Same logic as above.
121-
if not isinstance(x, SparseNdarray):
122-
x = to_sparse_array(x)
123-
66+
def _to_coo(x: Any) -> scipy.sparse.coo_matrix:
12467
if x.contents is not None:
12568
# First pass (in memory) to obtain the total sizes.
12669
total_count = 0
@@ -147,3 +90,41 @@ def to_scipy_coo_matrix(x: Any) -> scipy.sparse.coo_matrix:
14790
all_values = numpy.zeros(0, dtype=x.dtype)
14891

14992
return scipy.sparse.coo_matrix((all_values, (all_rows, all_cols)), shape=x.shape)
93+
94+
95+
@singledispatch
96+
def to_scipy_sparse_matrix(x: Any, format: Literal["coo", "csr", "csc"] = "csc") -> scipy.sparse.spmatrix:
97+
"""
98+
Convert a 2-dimensional array into a SciPy sparse matrix.
99+
100+
Args:
101+
x:
102+
Input matrix where :py:func:`~delayedarray.is_sparse.is_sparse`
103+
returns True and :py:func:`~delayedarray.is_masked.is_masked`
104+
returns False.
105+
106+
format:
107+
Type of SciPy matrix to create - coordinate (coo), compressed
108+
sparse row (csr) or compressed sparse column (csc).
109+
110+
Returns:
111+
A SciPy sparse matrix with the contents of ``x``.
112+
"""
113+
# One might think that we could be more memory-efficient by doing block
114+
# processing. However, there is no advantage from doing so as we eventually
115+
# need to hold all the blocks in memory before concatenation. We'd only
116+
# avoid this if we did two passes; one to collect the total size for
117+
# allocation, and another to actually fill the vectors; not good, so we
118+
# just forget about it and load it all into memory up-front.
119+
return to_scipy_sparse_matrix_from_SparseNdarray(to_sparse_array(x), format=format)
120+
121+
122+
@to_scipy_sparse_matrix.register
123+
def to_scipy_sparse_matrix_from_SparseNdarray(x: SparseNdarray, format: Literal["coo", "csr", "csc"] = "csc") -> scipy.sparse.spmatrix:
124+
"""See :py:meth:`~to_scipy_sparse_matrix`."""
125+
if format == "csc":
126+
return _to_csc(x)
127+
elif format == "csr":
128+
return _to_csr(x)
129+
else:
130+
return _to_coo(x)

tests/test_to_scipy.py tests/test_to_scipy_sparse_matrix.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -5,37 +5,37 @@
55
from utils import simulate_SparseNdarray
66

77

8-
def test_to_scipy_csc_matrix():
8+
def test_to_scipy_sparse_matrix_csc():
99
test_shape = (100, 150)
1010
y = simulate_SparseNdarray(test_shape)
11-
z = delayedarray.to_scipy_csc_matrix(y)
11+
z = delayedarray.to_scipy_sparse_matrix(y, "csc")
1212
assert isinstance(z, scipy.sparse.csc_matrix)
1313
assert (z.toarray() == delayedarray.to_dense_array(y)).all()
1414

15-
z = delayedarray.to_scipy_csc_matrix(delayedarray.wrap(y))
15+
z = delayedarray.to_scipy_sparse_matrix(delayedarray.wrap(y), "csc")
1616
assert isinstance(z, scipy.sparse.csc_matrix)
1717
assert (z.toarray() == delayedarray.to_dense_array(y)).all()
1818

1919

20-
def test_to_scipy_csr_matrix():
20+
def test_to_scipy_sparse_matrix_csr():
2121
test_shape = (150, 80)
2222
y = simulate_SparseNdarray(test_shape)
23-
z = delayedarray.to_scipy_csr_matrix(y)
23+
z = delayedarray.to_scipy_sparse_matrix(y, "csr")
2424
assert isinstance(z, scipy.sparse.csr_matrix)
2525
assert (z.toarray() == delayedarray.to_dense_array(y)).all()
2626

27-
z = delayedarray.to_scipy_csr_matrix(delayedarray.wrap(y))
27+
z = delayedarray.to_scipy_sparse_matrix(delayedarray.wrap(y), "csr")
2828
assert isinstance(z, scipy.sparse.csr_matrix)
2929
assert (z.toarray() == delayedarray.to_dense_array(y)).all()
3030

3131

32-
def test_to_scipy_coo_matrix():
32+
def test_to_scipy_sparse_matrix_coo():
3333
test_shape = (70, 90)
3434
y = simulate_SparseNdarray(test_shape)
35-
z = delayedarray.to_scipy_coo_matrix(y)
35+
z = delayedarray.to_scipy_sparse_matrix(y, "coo")
3636
assert isinstance(z, scipy.sparse.coo_matrix)
3737
assert (z.toarray() == delayedarray.to_dense_array(y)).all()
3838

39-
z = delayedarray.to_scipy_coo_matrix(delayedarray.wrap(y))
39+
z = delayedarray.to_scipy_sparse_matrix(delayedarray.wrap(y), "coo")
4040
assert isinstance(z, scipy.sparse.coo_matrix)
4141
assert (z.toarray() == delayedarray.to_dense_array(y)).all()

0 commit comments

Comments
 (0)