1
1
import numpy
2
- from typing import Any
2
+ from functools import singledispatch
3
+ from typing import Any , Literal
3
4
from biocutils .package_utils import is_package_installed
4
5
5
6
from .SparseNdarray import SparseNdarray
6
- from .extract_sparse_array import to_sparse_array
7
+ from .to_sparse_array import to_sparse_array
7
8
8
9
9
10
if is_package_installed ("scipy" ):
10
11
import scipy .sparse
11
12
12
13
13
- def to_scipy_csc_matrix (x : Any ) -> scipy .sparse .csc_matrix :
14
- """
15
- Convert a 2-dimensional ``DelayedArray`` or ``SparseNdarray`` into a
16
- SciPy compressed sparse column (CSC) matrix.
17
-
18
- Args:
19
- x:
20
- Input matrix where :py:func:`~delayedarray.is_sparse.is_sparse`
21
- returns True and :py:func:`~delayedarray.is_sparse.is_masked`
22
- returns False.
23
-
24
- Returns:
25
- A CSC matrix with the contents of ``x``.
26
- """
27
- # One might think that we could be more memory-efficient by doing block
28
- # processing. However, there is no advantage from doing so as we eventually
29
- # need to hold all the blocks in memory before concatenation. We'd only
30
- # avoid this if we did two passes; one to collect the total size for
31
- # allocation, and another to actually fill the vectors; not good, so we
32
- # just forget about it and load it all into memory up-front.
33
- if not isinstance (x , SparseNdarray ):
34
- x = to_sparse_array (x )
35
-
14
+ def _to_csc (x : Any ) -> scipy .sparse .csc_matrix :
36
15
all_indptrs = numpy .zeros (x .shape [1 ] + 1 , dtype = numpy .uint64 )
37
16
if x .contents is not None :
38
17
all_indices = []
@@ -53,26 +32,7 @@ def to_scipy_csc_matrix(x: Any) -> scipy.sparse.csc_matrix:
53
32
return scipy .sparse .csc_matrix ((all_values , all_indices , all_indptrs ), shape = x .shape )
54
33
55
34
56
- def to_scipy_csr_matrix (x : Any ) -> scipy .sparse .csr_matrix :
57
- """
58
- Convert a 2-dimensional ``DelayedArray`` or ``SparseNdarray`` into a
59
- SciPy compressed sparse row (CSR) matrix.
60
-
61
- Args:
62
- x:
63
- Input matrix where :py:func:`~delayedarray.is_sparse.is_sparse`
64
- returns True and :py:func:`~delayedarray.is_sparse.is_masked`
65
- returns False.
66
-
67
- Returns:
68
- A CSR matrix with the contents of ``x``.
69
- """
70
- # Same logic as above; block processing just ends up reading the entire
71
- # thing into memory before forming the full arrays, so we just load it
72
- # all in to start with and save ourselves the trouble.
73
- if not isinstance (x , SparseNdarray ):
74
- x = to_sparse_array (x )
75
-
35
+ def _to_csr (x : Any ) -> scipy .sparse .csr_matrix :
76
36
all_indptrs = numpy .zeros (x .shape [0 ] + 1 , dtype = numpy .uint64 )
77
37
if x .contents is not None :
78
38
# First pass (in memory) to obtain the total sizes.
@@ -103,24 +63,7 @@ def to_scipy_csr_matrix(x: Any) -> scipy.sparse.csr_matrix:
103
63
return scipy .sparse .csr_matrix ((all_values , all_indices , all_indptrs ), shape = x .shape )
104
64
105
65
106
- def to_scipy_coo_matrix (x : Any ) -> scipy .sparse .coo_matrix :
107
- """
108
- Convert a 2-dimensional ``DelayedArray`` or ``SparseNdarray`` into a
109
- SciPy sparse coordinate (COO) matrix.
110
-
111
- Args:
112
- x:
113
- Input matrix where :py:func:`~delayedarray.is_sparse.is_sparse`
114
- returns True and :py:func:`~delayedarray.is_sparse.is_masked`
115
- returns False.
116
-
117
- Returns:
118
- A COO matrix with the contents of ``x``.
119
- """
120
- # Same logic as above.
121
- if not isinstance (x , SparseNdarray ):
122
- x = to_sparse_array (x )
123
-
66
+ def _to_coo (x : Any ) -> scipy .sparse .coo_matrix :
124
67
if x .contents is not None :
125
68
# First pass (in memory) to obtain the total sizes.
126
69
total_count = 0
@@ -147,3 +90,41 @@ def to_scipy_coo_matrix(x: Any) -> scipy.sparse.coo_matrix:
147
90
all_values = numpy .zeros (0 , dtype = x .dtype )
148
91
149
92
return scipy .sparse .coo_matrix ((all_values , (all_rows , all_cols )), shape = x .shape )
93
+
94
+
95
@singledispatch
def to_scipy_sparse_matrix(x: Any, format: Literal["coo", "csr", "csc"] = "csc") -> scipy.sparse.spmatrix:
    """
    Build a SciPy sparse matrix from a 2-dimensional array.

    This is the generic ``singledispatch`` fallback: ``x`` is first passed
    through ``to_sparse_array`` and the result is handed to the
    ``SparseNdarray`` implementation.

    Args:
        x:
            Input matrix where :py:func:`~delayedarray.is_sparse.is_sparse`
            returns True and :py:func:`~delayedarray.is_masked.is_masked`
            returns False.

        format:
            Layout of the SciPy matrix to produce - coordinate (coo),
            compressed sparse row (csr) or compressed sparse column (csc).

    Returns:
        A SciPy sparse matrix with the contents of ``x``.
    """
    # Block-wise processing would not save memory here: every block has to be
    # kept around until the final concatenation anyway. The only way to avoid
    # that is a two-pass scheme (one pass to count entries for allocation, a
    # second pass to fill the vectors), which is not worth it, so the whole
    # input is loaded into memory up-front instead.
    sparse = to_sparse_array(x)
    return to_scipy_sparse_matrix_from_SparseNdarray(sparse, format=format)
120
+
121
+
122
@to_scipy_sparse_matrix.register
def to_scipy_sparse_matrix_from_SparseNdarray(x: SparseNdarray, format: Literal["coo", "csr", "csc"] = "csc") -> scipy.sparse.spmatrix:
    """
    ``SparseNdarray`` implementation of :py:meth:`~to_scipy_sparse_matrix`.

    Args:
        x:
            A ``SparseNdarray`` to convert.

        format:
            Layout of the SciPy matrix to produce - coordinate (coo),
            compressed sparse row (csr) or compressed sparse column (csc).

    Returns:
        A SciPy sparse matrix of the requested layout with the contents of
        ``x``.

    Raises:
        ValueError: If ``format`` is not one of "coo", "csr" or "csc".
    """
    if format == "csc":
        return _to_csc(x)
    if format == "csr":
        return _to_csr(x)
    if format == "coo":
        return _to_coo(x)

    # The Literal annotation is not enforced at runtime, so reject anything
    # else explicitly rather than silently handing back a COO matrix for a
    # mistyped format name.
    raise ValueError(
        f"unsupported 'format' {format!r}; expected one of 'coo', 'csr' or 'csc'"
    )
0 commit comments