12
12
__license__ = "MIT"
13
13
14
14
15
- def _chunk_shape_to_grid (chunks : Sequence [int ], shape : Tuple [int , ...], cost_factor : int ):
15
+ def chunk_shape_to_grid (chunks : Sequence [int ], shape : Tuple [int , ...], cost_factor : int ) -> SimpleGrid :
16
+ """
17
+ Convert a chunk shape to a :py:class:`~delayedarray.Grid.SimpleGrid`.
18
+ This assumes that the underlying array is split up into regular intervals
19
+ on each dimension; the first chunk should start from zero, and only the
20
+ last chunk may be of a different size (bounded by the dimension extent).
21
+
22
+ Args:
23
+ chunks:
24
+ Chunk size for each dimension. These should be positive.
25
+
26
+ shape:
27
+ Extent of each dimension of the array. These should be non-negative
28
+ and of the same length as ``chunks``.
29
+
30
+ cost_factor:
31
+ Cost factor for iterating over each element of the associated
32
+ array. This is used to decide between iteration schemes and can be
33
+ increased for more expensive types, e.g., file-backed arrays. As a
34
+ reference, in-memory NumPy arrays are assigned a cost factor of 1.
35
+
36
+ Returns:
37
+ A ``SimpleGrid`` object with the chunk shape as the boundaries.
38
+ """
16
39
out = []
17
40
for i , ch in enumerate (chunks ):
18
41
sh = shape [i ]
@@ -42,8 +65,13 @@ def chunk_grid(x: Any) -> AbstractGrid:
42
65
43
66
44
67
@chunk_grid .register
45
- def chunk_grid_ndarray (x : ndarray ):
46
- """See :py:meth:`~delayedarray.chunk_grid.chunk_grid`."""
68
+ def chunk_grid_ndarray (x : ndarray ) -> SimpleGrid :
69
+ """
70
+ See :py:meth:`~delayedarray.chunk_grid.chunk_grid`.
71
+
72
+ The cost factor for iteration is set to 1, which is considered the lowest
73
+ cost for data extraction given that everything is stored in memory.
74
+ """
47
75
raw = [1 ] * len (x .shape )
48
76
if x .flags .f_contiguous :
49
77
raw [0 ] = x .shape [0 ]
@@ -52,15 +80,21 @@ def chunk_grid_ndarray(x: ndarray):
52
80
# to figure that out from NumPy flags. Guess we should just assume
53
81
# that it's C-contiguous, given that most things are.
54
82
raw [- 1 ] = x .shape [- 1 ]
55
- return _chunk_shape_to_grid (raw , x .shape , cost_factor = 1 )
83
+ return chunk_shape_to_grid (raw , x .shape , cost_factor = 1 )
56
84
57
85
58
86
@chunk_grid.register
def chunk_grid_SparseNdarray(x: SparseNdarray) -> SimpleGrid:
    """
    See :py:meth:`~delayedarray.chunk_grid.chunk_grid`.

    The chunk shape covers the full extent of the first dimension and has
    length 1 on every other dimension.

    The cost factor for iteration is set to 1.5. This is slightly higher than
    that of dense NumPy arrays as the ``SparseNdarray`` is a bit more expensive
    for random access on the first dimension.
    """
    ndim = len(x.shape)
    # Full extent along the first dimension, unit extent everywhere else.
    chunk_shape = [x.shape[0]] + [1] * (ndim - 1)
    return chunk_shape_to_grid(chunk_shape, x.shape, cost_factor=1.5)
64
98
65
99
66
100
# If scipy is installed, we add all the methods for the various scipy.sparse matrices.
@@ -70,19 +104,36 @@ def chunk_grid_SparseNdarray(x: SparseNdarray):
70
104
71
105
72
106
@chunk_grid.register
def chunk_grid_csc_matrix(x: sp.csc_matrix) -> SimpleGrid:
    """
    See :py:meth:`~delayedarray.chunk_grid.chunk_grid`.

    Each chunk is a single column spanning all rows.

    The cost factor for iteration is set to 1.5. This is slightly higher
    than that of dense NumPy arrays as CSC matrices are a bit more
    expensive for random row access.
    """
    nrow = x.shape[0]
    column_chunk = (nrow, 1)
    return chunk_shape_to_grid(column_chunk, x.shape, cost_factor=1.5)
76
116
77
117
78
118
@chunk_grid.register
def chunk_grid_csr_matrix(x: sp.csr_matrix) -> SimpleGrid:
    """
    See :py:meth:`~delayedarray.chunk_grid.chunk_grid`.

    Each chunk is a single row spanning all columns.

    The cost factor for iteration is set to 1.5. This is slightly higher
    than that of dense NumPy arrays as CSR matrices are a bit more
    expensive for random column access.
    """
    ncol = x.shape[1]
    row_chunk = (1, ncol)
    return chunk_shape_to_grid(row_chunk, x.shape, cost_factor=1.5)
82
128
83
129
84
130
@chunk_grid.register
def chunk_grid_coo_matrix(x: sp.coo_matrix) -> SimpleGrid:
    """
    See :py:meth:`~delayedarray.chunk_grid.chunk_grid`.

    The cost factor for iteration is set to 5, as any extraction from a COO
    matrix requires a full scan through all elements.
    """
    # There is no nice way to access a COO matrix along either dimension, so
    # as a best effort the chunk shape is simply set to the full matrix shape.
    extent = x.shape
    return chunk_shape_to_grid(extent, extent, cost_factor=5)
0 commit comments