Skip to content

Commit 3c2381e

Browse files
committed
Cleaned up the internal subsetting utilities.
This reduces support for ``None`` in the subset argument; a no-op subset must now be specified explicitly as a range (e.g., ``range(n)`` for a dimension of extent ``n``) instead.
1 parent 3117799 commit 3c2381e

7 files changed

+44
-37
lines changed

src/delayedarray/Grid.py

+15-8
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
import bisect
33
import abc
44

5+
from ._subset import _is_single_subset_noop
6+
57

68
class AbstractGrid(abc.ABC):
79
"""
@@ -39,7 +41,7 @@ def transpose(self, perm: Tuple[int, ...]) -> "AbstractGrid":
3941

4042

4143
@abc.abstractmethod
42-
def subset(self, subset: Tuple[Optional[Sequence[int]], ...]) -> "AbstractGrid":
44+
def subset(self, subset: Tuple[Sequence[int], ...]) -> "AbstractGrid":
4345
pass
4446

4547

@@ -169,7 +171,7 @@ def transpose(self, perm: Tuple[int, ...]) -> "SimpleGrid":
169171
)
170172

171173

172-
def subset(self, subset: Tuple[Optional[Sequence[int]], ...]) -> "SimpleGrid":
174+
def subset(self, subset: Tuple[Sequence[int], ...]) -> "SimpleGrid":
173175
"""
174176
Subset a grid to reflect the same operation on the associated array.
175177
For any given dimension, consecutive elements in the subset are only
@@ -195,7 +197,7 @@ def subset(self, subset: Tuple[Optional[Sequence[int]], ...]) -> "SimpleGrid":
195197
new_maxgap = []
196198
for i, bounds in enumerate(self._boundaries):
197199
cursub = subset[i]
198-
if cursub is None:
200+
if _is_single_subset_noop(self._shape[i], cursub):
199201
new_boundaries.append(bounds)
200202
new_shape.append(self._shape[i])
201203
new_maxgap.append(self._maxgap[i])
@@ -504,7 +506,7 @@ def transpose(self, perm: Tuple[int, ...]) -> "CompositeGrid":
504506
)
505507

506508

507-
def subset(self, subset: Tuple[Optional[Sequence[int]], ...]) -> "CompositeGrid":
509+
def subset(self, subset: Tuple[Sequence[int], ...]) -> "CompositeGrid":
508510
"""
509511
Subset a grid to reflect the same operation on the associated array.
510512
This splits up the subset sequence for the ``along`` dimension and
@@ -515,14 +517,19 @@ def subset(self, subset: Tuple[Optional[Sequence[int]], ...]) -> "CompositeGrid"
515517
Tuple of length equal to the number of grid dimensions. Each
516518
entry should be a (possibly unsorted) sequence of integers,
517519
specifying the subset to apply to each dimension of the grid.
518-
Alternatively, an entry may be None if no subsetting is to be
519-
applied to the corresponding dimension.
520520
521521
Returns:
522522
A new ``CompositeGrid`` object.
523523
"""
524-
if subset[self._along] is None:
525-
new_components = [grid.subset(subset) for grid in self._components]
524+
if len(subset) != len(self._shape):
525+
raise ValueError("'shape' and 'subset' should have the same length")
526+
527+
if _is_single_subset_noop(self._shape[self._along], subset[self._along]):
528+
new_components = []
529+
new_subset = list(subset)
530+
for grid in self._components:
531+
new_subset[self._along] = range(grid.shape[self._along])
532+
new_components.append(grid.subset((*new_subset,)))
526533
return CompositeGrid(new_components, self._along)
527534

528535
component_limits = []

src/delayedarray/Subset.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from .DelayedOp import DelayedOp
66
from .SparseNdarray import SparseNdarray
7-
from ._subset import _spawn_indices, _sanitize_subset
7+
from ._subset import _sanitize_subset
88
from .extract_dense_array import extract_dense_array
99
from .extract_sparse_array import extract_sparse_array
1010
from .create_dask_array import create_dask_array
@@ -88,9 +88,6 @@ def subset(self) -> Tuple[Sequence[int], ...]:
8888

8989

9090
def _extract_array(x: Subset, subset: Tuple[Sequence[int], ...], f: Callable):
91-
if subset is None:
92-
subset = _spawn_indices(x.shape)
93-
9491
newsub = list(subset)
9592
expanded = []
9693
is_safe = 0

src/delayedarray/Transpose.py

-4
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
from .DelayedOp import DelayedOp
66
from .SparseNdarray import SparseNdarray
7-
from ._subset import _spawn_indices
87
from .extract_dense_array import extract_dense_array
98
from .extract_sparse_array import extract_sparse_array
109
from .create_dask_array import create_dask_array
@@ -96,9 +95,6 @@ def perm(self) -> Tuple[int, ...]:
9695

9796

9897
def _extract_array(x: Transpose, subset: Tuple[Sequence[int], ...], f: Callable):
99-
if subset is None:
100-
subset = _spawn_indices(x.shape)
101-
10298
permsub = [None] * len(subset)
10399
for i, j in enumerate(x._perm):
104100
permsub[j] = subset[i]

src/delayedarray/UnaryIsometricOpWithArgs.py

-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from .DelayedOp import DelayedOp
77
from .SparseNdarray import SparseNdarray
88
from ._isometric import ISOMETRIC_OP_WITH_ARGS, _execute, _infer_along_with_args
9-
from ._subset import _spawn_indices
109
from .extract_dense_array import extract_dense_array
1110
from .extract_sparse_array import extract_sparse_array
1211
from .create_dask_array import create_dask_array
@@ -149,8 +148,6 @@ def _extract_array(x: UnaryIsometricOpWithArgs, subset: Tuple[Sequence[int], ...
149148

150149
subvalue = x._value
151150
if isinstance(subvalue, ndarray) and not subvalue is numpy.ma.masked:
152-
if subset is None:
153-
subset = _spawn_indices(x.shape)
154151
if len(subvalue.shape) == 1:
155152
subvalue = subvalue[subset[-1]]
156153
else:

src/delayedarray/_subset.py

+25-15
Original file line numberDiff line numberDiff line change
@@ -7,31 +7,41 @@ def _spawn_indices(shape: Tuple[int, ...]) -> Tuple[Sequence[int], ...]:
77

88

99
def _is_subset_consecutive(subset: Sequence):
10+
if isinstance(subset, range):
11+
return subset.step == 1
1012
for s in range(1, len(subset)):
1113
if subset[s] != subset[s-1]+1:
1214
return False
1315
return True
1416

1517

16-
def _is_subset_noop(shape: Tuple[int, ...], subset: Tuple[Sequence, ...]):
17-
if subset is not None:
18-
for i, s in enumerate(shape):
19-
cursub = subset[i]
20-
if len(cursub) != s:
21-
return False
22-
for j in range(s):
23-
if cursub[j] != j:
24-
return False
18+
def _is_single_subset_noop(extent: int, subset: Sequence[int]) -> bool:
19+
if isinstance(subset, range):
20+
return subset == range(extent)
21+
if len(subset) != extent:
22+
return False
23+
for i, s in enumerate(subset):
24+
if s != i:
25+
return False
2526
return True
2627

2728

28-
def _sanitize_subset(subset: Sequence):
29-
okay = True
30-
for i in range(1, len(subset)):
31-
if subset[i] <= subset[i - 1]:
32-
okay = False
33-
break
29+
def _is_subset_noop(shape: Tuple[int, ...], subset: Tuple[Sequence, ...]) -> bool:
30+
for i, s in enumerate(shape):
31+
if not _is_single_subset_noop(s, subset[i]):
32+
return False
33+
return True
3434

35+
36+
def _sanitize_subset(subset: Sequence):
37+
if isinstance(subset, range):
38+
okay = (subset.step > 0)
39+
else:
40+
okay = True
41+
for i in range(1, len(subset)):
42+
if subset[i] <= subset[i - 1]:
43+
okay = False
44+
break
3545
if okay:
3646
return subset, None
3747

src/delayedarray/extract_sparse_array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from typing import Any, Tuple, Sequence, List, Union
55
from biocutils.package_utils import is_package_installed
66

7-
from ._subset import _spawn_indices, _is_subset_noop, _is_subset_consecutive
7+
from ._subset import _is_subset_noop, _is_subset_consecutive
88
from ._mask import _convert_to_unmasked_1darray, _convert_to_maybe_masked_1darray, _allocate_unmasked_ndarray, _allocate_maybe_masked_ndarray
99
from .SparseNdarray import SparseNdarray, _extract_sparse_array_from_SparseNdarray
1010

tests/test_Grid.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def test_SimpleGrid_subset():
130130
grid = delayedarray.SimpleGrid((range(10, 51, 10), range(2, 21, 3)), cost_factor=1)
131131

132132
# No-op subsetting.
133-
subgrid = grid.subset((None, None))
133+
subgrid = grid.subset((*(range(s) for s in grid.shape),))
134134
assert subgrid.shape == grid.shape
135135
assert subgrid.boundaries == grid.boundaries
136136
assert subgrid.cost == grid.cost
@@ -267,7 +267,7 @@ def test_CompositeGrid_subset():
267267
combined = delayedarray.CompositeGrid([grid1, grid2], along=0)
268268

269269
# No-op subsetting.
270-
subcombined = combined.subset((None, None))
270+
subcombined = combined.subset((*(range(s) for s in combined.shape),))
271271
assert combined.shape == subcombined.shape
272272
assert subcombined.boundaries == combined.boundaries
273273
assert subcombined.cost == combined.cost

0 commit comments

Comments
 (0)