Skip to content

Commit 4c12481

Browse files
committed
Handle zero-extent arrays during apply_over_* functions.
Now, these functions simply perform no iterations when an iterated dimension has zero extent. We also document that all block/chunk shapes are expected to be positive, so as to avoid divide-by-zero errors in downstream functions.
1 parent bed54ba commit 4c12481

7 files changed

+73
-25
lines changed

src/delayedarray/apply_over_blocks.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,14 @@ def choose_block_shape_for_iteration(x, memory: int = 10000000) -> Tuple[int, ..
2424
memory: Available memory in bytes, to hold a single block in memory.
2525
2626
Returns:
27-
Dimensions of the blocks.
27+
Dimensions of the blocks. All values are guaranteed to be positive,
28+
even if the extent of any dimension of ``x`` is zero.
2829
"""
30+
# Checking for empty dimensions and bailing out if we find any.
31+
for d in x.shape:
32+
if d == 0:
33+
return (*(max(1, d) for d in x.shape),)
34+
2935
num_elements = memory / x.dtype.itemsize
3036
chunk_dims = chunk_shape(x)
3137
block_size = 1
@@ -55,7 +61,6 @@ def choose_block_shape_for_iteration(x, memory: int = 10000000) -> Tuple[int, ..
5561
block_size = block_size_other
5662
block_dims[i] = 1
5763

58-
5964
return (*block_dims,)
6065

6166

@@ -74,8 +79,9 @@ def apply_over_blocks(x, fun: Callable, block_shape: Optional[Tuple] = None, all
7479
block is typically provided as a :py:class:`~numpy.ndarray`.
7580
7681
block_shape:
77-
Dimensionsof the block on the iteration dimension. If None, this is
78-
chosen by :py:func:`~choose_block_shape_for_iteration`.
82+
Dimensions of the block. All entries should be positive, even for
83+
zero-extent dimensions of ``x``. If None, this is chosen by
84+
:py:func:`~choose_block_shape_for_iteration`.
7985
8086
allow_sparse:
8187
Whether to allow extraction of sparse subarrays. If true and

src/delayedarray/apply_over_dimension.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,14 @@
1010
__copyright__ = "ltla"
1111
__license__ = "MIT"
1212

13+
1314
def guess_iteration_block_size(x, dimension, memory: int = 10000000) -> int:
1415
"""
1516
Soft-deprecated alias for :py:func:`~choose_block_size_for_1d_iteration`.
1617
"""
1718
return choose_block_size_for_1d_iteration(x, dimension, memory)
1819

20+
1921
def choose_block_size_for_1d_iteration(x, dimension: int, memory: int = 10000000) -> int:
2022
"""
2123
Choose a block size for iterating over an array on a certain dimension,
@@ -29,24 +31,31 @@ def choose_block_size_for_1d_iteration(x, dimension: int, memory: int = 10000000
2931
memory: Available memory in bytes, to hold a single block in memory.
3032
3133
Returns:
32-
Size of the block on the iteration dimension.
34+
Size of the block on the iteration dimension. This is guaranteed to be
35+
positive, even if the extent of the dimension of ``x`` is zero.
3336
"""
34-
num_elements = memory / x.dtype.itemsize
3537
shape = x.shape
38+
fulldim = shape[dimension]
3639

3740
prod_other = 1
3841
for i, s in enumerate(shape):
42+
if s == 0:
43+
# Bailing out if there's a zero-length dimension anywhere.
44+
# We set a floor of 1 to avoid divide-by-zero errors.
45+
return max(1, fulldim)
3946
if i != dimension:
4047
prod_other *= s
4148

49+
num_elements = memory / x.dtype.itemsize
4250
ideal = int(num_elements / prod_other)
4351
if ideal == 0:
4452
return 1
53+
if ideal >= fulldim:
54+
return fulldim
4555

4656
curdim = chunk_shape(x)[dimension]
4757
if ideal <= curdim:
4858
return ideal
49-
5059
return int(ideal / curdim) * curdim
5160

5261

@@ -69,7 +78,8 @@ def apply_over_dimension(x, dimension: int, fun: Callable, block_size: Optional[
6978
Each block is typically provided as a :py:class:`~numpy.ndarray`.
7079
7180
block_size:
72-
Size of the block on the iteration dimension. If None, this is
81+
Size of the block on the iteration dimension. This should be a
82+
positive integer, even for zero-extent dimensions. If None, this is
7383
chosen by :py:func:`~choose_block_size_for_1d_iteration`.
7484
7585
allow_sparse:

src/delayedarray/chunk_shape.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,13 @@
1212

1313
@singledispatch
1414
def chunk_shape(x: Any) -> Tuple[int, ...]:
15-
"""Get the dimensions of the array chunks. These define the preferred
16-
blocks with which to iterate over the array in each dimension.
15+
"""
16+
Get the dimensions of the array chunks. These define the preferred
17+
intervals with which to iterate over the array in each dimension, usually
18+
reflecting a particular layout on disk or in memory. The extent of each
19+
chunk dimension should be positive and less than that of the array,
20+
except for zero-length dimensions, in which case the chunk's extent should
21+
be greater than the array's extent (typically 1, to avoid divide-by-zero errors).
1722
1823
Args:
1924
x: An array-like object.

tests/test_DelayedArray.py

+16-12
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,10 @@ def test_SparseNdarray_mean_dense(mask_rate, buffer_size):
150150
y = delayedarray.wrap(numpy.ma.MaskedArray([1], mask=True)) + 20
151151
assert y.mean() is numpy.ma.masked
152152

153-
# # Zero-length array is respected.
154-
# y = delayedarray.wrap(numpy.ndarray((10, 0))) * 50
155-
# assert numpy.isnan(y.mean())
153+
# Zero-length array is respected.
154+
with pytest.warns(RuntimeWarning):
155+
y = delayedarray.wrap(numpy.ndarray((10, 0))) * 50
156+
assert numpy.isnan(y.mean())
156157

157158

158159
@pytest.mark.parametrize("mask_rate", [0, 0.5])
@@ -179,9 +180,10 @@ def test_SparseNdarray_mean_sparse(mask_rate, buffer_size):
179180
y = delayedarray.wrap(ref) / 5
180181
assert y.mean() is numpy.ma.masked
181182

182-
# # Zero-length array is respected.
183-
# y = delayedarray.wrap(delayedarray.SparseNdarray((0,), None)) * 50
184-
# assert numpy.isnan(y.mean())
183+
# Zero-length array is respected.
184+
with pytest.warns(RuntimeWarning):
185+
y = delayedarray.wrap(delayedarray.SparseNdarray((0,), None, dtype=numpy.int32, index_dtype=numpy.int32)) * 50
186+
assert numpy.isnan(y.mean())
185187

186188

187189
@pytest.mark.parametrize("mask_rate", [0, 0.5])
@@ -210,9 +212,10 @@ def test_SparseNdarray_var_dense(mask_rate, buffer_size):
210212
with pytest.warns(RuntimeWarning):
211213
assert y.var() is numpy.ma.masked
212214

213-
# # Zero-length array is respected.
214-
# y = delayedarray.wrap(numpy.ndarray((10, 0))) * 50
215-
# assert numpy.isnan(y.var())
215+
# Zero-length array is respected.
216+
with pytest.warns(RuntimeWarning):
217+
y = delayedarray.wrap(numpy.ndarray((10, 0))) * 50
218+
assert numpy.isnan(y.var())
216219

217220

218221
@pytest.mark.parametrize("mask_rate", [0, 0.5])
@@ -240,6 +243,7 @@ def test_SparseNdarray_var_sparse(mask_rate, buffer_size):
240243
with pytest.warns(RuntimeWarning):
241244
assert y.var() is numpy.ma.masked
242245

243-
# # Zero-length array is respected.
244-
# y = delayedarray.wrap(delayedarray.SparseNdarray((0,), None)) * 50
245-
# assert numpy.isnan(y.var())
246+
# Zero-length array is respected.
247+
with pytest.warns(RuntimeWarning):
248+
y = delayedarray.wrap(delayedarray.SparseNdarray((0,), None, dtype=numpy.int32, index_dtype=numpy.int32)) * 50
249+
assert numpy.isnan(y.var())

tests/test_SparseNdarray.py

-1
Original file line numberDiff line numberDiff line change
@@ -1223,4 +1223,3 @@ def test_SparseNdarray_var(mask_rate):
12231223
y = delayedarray.SparseNdarray((0,), None, dtype=numpy.dtype("float64"), index_dtype=numpy.dtype("int8"))
12241224
with pytest.warns(RuntimeWarning):
12251225
assert numpy.isnan(y.var())
1226-

tests/test_apply_over_blocks.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ def test_choose_block_shape_for_iteration():
3434
assert da.choose_block_shape_for_iteration(x, memory=0) == (1, 1)
3535
assert da.choose_block_shape_for_iteration(x, memory=40) == (1, 5)
3636

37+
# Behaves correctly with empty objects.
38+
empty = np.random.rand(100, 0)
39+
assert da.choose_block_shape_for_iteration(empty) == (100, 1)
40+
3741
x = _ChunkyBoi((100, 200), (20, 25))
3842
assert da.choose_block_shape_for_iteration(x, memory=4000) == (20, 25)
3943
assert da.choose_block_shape_for_iteration(x, memory=40000) == (100, 50)
@@ -48,7 +52,7 @@ def _dense_sum(position, block):
4852

4953

5054
@pytest.mark.parametrize("mask_rate", [0, 0.2])
51-
def test_apply_over_dimension_dense(mask_rate):
55+
def test_apply_over_block_dense(mask_rate):
5256
x = np.ndarray([100, 200])
5357
counter = 0
5458
for i in range(x.shape[0]):
@@ -72,7 +76,7 @@ def test_apply_over_dimension_dense(mask_rate):
7276

7377

7478
@pytest.mark.parametrize("mask_rate", [0, 0.2])
75-
def test_apply_over_dimension_sparse(mask_rate):
79+
def test_apply_over_block_sparse(mask_rate):
7680
x = simulate_SparseNdarray((100, 200), mask_rate=mask_rate)
7781

7882
expected = 0
@@ -105,3 +109,9 @@ def _sparse_sum(position, block):
105109
assert np.allclose(expected, sum(y[1] for y in output))
106110
assert output[0][0] == [(0, 3), (0, 7)]
107111
assert output[-1][0] == [(99, 100), (196, 200)]
112+
113+
114+
def test_apply_over_block_empty():
115+
x = np.ndarray([100, 0])
116+
output = da.apply_over_blocks(x, _dense_sum)
117+
assert len(output) == 0

tests/test_apply_over_dimension.py

+14
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ def test_choose_block_size_for_1d_iteration():
3434
assert da.choose_block_size_for_1d_iteration(x, 0, memory=0) == 1
3535
assert da.choose_block_size_for_1d_iteration(x, 1, memory=0) == 1
3636

37+
# Behaves correctly with empty objects.
38+
empty = np.random.rand(100, 0)
39+
assert da.choose_block_size_for_1d_iteration(empty, 0) == 100
40+
assert da.choose_block_size_for_1d_iteration(empty, 1) == 1
41+
3742
# Making a slightly more complex situation.
3843
x = _ChunkyBoi((100, 200), (20, 25))
3944
assert da.choose_block_size_for_1d_iteration(x, 0, memory=4000) == 2
@@ -125,3 +130,12 @@ def _sparse_sum(position, block):
125130
assert np.allclose(expected, sum(y[1] for y in output))
126131
assert output[0][0] == (0, 7)
127132
assert output[-1][0] == (196, 200)
133+
134+
135+
def test_apply_over_dimension_empty():
136+
x = np.ndarray([100, 0])
137+
output = da.apply_over_dimension(x, 0, _dense_sum)
138+
assert len(output) == 1
139+
140+
output = da.apply_over_dimension(x, 1, _dense_sum)
141+
assert len(output) == 0

0 commit comments

Comments
 (0)