Skip to content

Commit 3117799

Browse files
authored
Switch to Grid classes to control how iteration is performed. (#61)
The SimpleGrid class supports non-regular chunks and improves the alignment of blocks to the true underlying chunks for arrays wrapped by delayed subsetting operations. Delayed combines are also handled by the CompositeGrid class; this tries to dispatch iteration to each of the component arrays' grids, reducing the chance of inefficient iteration across incompatible chunk layouts. The chunk_shape() generic has been replaced with the chunk_grid() generic, which allows each array/operation class to determine how to construct an appropriate grid for its own iteration. We also add a RegularTicks class to more easily construct regular boundaries for grid construction.
1 parent 4892daf commit 3117799

30 files changed

+1246
-470
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ Any array-like object can be used as a "seed" in a `DelayedArray` provided it ha
272272
- `dtype` and `shape` properties, like those in NumPy arrays.
273273
- a method for the `extract_dense_array()` generic.
274274
- a method for the `is_masked()` generic.
275+
- a method for the `chunk_grid()` generic.
275276

276277
If the object may contain sparse data, it should also implement:
277278

@@ -280,7 +281,6 @@ If the object may contain sparse data, it should also implement:
280281

281282
It may also be desirable to implement:
282283

283-
- a method for the `chunk_shape()` generic.
284284
- a method for the `create_dask_array()` generic.
285285
- a method for the `wrap()` generic.
286286

src/delayedarray/BinaryIsometricOp.py

+23-17
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from .extract_dense_array import extract_dense_array
88
from .extract_sparse_array import extract_sparse_array
99
from .create_dask_array import create_dask_array
10-
from .chunk_shape import chunk_shape
10+
from .chunk_grid import chunk_grid
1111
from .is_sparse import is_sparse
1212
from .is_masked import is_masked
1313

@@ -130,22 +130,28 @@ def create_dask_array_BinaryIsometricOp(x: BinaryIsometricOp):
130130
return _execute(ls, rs, x._op)
131131

132132

133-
@chunk_shape.register
134-
def chunk_shape_BinaryIsometricOp(x: BinaryIsometricOp):
135-
"""See :py:meth:`~delayedarray.chunk_shape.chunk_shape`."""
136-
lchunk = chunk_shape(x._left)
137-
rchunk = chunk_shape(x._right)
138-
139-
# Not bothering with taking the lowest common denominator, as that
140-
# might be too aggressive and expanding to the entire matrix size.
141-
# We instead use the maximum chunk size (which might also expand, e.g.,
142-
# if you're combining column-major and row-major matrices; oh well).
143-
# Just accept that we'll probably need to break chunks during iteration.
144-
output = []
145-
for i in range(len(lchunk)):
146-
output.append(max(lchunk[i], rchunk[i]))
147-
148-
return (*output,)
133+
@chunk_grid.register
134+
def chunk_grid_BinaryIsometricOp(x: BinaryIsometricOp):
135+
"""See :py:meth:`~delayedarray.chunk_grid.chunk_grid`."""
136+
lchunk = chunk_grid(x._left)
137+
rchunk = chunk_grid(x._right)
138+
139+
# Favor the chunking for the more expensive grid, to avoid being penalized
140+
# heavily by suboptimal chunking for that array.
141+
#
142+
# Technically, we could optimize for the case where multiple dimensions
143+
# have the same boundaries, in which case we should favor full extraction
144+
# of the other dimensions and just iterate over the common dimensions.
145+
# This avoids any chunk discrepancies but seems like a pretty unlikely case
146+
# - if two arrays of the same shape disagree on the chunk boundaries of one
147+
# dimension, they'd probably disagree on the others as well.
148+
#
149+
# The other solution is to figure out some high-dimensional caching scheme
150+
# for the partially consumed chunks. Sounds like a royal pain.
151+
if lchunk.cost > rchunk.cost:
152+
return lchunk
153+
else:
154+
return rchunk
149155

150156

151157
@is_sparse.register

src/delayedarray/Cast.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from .extract_dense_array import extract_dense_array
66
from .extract_sparse_array import extract_sparse_array
77
from .create_dask_array import create_dask_array
8-
from .chunk_shape import chunk_shape
8+
from .chunk_grid import chunk_grid
99
from .is_sparse import is_sparse
1010
from .is_masked import is_masked
1111

@@ -84,10 +84,10 @@ def create_dask_array_Cast(x: Cast):
8484
return target.astype(x._dtype)
8585

8686

87-
@chunk_shape.register
88-
def chunk_shape_Cast(x: Cast):
89-
"""See :py:meth:`~delayedarray.chunk_shape.chunk_shape`."""
90-
return chunk_shape(x._seed)
87+
@chunk_grid.register
88+
def chunk_grid_Cast(x: Cast):
89+
"""See :py:meth:`~delayedarray.chunk_grid.chunk_grid`."""
90+
return chunk_grid(x._seed)
9191

9292

9393
@is_sparse.register

src/delayedarray/Combine.py

+7-18
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
from .SparseNdarray import _concatenate_SparseNdarrays
88
from .extract_sparse_array import extract_sparse_array
99
from .create_dask_array import create_dask_array
10-
from .chunk_shape import chunk_shape
10+
from .chunk_grid import chunk_grid
11+
from .Grid import CompositeGrid
1112
from .is_sparse import is_sparse
1213
from .is_masked import is_masked
1314

@@ -149,23 +150,11 @@ def create_dask_array_Combine(x: Combine):
149150
return numpy.concatenate((*extracted,), axis=x._along)
150151

151152

152-
@chunk_shape.register
153-
def chunk_shape_Combine(x: Combine):
154-
"""See :py:meth:`~delayedarray.chunk_shape.chunk_shape`."""
155-
chunks = [chunk_shape(s) for s in x._seeds]
156-
157-
# Not bothering with doing anything too fancy here. We just use the
158-
# maximum chunk size (which might also expand, e.g., if you're
159-
# combining column-major and row-major matrices; oh well). Just accept
160-
# that we'll probably need to break chunks during iteration.
161-
output = []
162-
for i in range(len(x._shape)):
163-
dim = []
164-
for ch in chunks:
165-
dim.append(ch[i])
166-
output.append(max(*dim))
167-
168-
return (*output,)
153+
@chunk_grid.register
154+
def chunk_grid_Combine(x: Combine):
155+
"""See :py:meth:`~delayedarray.chunk_grid.chunk_grid`."""
156+
chunks = [chunk_grid(s) for s in x._seeds]
157+
return CompositeGrid(chunks, x._along)
169158

170159

171160
@is_sparse.register

src/delayedarray/DelayedArray.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from .extract_sparse_array import extract_sparse_array
1919
from .apply_over_blocks import apply_over_blocks
2020
from .create_dask_array import create_dask_array
21-
from .chunk_shape import chunk_shape
21+
from .chunk_grid import chunk_grid
2222
from .is_sparse import is_sparse
2323
from .is_masked import is_masked
2424

@@ -78,6 +78,7 @@ class DelayedArray:
7878
- A method for the
7979
:py:meth:`~delayedarray.extract_dense_array.extract_dense_array` generic.
8080
- A method for the :py:meth:`~delayedarray.is_masked.is_masked` generic.
81+
- A method for the :py:meth:`~delayedarray.chunk_grid.chunk_grid` generic.
8182
8283
If the seed contains sparse data, it should also implement:
8384
@@ -88,11 +89,11 @@ class DelayedArray:
8889
8990
Optionally, a seed class may have:
9091
91-
- A method for the :py:meth:`~delayedarray.chunk_shape.chunk_shape` generic,
92-
if there is some preferred dimension in which to take chunks of the array.
9392
- A method for the
9493
:py:meth:`~delayedarray.create_dask_array.create_dask_array` generic,
9594
if the seed is not already compatible with the **dask** package.
95+
- A method for the ``wrap()`` generic, to create a ``DelayedArray``
96+
subclass that is specific to this seed class.
9697
"""
9798

9899
def __init__(self, seed):
@@ -847,10 +848,10 @@ def create_dask_array_DelayedArray(x: DelayedArray):
847848
return create_dask_array(x._seed)
848849

849850

850-
@chunk_shape.register
851-
def chunk_shape_DelayedArray(x: DelayedArray):
852-
"""See :py:meth:`~delayedarray.chunk_shape.chunk_shape`."""
853-
return chunk_shape(x._seed)
851+
@chunk_grid.register
852+
def chunk_grid_DelayedArray(x: DelayedArray):
853+
"""See :py:meth:`~delayedarray.chunk_grid.chunk_grid`."""
854+
return chunk_grid(x._seed)
854855

855856

856857
@is_sparse.register

0 commit comments

Comments
 (0)