Skip to content

Commit 63ddd47

Browse files
committed
Merge branch 'main' into one-pass
2 parents e29e929 + 8f3fb27 commit 63ddd47

File tree

10 files changed

+222
-122
lines changed

10 files changed

+222
-122
lines changed

.pre-commit-config.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ repos:
2424
- --target-version=py312
2525

2626
- repo: https://github.com/astral-sh/ruff-pre-commit
27-
rev: v0.4.3
27+
rev: v0.5.2
2828
hooks:
2929
- id: ruff
3030

@@ -35,7 +35,7 @@ repos:
3535
language_version: python3
3636

3737
- repo: https://github.com/asottile/pyupgrade
38-
rev: v3.15.2
38+
rev: v3.16.0
3939
hooks:
4040
- id: pyupgrade
4141
args:
@@ -52,16 +52,17 @@ repos:
5252
- id: yesqa
5353

5454
- repo: https://github.com/adamchainz/blacken-docs
55-
rev: 1.16.0
55+
rev: 1.18.0
5656
hooks:
5757
- id: blacken-docs
5858
additional_dependencies:
5959
- black
6060

6161
- repo: https://github.com/pre-commit/mirrors-mypy
62-
rev: v1.10.0
62+
rev: v1.10.1
6363
hooks:
6464
- id: mypy
65+
files: "src/"
6566
args: [--ignore-missing-imports]
6667
additional_dependencies:
6768
- dask

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,8 @@ src_paths = ["src", "tests"]
111111

112112
[tool.mypy]
113113
python_version = "3.9"
114-
files = ["src", "tests"]
114+
files = ["src"]
115+
exclude = ["tests/"]
115116
strict = false
116117
warn_unused_configs = true
117118
show_error_codes = true

src/dask_awkward/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@
9494
with_field,
9595
with_name,
9696
with_parameter,
97+
without_field,
9798
without_parameters,
9899
zeros_like,
99100
zip,

src/dask_awkward/lib/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
with_field,
8484
with_name,
8585
with_parameter,
86+
without_field,
8687
without_parameters,
8788
zeros_like,
8889
zip,

src/dask_awkward/lib/core.py

Lines changed: 14 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
TypeTracerArray,
2828
create_unknown_scalar,
2929
is_unknown_scalar,
30-
touch_data,
3130
)
3231
from dask.base import (
3332
DaskMethodsMixin,
@@ -48,7 +47,6 @@
4847

4948
from dask_awkward.layers import AwkwardBlockwiseLayer, AwkwardMaterializedLayer
5049
from dask_awkward.lib.optimize import all_optimizations
51-
from dask_awkward.lib.utils import commit_to_reports
5250
from dask_awkward.utils import (
5351
DaskAwkwardNotImplemented,
5452
IncompatiblePartitions,
@@ -400,10 +398,6 @@ def name(self) -> str:
400398
def key(self) -> Key:
401399
return (self._name, 0)
402400

403-
@property
404-
def report(self):
405-
return getattr(self._meta, "_report", set())
406-
407401
def _check_meta(self, m):
408402
if isinstance(m, MaybeNone):
409403
return ak.Array(m.content)
@@ -524,7 +518,6 @@ def f(self, other):
524518
meta = op(self._meta, other._meta)
525519
else:
526520
meta = op(self._meta, other)
527-
commit_to_reports(name, self.report)
528521
return new_scalar_object(graph, name, meta=meta)
529522

530523
return f
@@ -720,9 +713,7 @@ def _check_meta(self, m: Any | None) -> Any | None:
720713
def __getitem__(self, where):
721714
token = tokenize(self, where)
722715
new_name = f"{where}-{token}"
723-
report = self.report
724716
new_meta = self._meta[where]
725-
commit_to_reports(new_name, report)
726717

727718
# first check for array type return
728719
if isinstance(new_meta, ak.Array):
@@ -732,8 +723,6 @@ def __getitem__(self, where):
732723
graphlayer,
733724
dependencies=[self],
734725
)
735-
new_meta._report = report
736-
hlg.layers[new_name].meta = new_meta
737726
return new_array_object(hlg, new_name, meta=new_meta, npartitions=1)
738727

739728
# then check for scalar (or record) type
@@ -744,8 +733,6 @@ def __getitem__(self, where):
744733
dependencies=[self],
745734
)
746735
if isinstance(new_meta, ak.Record):
747-
new_meta._report = report
748-
hlg.layers[new_name].meta = new_meta
749736
return new_record_object(hlg, new_name, meta=new_meta)
750737
else:
751738
return new_scalar_object(hlg, new_name, meta=new_meta)
@@ -819,7 +806,7 @@ def new_record_object(dsk: HighLevelGraph, name: str, *, meta: Any) -> Record:
819806
raise TypeError(
820807
f"meta Record must have a typetracer backend, not {ak.backend(meta)}"
821808
)
822-
return out
809+
return Record(dsk, name, meta)
823810

824811

825812
def _is_numpy_or_cupy_like(arr: Any) -> bool:
@@ -950,10 +937,6 @@ def reset_meta(self) -> None:
950937
"""Assign an empty typetracer array as the collection metadata."""
951938
self._meta = empty_typetracer()
952939

953-
@property
954-
def report(self):
955-
return getattr(self._meta, "_report", set())
956-
957940
def repartition(
958941
self,
959942
npartitions: int | None = None,
@@ -989,7 +972,6 @@ def repartition(
989972
new_graph = HighLevelGraph.from_collections(
990973
key, new_layer, dependencies=(self,)
991974
)
992-
commit_to_reports(key, self.report)
993975
return new_array_object(
994976
new_graph,
995977
key,
@@ -1175,13 +1157,11 @@ def _partitions(self, index: Any) -> Array:
11751157
name = f"partitions-{token}"
11761158
new_keys = self.keys_array[index].tolist()
11771159
dsk = {(name, i): tuple(key) for i, key in enumerate(new_keys)}
1178-
layer = AwkwardMaterializedLayer(dsk, previous_layer_names=[self.name])
11791160
graph = HighLevelGraph.from_collections(
11801161
name,
1181-
layer,
1162+
AwkwardMaterializedLayer(dsk, previous_layer_names=[self.name]),
11821163
dependencies=(self,),
11831164
)
1184-
layer.meta = self._meta
11851165

11861166
# if a single partition was requested we trivially know the new divisions.
11871167
if len(raw) == 1 and isinstance(raw[0], int) and self.known_divisions:
@@ -1193,7 +1173,7 @@ def _partitions(self, index: Any) -> Array:
11931173
# otherwise nullify the known divisions
11941174
else:
11951175
new_divisions = (None,) * (len(new_keys) + 1) # type: ignore
1196-
commit_to_reports(name, self.report)
1176+
11971177
return new_array_object(
11981178
graph, name, meta=self._meta, divisions=tuple(new_divisions)
11991179
)
@@ -1415,7 +1395,6 @@ def _getitem_slice_on_zero(self, where):
14151395
AwkwardMaterializedLayer(dask, previous_layer_names=[self.name]),
14161396
dependencies=[self],
14171397
)
1418-
commit_to_reports(name, self.report)
14191398
return new_array_object(
14201399
hlg,
14211400
name,
@@ -1526,14 +1505,9 @@ def __getitem__(self, where):
15261505
raise RuntimeError("Lists containing integers are not supported.")
15271506

15281507
if isinstance(where, tuple):
1529-
out = self._getitem_tuple(where)
1530-
else:
1531-
out = self._getitem_single(where)
1532-
if self.report:
1533-
commit_to_reports(out.name, self.report)
1534-
out._meta._report = self._meta._report
1535-
out.dask.layers[out.name].meta = out._meta
1536-
return out
1508+
return self._getitem_tuple(where)
1509+
1510+
return self._getitem_single(where)
15371511

15381512
def _is_method_heuristic(self, resolved: Any) -> bool:
15391513
return callable(resolved)
@@ -1860,12 +1834,10 @@ def partitionwise_layer(
18601834
"""
18611835
pairs: list[Any] = []
18621836
numblocks: dict[str, tuple[int, ...]] = {}
1863-
reps = set()
18641837
for arg in args:
18651838
if isinstance(arg, Array):
18661839
pairs.extend([arg.name, "i"])
18671840
numblocks[arg.name] = (arg.npartitions,)
1868-
reps.update(arg.report)
18691841
elif isinstance(arg, BlockwiseDep):
18701842
if len(arg.numblocks) == 1:
18711843
pairs.extend([arg, "i"])
@@ -1885,8 +1857,6 @@ def partitionwise_layer(
18851857
)
18861858
else:
18871859
pairs.extend([arg, None])
1888-
commit_to_reports(name, reps)
1889-
18901860
layer = dask_blockwise(
18911861
func,
18921862
name,
@@ -1970,23 +1940,8 @@ def _map_partitions(
19701940
**kwargs,
19711941
)
19721942

1973-
reps = set()
1974-
try:
1975-
if meta is None:
1976-
meta = map_meta(fn, *args, **kwargs)
1977-
else:
1978-
# To do any touching??
1979-
map_meta(fn, *args, **kwargs)
1980-
meta._report = reps
1981-
lay.meta = meta
1982-
except (AssertionError, TypeError, NotImplementedError):
1983-
[touch_data(_._meta) for _ in dak_arrays]
1984-
1985-
for dep in dak_arrays:
1986-
for rep in dep.report:
1987-
if rep not in reps:
1988-
rep.commit(name)
1989-
reps.add(rep)
1943+
if meta is None:
1944+
meta = map_meta(fn, *args, **kwargs)
19901945

19911946
hlg = HighLevelGraph.from_collections(
19921947
name,
@@ -2009,6 +1964,7 @@ def _map_partitions(
20091964
new_divisions = tuple(map(lambda x: x * output_divisions, in_divisions))
20101965
else:
20111966
new_divisions = in_divisions
1967+
20121968
if output_divisions is not None:
20131969
return new_array_object(
20141970
hlg,
@@ -2239,6 +2195,10 @@ def non_trivial_reduction(
22392195
if combiner is None:
22402196
combiner = reducer
22412197

2198+
# is_positional == True is not implemented
2199+
# if is_positional:
2200+
# assert combiner is reducer
2201+
22422202
# For `axis=None`, we prepare each array to have the following structure:
22432203
# [[[ ... [x1 x2 x3 ... xN] ... ]]] (length-1 outer lists)
22442204
# This makes the subsequent reductions an `axis=-1` reduction
@@ -2313,16 +2273,14 @@ def non_trivial_reduction(
23132273
)
23142274

23152275
graph = HighLevelGraph.from_collections(name_finalize, trl, dependencies=(chunked,))
2276+
23162277
meta = reducer(
23172278
array._meta,
23182279
axis=axis,
23192280
keepdims=keepdims,
23202281
mask_identity=mask_identity,
23212282
)
2322-
trl.meta = meta
2323-
commit_to_reports(name_finalize, array.report)
23242283
if isinstance(meta, ak.highlevel.Array):
2325-
meta._report = array.report
23262284
return new_array_object(graph, name_finalize, meta=meta, npartitions=1)
23272285
else:
23282286
return new_scalar_object(graph, name_finalize, meta=meta)

0 commit comments

Comments
 (0)