Skip to content

Commit f8e057e

Browse files
authored
Deduplicate serialization for accessors (#334)
Previously, we had two copies of accessor serialization that were doing essentially the same thing. This commit deduplicates them into a single accessor serializer that allows any "json-serializable" value through (technically a superset: whatever ipywidgets' default serialization supports) and otherwise assumes the input is a pyarrow column and serializes that to a parquet file.
1 parent de30e82 commit f8e057e

File tree

3 files changed

+13
-28
lines changed

3 files changed

+13
-28
lines changed

lonboard/_serialization.py

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ def serialize_table_to_parquet(table: pa.Table, *, max_chunksize: int) -> List[b
2424
buffers: List[bytes] = []
2525
# NOTE: passing `max_chunksize=0` creates an infinite loop
2626
# https://github.com/apache/arrow/issues/39788
27+
assert max_chunksize > 0
28+
2729
for record_batch in table.to_batches(max_chunksize=max_chunksize):
2830
with BytesIO() as bio:
2931
with pq.ParquetWriter(
@@ -51,25 +53,13 @@ def serialize_pyarrow_column(data: pa.Array, *, max_chunksize: int) -> List[byte
5153
return serialize_table_to_parquet(pyarrow_table, max_chunksize=max_chunksize)
5254

5355

54-
def serialize_color_accessor(
55-
data: Union[List[int], Tuple[int], NDArray[np.uint8]], obj
56-
):
57-
if data is None:
58-
return None
59-
60-
if isinstance(data, (list, tuple)):
61-
return data
62-
63-
assert isinstance(data, (pa.ChunkedArray, pa.Array))
64-
validate_accessor_length_matches_table(data, obj.table)
65-
return serialize_pyarrow_column(data, max_chunksize=obj._rows_per_chunk)
66-
67-
68-
def serialize_float_accessor(data: Union[int, float, NDArray[np.floating]], obj):
56+
def serialize_accessor(data: Union[List[int], Tuple[int], NDArray[np.uint8]], obj):
6957
if data is None:
7058
return None
7159

72-
if isinstance(data, (str, int, float)):
60+
# We assume data has already been validated to the right type for this accessor
61+
# Allow any json-serializable type through
62+
if isinstance(data, (str, int, float, list, tuple, bytes)):
7363
return data
7464

7565
assert isinstance(data, (pa.ChunkedArray, pa.Array))
@@ -98,7 +88,5 @@ def validate_accessor_length_matches_table(accessor, table):
9888
raise TraitError("accessor must have same length as table")
9989

10090

101-
COLOR_SERIALIZATION = {"to_json": serialize_color_accessor}
102-
# TODO: rename as it's used for text as well
103-
FLOAT_SERIALIZATION = {"to_json": serialize_float_accessor}
91+
ACCESSOR_SERIALIZATION = {"to_json": serialize_accessor}
10492
TABLE_SERIALIZATION = {"to_json": serialize_table}

lonboard/experimental/traits.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,7 @@
77
import pyarrow as pa
88
from traitlets.traitlets import TraitType
99

10-
from lonboard._serialization import (
11-
COLOR_SERIALIZATION,
12-
)
10+
from lonboard._serialization import ACCESSOR_SERIALIZATION
1311
from lonboard.traits import FixedErrorTraitType
1412

1513

@@ -39,7 +37,7 @@ def __init__(
3937
**kwargs: Any,
4038
) -> None:
4139
super().__init__(*args, **kwargs)
42-
self.tag(sync=True, **COLOR_SERIALIZATION)
40+
self.tag(sync=True, **ACCESSOR_SERIALIZATION)
4341

4442
def validate(
4543
self, obj, value

lonboard/traits.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@
1818
from typing_extensions import Self
1919

2020
from lonboard._serialization import (
21-
COLOR_SERIALIZATION,
22-
FLOAT_SERIALIZATION,
21+
ACCESSOR_SERIALIZATION,
2322
TABLE_SERIALIZATION,
2423
)
2524

@@ -206,7 +205,7 @@ def __init__(
206205
**kwargs: Any,
207206
) -> None:
208207
super().__init__(*args, **kwargs)
209-
self.tag(sync=True, **COLOR_SERIALIZATION)
208+
self.tag(sync=True, **ACCESSOR_SERIALIZATION)
210209

211210
def validate(
212211
self, obj, value
@@ -332,7 +331,7 @@ def __init__(
332331
**kwargs: Any,
333332
) -> None:
334333
super().__init__(*args, **kwargs)
335-
self.tag(sync=True, **FLOAT_SERIALIZATION)
334+
self.tag(sync=True, **ACCESSOR_SERIALIZATION)
336335

337336
def validate(self, obj, value) -> Union[float, pa.ChunkedArray, pa.DoubleArray]:
338337
if isinstance(value, (int, float)):
@@ -402,7 +401,7 @@ def __init__(
402401
**kwargs: Any,
403402
) -> None:
404403
super().__init__(*args, **kwargs)
405-
self.tag(sync=True, **FLOAT_SERIALIZATION)
404+
self.tag(sync=True, **ACCESSOR_SERIALIZATION)
406405

407406
def validate(self, obj, value) -> Union[float, pa.ChunkedArray, pa.DoubleArray]:
408407
if isinstance(value, str):

0 commit comments

Comments
 (0)