Skip to content

Commit

Permalink
feat: add to_dicts
Browse files Browse the repository at this point in the history
  • Loading branch information
NickCrews committed Jan 21, 2025
1 parent dc23b9f commit f8ba765
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 1 deletion.
32 changes: 32 additions & 0 deletions ibis/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,38 @@ def to_delta(
with expr.to_pyarrow_batches(params=params) as batch_reader:
write_deltalake(path, batch_reader, **kwargs)

@util.experimental
def to_dicts(
    self, expr: ir.Table, *, chunk_size: int = 1_000_000
) -> Iterable[dict[str, Any]]:
    """Iterate through each row as a `dict` of column_name -> value.

    Parameters
    ----------
    expr
        The ibis expression to materialize to an iterable of dictionaries.
    chunk_size
        We materialize the results in chunks of this size, to keep memory
        usage under control. Larger values probably will be faster but
        consume more memory.

    Returns
    -------
    Iterable[dict[str, Any]]
        An iterator of dictionaries, each representing a row in the table.

    Examples
    --------
    >>> t = ibis.memtable({"i": [1, 2, 3], "s": ["a", "b", "c"]})
    >>> list(t.to_dicts())
    [{'i': 1, 's': 'a'}, {'i': 2, 's': 'b'}, {'i': 3, 's': 'c'}]

    See Also
    --------
    [`Column.to_list`](./expression-generic.qmd#ibis.expr.types.generic.Column.to_list)
    """
    # Hold the batch reader in a `with` block, as `to_delta` does, so that it
    # is exited (and thereby closed) both when every row has been consumed
    # and when the caller abandons this generator before exhausting it.
    with self.to_pyarrow_batches(expr, chunk_size=chunk_size) as batch_reader:
        for batch in batch_reader:
            # Each batch is converted to a list of row dicts and streamed out
            # one row at a time; only one batch is materialized at once.
            yield from batch.to_pylist()

@util.experimental
def to_json(
self,
Expand Down
18 changes: 18 additions & 0 deletions ibis/backends/tests/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import ibis.common.exceptions as com
import ibis.expr.datatypes as dt
from ibis import util
from ibis.backends.tests.base import BackendTest
from ibis.backends.tests.errors import (
DatabricksServerOperationError,
DuckDBInvalidInputException,
Expand Down Expand Up @@ -350,6 +351,23 @@ def test_table_to_csv(tmp_path, backend, awards_players):
backend.assert_frame_equal(awards_players.to_pandas(), df)


@pytest.mark.parametrize("chunk_size", [1, 1000])
def test_to_dicts(backend, chunk_size):
    """Check `to_dicts` on a full table, an empty table, and a single column.

    Runs once with a chunk size smaller than the table (1) and once with a
    chunk size larger than the table (1000).
    """
    # NOTE(review): `backend` is requested but not referenced below; the
    # memtable is not explicitly bound to it -- confirm this exercises the
    # intended backend.
    table = ibis.memtable({"i": [1, 2, 3], "s": ["a", "b", "c"]})

    # Full table: one dict per row, keyed by column name.
    all_rows = [{"i": 1, "s": "a"}, {"i": 2, "s": "b"}, {"i": 3, "s": "c"}]
    assert list(table.to_dicts(chunk_size=chunk_size)) == all_rows

    # Empty result: nothing is yielded.
    no_rows = []
    assert list(table.limit(0).to_dicts(chunk_size=chunk_size)) == no_rows

    # Single column: each row is a dict with exactly one key.
    column_rows = [{"i": 1}, {"i": 2}, {"i": 3}]
    assert list(table.i.to_dicts(chunk_size=chunk_size)) == column_rows


@pytest.mark.notimpl(
[
"athena",
Expand Down
37 changes: 36 additions & 1 deletion ibis/expr/types/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from ibis.util import experimental

if TYPE_CHECKING:
from collections.abc import Iterator, Mapping
from collections.abc import Iterable, Iterator, Mapping
from pathlib import Path

import pandas as pd
Expand Down Expand Up @@ -771,6 +771,41 @@ def to_delta(
"""
self._find_backend(use_default=True).to_delta(self, path, **kwargs)

@experimental
def to_dicts(self, *, chunk_size: int = 1_000_000) -> Iterable[dict[str, Any]]:
    """Iterate over the rows of this expression as `dict`s.

    Each yielded dictionary maps column name -> value for one row. The work
    is delegated to the `to_dicts` method of the backend located via
    `_find_backend(use_default=True)`.

    Parameters
    ----------
    chunk_size
        Results are materialized in chunks of this many rows to keep memory
        usage under control. Larger values will probably be faster but
        consume more memory.

    Returns
    -------
    Iterable[dict[str, Any]]
        An iterator of dictionaries, each representing a row in the table.

    Examples
    --------
    >>> t = ibis.memtable({"i": [1, 2, 3], "s": ["a", "b", "c"]})
    >>> list(t.to_dicts())
    [{'i': 1, 's': 'a'}, {'i': 2, 's': 'b'}, {'i': 3, 's': 'c'}]

    Single Columns are returned as dictionaries with a single key:

    >>> column = t.i
    >>> list(column.to_dicts())
    [{'i': 1}, {'i': 2}, {'i': 3}]

    See Also
    --------
    [`Column.to_list`](./expression-generic.qmd#ibis.expr.types.generic.Column.to_list)
    """
    backend = self._find_backend(use_default=True)
    return backend.to_dicts(self, chunk_size=chunk_size)

@experimental
def to_json(
self,
Expand Down

0 comments on commit f8ba765

Please sign in to comment.