Skip to content

Commit

Permalink
feat: add to_dicts
Browse files Browse the repository at this point in the history
  • Loading branch information
NickCrews committed Jan 21, 2025
1 parent dc23b9f commit 7703686
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 1 deletion.
28 changes: 28 additions & 0 deletions ibis/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,34 @@ def to_delta(
with expr.to_pyarrow_batches(params=params) as batch_reader:
write_deltalake(path, batch_reader, **kwargs)

@util.experimental
def to_dicts(self, expr, chunk_size: int = 1_000_000) -> Iterable[dict[str, Any]]:
    """Iterate through each row of `expr` as a `dict` of column_name -> value.

    Parameters
    ----------
    expr
        The ibis expression to execute and iterate over.
    chunk_size
        We materialize the results in chunks of this size, to keep memory
        usage under control. Larger values probably will be faster but
        consume more memory.

    Returns
    -------
    Iterable[dict[str, Any]]
        An iterator of dictionaries, each representing a row in the table.

    Examples
    --------
    >>> import ibis
    >>> t = ibis.memtable({"i": [1, 2, 3], "s": ["a", "b", "c"]})
    >>> con = ibis.get_backend(t)
    >>> list(con.to_dicts(t))
    [{'i': 1, 's': 'a'}, {'i': 2, 's': 'b'}, {'i': 3, 's': 'c'}]

    See Also
    --------
    [`Column.to_list`](./expression-generic.qmd#ibis.expr.types.generic.Column.to_list)
    """
    # The expression layer calls `backend.to_dicts(expr, chunk_size=...)`, so
    # the expression has to be an explicit parameter here and must be
    # forwarded to `to_pyarrow_batches`.
    #
    # Use the reader as a context manager (same pattern as `to_delta`) so the
    # underlying resources are released even if the caller stops iterating
    # before the results are exhausted.
    with self.to_pyarrow_batches(expr, chunk_size=chunk_size) as batch_reader:
        for batch in batch_reader:
            # `to_pylist` turns one record batch into a list of row dicts.
            yield from batch.to_pylist()

@util.experimental
def to_json(
self,
Expand Down
35 changes: 34 additions & 1 deletion ibis/expr/types/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from ibis.util import experimental

if TYPE_CHECKING:
from collections.abc import Iterator, Mapping
from collections.abc import Iterable, Iterator, Mapping
from pathlib import Path

import pandas as pd
Expand Down Expand Up @@ -771,6 +771,39 @@ def to_delta(
"""
self._find_backend(use_default=True).to_delta(self, path, **kwargs)

@experimental
def to_dicts(self, chunk_size: int = 1_000_000) -> Iterable[dict[str, Any]]:
    """Iterate through each row as a `dict` of column_name -> value.

    Parameters
    ----------
    chunk_size
        We materialize the results in chunks of this size, to keep memory
        usage under control. Larger values probably will be faster but
        consume more memory.

    Returns
    -------
    Iterable[dict[str, Any]]
        An iterator of dictionaries, each representing a row in the table.

    Examples
    --------
    >>> import ibis
    >>> t = ibis.memtable({"i": [1, 2, 3], "s": ["a", "b", "c"]})
    >>> list(t.to_dicts())
    [{'i': 1, 's': 'a'}, {'i': 2, 's': 'b'}, {'i': 3, 's': 'c'}]

    Single Columns are returned as dictionaries with a single key:

    >>> column = t.i
    >>> list(column.to_dicts())
    [{'i': 1}, {'i': 2}, {'i': 3}]

    See Also
    --------
    [`Column.to_list`](./expression-generic.qmd#ibis.expr.types.generic.Column.to_list)
    """
    backend = self._find_backend(use_default=True)
    # The backend's iterable must be returned to the caller; without the
    # `return` this method silently yields `None` instead of the rows.
    return backend.to_dicts(self, chunk_size=chunk_size)

@experimental
def to_json(
self,
Expand Down

0 comments on commit 7703686

Please sign in to comment.