Skip to content

Commit

Permalink
Allow to_pyarrow_table() to take an optional list of partitions (#221)
Browse files Browse the repository at this point in the history
This helps ensure that Python loads only the files belonging to the requested partitions, rather than every file in the table.

Fixes #220
  • Loading branch information
rtyler authored Apr 28, 2021
1 parent ad7407a commit e9dc51b
Showing 1 changed file with 17 additions and 4 deletions.
21 changes: 17 additions & 4 deletions python/deltalake/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,13 +183,23 @@ def pyarrow_schema(self) -> pyarrow.Schema:
"""
return pyarrow_schema_from_json(self._table.arrow_schema_json())

def to_pyarrow_dataset(self) -> pyarrow.dataset.Dataset:
def to_pyarrow_dataset(
self, partitions: Optional[List[Tuple]] = None
) -> pyarrow.dataset.Dataset:
"""
Build a PyArrow Dataset using data from the DeltaTable.
:param partitions: A list of partition filters, see help(DeltaTable.files_by_partitions) for filter syntax
:return: the PyArrow dataset
"""
file_paths = self._table.file_paths()
if partitions is None:
file_paths = self._table.file_paths()
else:
table_path = self._table.table_path()
file_paths = [
f"{table_path}/{file_name}"
for file_name in self._table.files_by_partitions(partitions)
]
paths = [urlparse(curr_file) for curr_file in file_paths]

# Decide based on the first file, if the file is on cloud storage or local
Expand Down Expand Up @@ -222,10 +232,13 @@ def to_pyarrow_dataset(self) -> pyarrow.dataset.Dataset:
partitioning=partitioning(flavor="hive"),
)

def to_pyarrow_table(self) -> pyarrow.Table:
def to_pyarrow_table(
self, partitions: Optional[List[Tuple]] = None
) -> pyarrow.Table:
"""
Build a PyArrow Table using data from the DeltaTable.
:param partitions: A list of partition filters, see help(DeltaTable.files_by_partitions) for filter syntax
:return: the PyArrow table
"""
return self.to_pyarrow_dataset().to_table()
return self.to_pyarrow_dataset(partitions).to_table()

0 comments on commit e9dc51b

Please sign in to comment.