Skip to content

Commit

Permalink
Manage empty delta table in Python binding
Browse files Browse the repository at this point in the history
  • Loading branch information
fvaleye authored and rtyler committed May 31, 2021
1 parent f6591b5 commit e48336d
Show file tree
Hide file tree
Showing 8 changed files with 24 additions and 0 deletions.
8 changes: 8 additions & 0 deletions python/deltalake/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,14 @@ def to_pyarrow_dataset(
file_paths = self._table.files_by_partitions(partitions)
paths = [urlparse(curr_file) for curr_file in file_paths]

empty_delta_table = len(paths) == 0
if empty_delta_table:
return dataset(
[],
schema=self.pyarrow_schema(),
partitioning=partitioning(flavor="hive"),
)

# Decide based on the first file, if the file is on cloud storage or local
if paths[0].netloc:
query_str = ""
Expand Down
8 changes: 8 additions & 0 deletions python/tests/test_table_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ def test_read_partitioned_table_with_partitions_filters_to_dict():
assert dt.to_pyarrow_dataset(partitions).to_table().to_pydict() == expected


def test_read_empty_delta_table_after_delete():
    """Check that the ``delta-0.8-empty`` fixture reads back as one empty column."""
    delta_table = DeltaTable("../rust/tests/data/delta-0.8-empty")

    # Materialize the dataset as a plain dict so it can be compared directly.
    actual = delta_table.to_pyarrow_dataset().to_table().to_pydict()

    assert actual == {"column": []}


def test_vacuum_dry_run_simple_table():
table_path = "../rust/tests/data/delta-0.2.0"
dt = DeltaTable(table_path)
Expand Down
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"commitInfo":{"timestamp":1622445541882,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputBytes":"779","numOutputRows":"1"}}}
{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
{"metaData":{"id":"6af44890-613d-41fb-8629-1a49fa87d6d7","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"column\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1622445539346}}
{"add":{"path":"part-00000-b0cc5102-6177-4d60-80d3-b5d170011621-c000.snappy.parquet","partitionValues":{},"size":304,"modificationTime":1622445541807,"dataChange":true}}
{"add":{"path":"part-00007-02b8c308-e5a7-41a8-a653-cb5594582017-c000.snappy.parquet","partitionValues":{},"size":475,"modificationTime":1622445541825,"dataChange":true}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"commitInfo":{"timestamp":1622445548170,"operation":"DELETE","operationParameters":{"predicate":"[]"},"readVersion":0,"isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"2"}}}
{"remove":{"path":"part-00007-02b8c308-e5a7-41a8-a653-cb5594582017-c000.snappy.parquet","deletionTimestamp":1622445548168,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":475}}
{"remove":{"path":"part-00000-b0cc5102-6177-4d60-80d3-b5d170011621-c000.snappy.parquet","deletionTimestamp":1622445548168,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":304}}
Binary file not shown.
Binary file not shown.

0 comments on commit e48336d

Please sign in to comment.