From e48336d4c602028b240ebd0cb69d9bb45af666ee Mon Sep 17 00:00:00 2001 From: Florian Date: Mon, 31 May 2021 09:28:29 +0200 Subject: [PATCH] Manage empty delta table in Python binding --- python/deltalake/table.py | 8 ++++++++ python/tests/test_table_read.py | 8 ++++++++ ...4d60-80d3-b5d170011621-c000.snappy.parquet.crc | Bin 0 -> 12 bytes ...41a8-a653-cb5594582017-c000.snappy.parquet.crc | Bin 0 -> 12 bytes .../_delta_log/00000000000000000000.json | 5 +++++ .../_delta_log/00000000000000000001.json | 3 +++ ...177-4d60-80d3-b5d170011621-c000.snappy.parquet | Bin 0 -> 304 bytes ...5a7-41a8-a653-cb5594582017-c000.snappy.parquet | Bin 0 -> 475 bytes 8 files changed, 24 insertions(+) create mode 100644 rust/tests/data/delta-0.8-empty/.part-00000-b0cc5102-6177-4d60-80d3-b5d170011621-c000.snappy.parquet.crc create mode 100644 rust/tests/data/delta-0.8-empty/.part-00007-02b8c308-e5a7-41a8-a653-cb5594582017-c000.snappy.parquet.crc create mode 100644 rust/tests/data/delta-0.8-empty/_delta_log/00000000000000000000.json create mode 100644 rust/tests/data/delta-0.8-empty/_delta_log/00000000000000000001.json create mode 100644 rust/tests/data/delta-0.8-empty/part-00000-b0cc5102-6177-4d60-80d3-b5d170011621-c000.snappy.parquet create mode 100644 rust/tests/data/delta-0.8-empty/part-00007-02b8c308-e5a7-41a8-a653-cb5594582017-c000.snappy.parquet diff --git a/python/deltalake/table.py b/python/deltalake/table.py index 1d64acf020..2daece84d8 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -187,6 +187,14 @@ def to_pyarrow_dataset( file_paths = self._table.files_by_partitions(partitions) paths = [urlparse(curr_file) for curr_file in file_paths] + empty_delta_table = len(paths) == 0 + if empty_delta_table: + return dataset( + [], + schema=self.pyarrow_schema(), + partitioning=partitioning(flavor="hive"), + ) + # Decide based on the first file, if the file is on cloud storage or local if paths[0].netloc: query_str = "" diff --git a/python/tests/test_table_read.py b/python/tests/test_table_read.py index 8b82a8f13f..04c3f219b1 100644 --- a/python/tests/test_table_read.py +++ b/python/tests/test_table_read.py @@ -43,6 +43,14 @@ def test_read_partitioned_table_with_partitions_filters_to_dict(): assert dt.to_pyarrow_dataset(partitions).to_table().to_pydict() == expected +def test_read_empty_delta_table_after_delete(): + table_path = "../rust/tests/data/delta-0.8-empty" + dt = DeltaTable(table_path) + expected = {"column": []} + + assert dt.to_pyarrow_dataset().to_table().to_pydict() == expected + + def test_vacuum_dry_run_simple_table(): table_path = "../rust/tests/data/delta-0.2.0" dt = DeltaTable(table_path) diff --git a/rust/tests/data/delta-0.8-empty/.part-00000-b0cc5102-6177-4d60-80d3-b5d170011621-c000.snappy.parquet.crc b/rust/tests/data/delta-0.8-empty/.part-00000-b0cc5102-6177-4d60-80d3-b5d170011621-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..4f4e319bfaae751b20c56cb9166e339d912322fe GIT binary patch literal 12 TcmYc;N@ieSU}DHP()$Yl6MzHm literal 0 HcmV?d00001 diff --git a/rust/tests/data/delta-0.8-empty/.part-00007-02b8c308-e5a7-41a8-a653-cb5594582017-c000.snappy.parquet.crc b/rust/tests/data/delta-0.8-empty/.part-00007-02b8c308-e5a7-41a8-a653-cb5594582017-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..312729117fe2913131d63e6bf7400492a0151d88 GIT binary patch literal 12 TcmYc;N@ieSU}E@RRS*LJ6ea_J literal 0 HcmV?d00001 diff --git a/rust/tests/data/delta-0.8-empty/_delta_log/00000000000000000000.json b/rust/tests/data/delta-0.8-empty/_delta_log/00000000000000000000.json new file mode 100644 index 0000000000..96b236a734 --- /dev/null +++ b/rust/tests/data/delta-0.8-empty/_delta_log/00000000000000000000.json @@ -0,0 +1,5 @@ +{"commitInfo":{"timestamp":1622445541882,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputBytes":"779","numOutputRows":"1"}}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"6af44890-613d-41fb-8629-1a49fa87d6d7","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"column\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1622445539346}} +{"add":{"path":"part-00000-b0cc5102-6177-4d60-80d3-b5d170011621-c000.snappy.parquet","partitionValues":{},"size":304,"modificationTime":1622445541807,"dataChange":true}} +{"add":{"path":"part-00007-02b8c308-e5a7-41a8-a653-cb5594582017-c000.snappy.parquet","partitionValues":{},"size":475,"modificationTime":1622445541825,"dataChange":true}} diff --git a/rust/tests/data/delta-0.8-empty/_delta_log/00000000000000000001.json b/rust/tests/data/delta-0.8-empty/_delta_log/00000000000000000001.json new file mode 100644 index 0000000000..cf8941479c --- /dev/null +++ b/rust/tests/data/delta-0.8-empty/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1622445548170,"operation":"DELETE","operationParameters":{"predicate":"[]"},"readVersion":0,"isBlindAppend":false,"operationMetrics":{"numRemovedFiles":"2"}}} +{"remove":{"path":"part-00007-02b8c308-e5a7-41a8-a653-cb5594582017-c000.snappy.parquet","deletionTimestamp":1622445548168,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":475}} +{"remove":{"path":"part-00000-b0cc5102-6177-4d60-80d3-b5d170011621-c000.snappy.parquet","deletionTimestamp":1622445548168,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":304}} diff --git a/rust/tests/data/delta-0.8-empty/part-00000-b0cc5102-6177-4d60-80d3-b5d170011621-c000.snappy.parquet b/rust/tests/data/delta-0.8-empty/part-00000-b0cc5102-6177-4d60-80d3-b5d170011621-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1f22440ff9ce0ac265f8636caca1016010d5464a GIT binary patch literal 304 zcmZXQ!Arw17{y~zPrFu9*r5ak2Q?%sc1v%&iKpRF5lNbK7L&G1QbkJtcca#m$H)79 z?>)Z#?t#WplHDy_&+Xfbt6r7iaRlPis|cP~ogPdJEN(FVUP_I{*Lx literal 0 HcmV?d00001 diff --git a/rust/tests/data/delta-0.8-empty/part-00007-02b8c308-e5a7-41a8-a653-cb5594582017-c000.snappy.parquet b/rust/tests/data/delta-0.8-empty/part-00007-02b8c308-e5a7-41a8-a653-cb5594582017-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..eb0177c345d23e94284f762e57f1b6ff619a1d99 GIT binary patch literal 475 zcmZ`$&r8EF7)|3GJ&3oI1ac?=#|AaDtLxU`#hZ8<9u<+aP3JIexBi$&*~LG?gMY1= z+t@+PA-v>!?|U!#W;gd90*E5pK~RG(GAT9}sKVkS8>K{~G($_J!wyZ^wB2l; zHUQu4Eibdu-e