@@ -190,8 +190,11 @@ def test_query_filter_v1_v2_append_null(
 @pytest.mark.integration
 def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None:
     identifier = "default.arrow_table_summaries"
-    tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, 2 * [arrow_table_with_null])
-    tbl.overwrite(arrow_table_with_null)
+    tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, 2 * [arrow_table_with_null])  # append + append
+    tbl.overwrite(arrow_table_with_null)  # delete + append
+    tbl.delete(delete_filter="int == 1")  # overwrite, deletes 1 row
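+    # note: the filter matches only one row in the single remaining data file, so
+    # the delete rewrites that file (copy-on-write) and is recorded as an "overwrite" snapshot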
 
     rows = spark.sql(
         f"""
@@ -202,14 +203,14 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi
     ).collect()
 
     operations = [row.operation for row in rows]
-    assert operations == ["append", "append", "delete", "append"]
+    assert operations == ["append", "append", "delete", "append", "overwrite"]
 
     summaries = [row.summary for row in rows]
 
     file_size = int(summaries[0]["added-files-size"])
     assert file_size > 0
 
-    # Append
+    # Append from _create_table
     assert summaries[0] == {
         "added-data-files": "1",
         "added-files-size": str(file_size),
@@ -222,7 +223,7 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi
         "total-records": "3",
     }
 
-    # Append
+    # Append from _create_table
     assert summaries[1] == {
         "added-data-files": "1",
         "added-files-size": str(file_size),
@@ -235,7 +236,7 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi
         "total-records": "6",
     }
 
-    # Delete
+    # Delete from tbl.overwrite
     assert summaries[2] == {
         "deleted-data-files": "2",
         "deleted-records": "6",
@@ -248,7 +249,7 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi
         "total-records": "0",
     }
 
-    # Overwrite
+    # Append from tbl.overwrite
     assert summaries[3] == {
         "added-data-files": "1",
         "added-files-size": str(file_size),
@@ -261,6 +262,24 @@ def test_summaries(spark: SparkSession, session_catalog: Catalog, arrow_table_wi
         "total-records": "3",
     }
 
+    # Delete from tbl.delete
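+    # the one matching row is dropped by rewriting the file: the 3-row file is
+    # removed and a 2-row file is added (byte sizes below are fixture-specific)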
+    assert summaries[4] == {
+        "added-data-files": "1",
+        "added-files-size": "4342",
+        "added-records": "2",
+        "deleted-data-files": "1",
+        "deleted-records": "3",
+        "removed-files-size": "4406",
+        "total-data-files": "1",
+        "total-delete-files": "0",
+        "total-equality-deletes": "0",
+        "total-files-size": "4342",
+        "total-position-deletes": "0",
+        "total-records": "2",
+    }
+
 
 @pytest.mark.integration
 def test_data_files(spark: SparkSession, session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None: