@@ -48,6 +48,7 @@ def create_table_if_not_exists(
48
48
Returns
49
49
-------
50
50
None
51
+
51
52
"""
52
53
# Create the schema (database) if it does not already exist
53
54
spark .sql (f"CREATE SCHEMA IF NOT EXISTS { catalog_name } .{ database_name } " )
@@ -91,6 +92,7 @@ def read_table(spark: "SparkSession", catalog_name: str, database_name: str, tab
91
92
-------
92
93
DataFrame
93
94
A PySpark DataFrame representing the table.
95
+
94
96
"""
95
97
return spark .sql (f"SELECT * FROM { catalog_name } .{ database_name } .{ table_name } " )
96
98
@@ -125,6 +127,7 @@ def merge_into_table(
125
127
Returns
126
128
-------
127
129
None
130
+
128
131
"""
129
132
source_df .createOrReplaceTempView ("source" )
130
133
match_condition = " AND " .join ([f"target.{ col } = source.{ col } " for col in match_columns ])
@@ -149,8 +152,7 @@ def expire_snapshots(
149
152
retain_last : int ,
150
153
max_concurrent_deletes : int = 8 ,
151
154
) -> int :
152
- """
153
- Removes old snapshots from the specified Iceberg table.
155
+ """Removes old snapshots from the specified Iceberg table.
154
156
155
157
Parameters
156
158
----------
@@ -173,6 +175,7 @@ def expire_snapshots(
173
175
-------
174
176
int
175
177
The number of data files deleted during snapshot expiration.
178
+
176
179
"""
177
180
older_than_str = older_than .strftime ("%Y-%m-%d %H:%M:%S.%f" )[:- 3 ]
178
181
return spark .sql (f"""
@@ -193,8 +196,7 @@ def remove_orphan_files(
193
196
older_than : "datetime.datetime" ,
194
197
max_concurrent_deletes : int = 8 ,
195
198
) -> int :
196
- """
197
- Removes orphaned files from the specified Iceberg table.
199
+ """Removes orphaned files from the specified Iceberg table.
198
200
199
201
Parameters
200
202
----------
@@ -215,6 +217,7 @@ def remove_orphan_files(
215
217
-------
216
218
int
217
219
The count of orphaned files removed during the operation.
220
+
218
221
"""
219
222
older_than_str = older_than .strftime ("%Y-%m-%d %H:%M:%S.%f" )[:- 3 ]
220
223
return spark .sql (f"""
@@ -232,8 +235,7 @@ def rewrite_data_files(
232
235
database_name : str ,
233
236
table_name : str ,
234
237
) -> int :
235
- """
236
- Rewrites data files from the specified Iceberg table.
238
+ """Rewrites data files from the specified Iceberg table.
237
239
238
240
Uses the 'sort' strategy and defaults to the table's sort-order.
239
241
@@ -252,6 +254,7 @@ def rewrite_data_files(
252
254
-------
253
255
int
254
256
The sum of the rewritten and new data files.
257
+
255
258
"""
256
259
result = spark .sql (f"""
257
260
CALL { catalog_name } .system.rewrite_data_files(
@@ -268,8 +271,7 @@ def rewrite_manifests(
268
271
database_name : str ,
269
272
table_name : str ,
270
273
) -> int :
271
- """
272
- Rewrites manifest files from the specified Iceberg table.
274
+ """Rewrites manifest files from the specified Iceberg table.
273
275
274
276
Parameters
275
277
----------
@@ -286,6 +288,7 @@ def rewrite_manifests(
286
288
-------
287
289
int
288
290
The sum of the rewritten and new manifest files.
291
+
289
292
"""
290
293
result = spark .sql (f"""
291
294
CALL { catalog_name } .system.rewrite_manifests(
0 commit comments