Skip to content

Commit 291311f

Browse files
committed
Address comments
1 parent ae8a024 commit 291311f

File tree

2 files changed

+52
-12
lines changed

2 files changed

+52
-12
lines changed

pyiceberg/catalog/hive.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -558,22 +558,22 @@ def commit_table(
558558
)
559559

560560
# Detect properties that were removed from Iceberg metadata
561-
old_iceberg_keys = set(current_table.properties.keys())
562-
new_iceberg_keys = set(updated_staged_table.properties.keys())
563-
removed_keys = old_iceberg_keys - new_iceberg_keys
561+
removed_keys = current_table.properties.keys() - updated_staged_table.properties.keys()
564562

565-
# Merge HMS parameters: preserve HMS-only properties (e.g., table_category) while
566-
# ensuring Iceberg controls its metadata. Start with HMS params, remove deleted
567-
# Iceberg properties, then update with new Iceberg values (which take precedence).
568-
updated_parameters = {k: v for k, v in hive_table.parameters.items() if k not in removed_keys}
569-
updated_parameters.update(new_parameters)
570-
hive_table.parameters = updated_parameters
563+
# Sync HMS parameters: Iceberg metadata is the source of truth, HMS parameters are
564+
# a projection of Iceberg state plus any HMS-only properties.
565+
# Start with existing HMS params, remove deleted Iceberg properties, then apply Iceberg values.
566+
merged_params = dict(hive_table.parameters or {})
567+
for key in removed_keys:
568+
merged_params.pop(key, None)
569+
merged_params.update(new_parameters)
570+
hive_table.parameters = merged_params
571571

572572
# Update hive's schema and properties
573573
hive_table.sd = _construct_hive_storage_descriptor(
574574
updated_staged_table.schema(),
575575
updated_staged_table.location(),
576-
property_as_bool(updated_staged_table.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT),
576+
property_as_bool(self.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT),
577577
)
578578
open_client.alter_table_with_environment_context(
579579
dbname=database_name,

tests/integration/test_reads.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,8 +175,48 @@ def test_hive_preserves_hms_specific_properties(catalog: Catalog) -> None:
175175

176176
@pytest.mark.integration
177177
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive")])
178-
def test_hive_iceberg_property_takes_precedence(catalog: Catalog) -> None:
179-
"""Test that Iceberg properties take precedence over conflicting HMS properties.
178+
def test_iceberg_property_deletion_not_restored_from_old_hms_state(catalog: Catalog) -> None:
179+
"""Test that deleted Iceberg properties are truly removed and not restored from old HMS state.
180+
181+
When a property is removed through Iceberg, it should be deleted from HMS and not
182+
come back from the old HMS state during merge operations.
183+
"""
184+
table = create_table(catalog)
185+
hive_client: _HiveClient = _HiveClient(catalog.properties["uri"])
186+
187+
# Set multiple Iceberg properties
188+
table.transaction().set_properties({"prop_to_keep": "keep_value", "prop_to_delete": "delete_me"}).commit_transaction()
189+
190+
# Verify both properties exist
191+
with hive_client as open_client:
192+
hive_table = open_client.get_table(*TABLE_NAME)
193+
assert hive_table.parameters.get("prop_to_keep") == "keep_value"
194+
assert hive_table.parameters.get("prop_to_delete") == "delete_me"
195+
196+
# Delete one property through Iceberg
197+
table.transaction().remove_properties("prop_to_delete").commit_transaction()
198+
199+
# Verify property is deleted from HMS
200+
with hive_client as open_client:
201+
hive_table = open_client.get_table(*TABLE_NAME)
202+
assert hive_table.parameters.get("prop_to_keep") == "keep_value"
203+
assert hive_table.parameters.get("prop_to_delete") is None, "Deleted property should not exist in HMS!"
204+
205+
# Perform another Iceberg commit
206+
table.transaction().set_properties({"new_prop": "new_value"}).commit_transaction()
207+
208+
# Ensure deleted property doesn't come back from old state
209+
with hive_client as open_client:
210+
hive_table = open_client.get_table(*TABLE_NAME)
211+
assert hive_table.parameters.get("prop_to_keep") == "keep_value"
212+
assert hive_table.parameters.get("new_prop") == "new_value"
213+
assert hive_table.parameters.get("prop_to_delete") is None, "Deleted property should NOT be restored from old HMS state!"
214+
215+
216+
@pytest.mark.integration
217+
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive")])
218+
def test_iceberg_metadata_is_source_of_truth(catalog: Catalog) -> None:
219+
"""Test that Iceberg metadata is the source of truth for all Iceberg-managed properties.
180220
181221
If an external tool sets an HMS property with the same name as an Iceberg-managed
182222
property, Iceberg's value should win during commits.

0 commit comments

Comments
 (0)