Skip to content

Commit ae8a024

Browse files
committed
Add more tests, comment
1 parent be4e2a7 commit ae8a024

File tree

2 files changed

+77
-1
lines changed

2 files changed

+77
-1
lines changed

pyiceberg/catalog/hive.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,9 @@ def commit_table(
562562
new_iceberg_keys = set(updated_staged_table.properties.keys())
563563
removed_keys = old_iceberg_keys - new_iceberg_keys
564564

565-
# Start with current HMS parameters, remove deleted Iceberg properties, then update with new ones
565+
# Merge HMS parameters: preserve HMS-only properties (e.g., table_category) while
566+
# ensuring Iceberg controls its metadata. Start with HMS params, remove deleted
567+
# Iceberg properties, then update with new Iceberg values (which take precedence).
566568
updated_parameters = {k: v for k, v in hive_table.parameters.items() if k not in removed_keys}
567569
updated_parameters.update(new_parameters)
568570
hive_table.parameters = updated_parameters

tests/integration/test_reads.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,80 @@ def test_hive_preserves_hms_specific_properties(catalog: Catalog) -> None:
173173
assert hive_table.parameters.get("iceberg_property") == "new_value"
174174

175175

176+
@pytest.mark.integration
177+
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive")])
178+
def test_hive_iceberg_property_takes_precedence(catalog: Catalog) -> None:
179+
"""Test that Iceberg properties take precedence over conflicting HMS properties.
180+
181+
If an external tool sets an HMS property with the same name as an Iceberg-managed
182+
property, Iceberg's value should win during commits.
183+
"""
184+
table = create_table(catalog)
185+
hive_client: _HiveClient = _HiveClient(catalog.properties["uri"])
186+
187+
# Set an Iceberg property
188+
table.transaction().set_properties({"my_prop": "iceberg_value"}).commit_transaction()
189+
190+
# External tool modifies the same property in HMS
191+
with hive_client as open_client:
192+
hive_table = open_client.get_table(*TABLE_NAME)
193+
hive_table.parameters["my_prop"] = "hms_value" # Conflicting value
194+
open_client.alter_table(TABLE_NAME[0], TABLE_NAME[1], hive_table)
195+
196+
# Verify HMS has the external value
197+
with hive_client as open_client:
198+
hive_table = open_client.get_table(*TABLE_NAME)
199+
assert hive_table.parameters.get("my_prop") == "hms_value"
200+
201+
# Perform another Iceberg commit
202+
table.transaction().set_properties({"another_prop": "test"}).commit_transaction()
203+
204+
# Iceberg's value should take precedence
205+
with hive_client as open_client:
206+
hive_table = open_client.get_table(*TABLE_NAME)
207+
assert hive_table.parameters.get("my_prop") == "iceberg_value", (
208+
"Iceberg property value should take precedence over conflicting HMS value!"
209+
)
210+
assert hive_table.parameters.get("another_prop") == "test"
211+
212+
213+
@pytest.mark.integration
214+
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive")])
215+
def test_hive_critical_properties_always_from_iceberg(catalog: Catalog) -> None:
216+
"""Test that critical properties (EXTERNAL, table_type, metadata_location) always come from Iceberg.
217+
218+
These properties should never be carried over from old HMS state.
219+
"""
220+
table = create_table(catalog)
221+
hive_client: _HiveClient = _HiveClient(catalog.properties["uri"])
222+
223+
# Get original metadata_location
224+
with hive_client as open_client:
225+
hive_table = open_client.get_table(*TABLE_NAME)
226+
original_metadata_location = hive_table.parameters.get("metadata_location")
227+
assert original_metadata_location is not None
228+
assert hive_table.parameters.get("EXTERNAL") == "TRUE"
229+
assert hive_table.parameters.get("table_type") == "ICEBERG"
230+
231+
# Try to tamper with critical properties via HMS
232+
with hive_client as open_client:
233+
hive_table = open_client.get_table(*TABLE_NAME)
234+
hive_table.parameters["EXTERNAL"] = "FALSE" # Try to change
235+
open_client.alter_table(TABLE_NAME[0], TABLE_NAME[1], hive_table)
236+
237+
# Perform Iceberg commit
238+
table.transaction().set_properties({"test_prop": "value"}).commit_transaction()
239+
240+
# Critical properties should be restored by Iceberg
241+
with hive_client as open_client:
242+
hive_table = open_client.get_table(*TABLE_NAME)
243+
assert hive_table.parameters.get("EXTERNAL") == "TRUE", "EXTERNAL should always be TRUE from Iceberg!"
244+
assert hive_table.parameters.get("table_type") == "ICEBERG", "table_type should always be ICEBERG!"
245+
# metadata_location should be updated (new metadata file)
246+
new_metadata_location = hive_table.parameters.get("metadata_location")
247+
assert new_metadata_location != original_metadata_location, "metadata_location should be updated!"
248+
249+
176250
@pytest.mark.integration
177251
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
178252
def test_table_properties_dict(catalog: Catalog) -> None:

0 commit comments

Comments
 (0)