Skip to content

Commit 11a2281

Browse files
kevinjqliuCopilot
andauthored
add hive catalog table properties docs and refactor (#2941)
<!-- Thanks for opening a pull request! --> <!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual Github issue id. --> <!-- Closes #${GITHUB_ISSUE_ID} --> # Rationale for this change Follow up to #2927 Add notes on updating iceberg table properties and hive table properties when using the hive catalog (HMS) Minor refactor on variable naming to be more explicit I added a test for the new edge case as well ## Are these changes tested? ## Are there any user-facing changes? <!-- In the case of user-facing changes, please add the changelog label. --> --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 2d6a1b9 commit 11a2281

File tree

2 files changed

+81
-12
lines changed

2 files changed

+81
-12
lines changed

pyiceberg/catalog/hive.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -551,23 +551,32 @@ def commit_table(
551551

552552
if hive_table and current_table:
553553
# Table exists, update it.
554-
new_parameters = _construct_parameters(
554+
555+
# Note on table properties:
556+
# - Iceberg table properties are stored in both HMS and Iceberg metadata JSON.
557+
# - Updates are reflected in both locations
558+
# - Existing HMS table properties (set by external systems like Hive/Spark) are preserved.
559+
#
560+
# While it is possible to modify HMS table properties through this API, it is not recommended:
561+
# - Mixing HMS-specific properties in Iceberg metadata can cause confusion
562+
# - New/updated HMS table properties will also be stored in Iceberg metadata (even though it is HMS-specific)
563+
# - HMS-native properties (set outside Iceberg) cannot be deleted since they are not visible to Iceberg
564+
# (However, if you first SET an HMS property via Iceberg, it becomes tracked in Iceberg metadata,
565+
# and can then be deleted via Iceberg - which removes it from both Iceberg metadata and HMS)
566+
new_iceberg_properties = _construct_parameters(
555567
metadata_location=updated_staged_table.metadata_location,
556568
previous_metadata_location=current_table.metadata_location,
557569
metadata_properties=updated_staged_table.properties,
558570
)
559-
560571
# Detect properties that were removed from Iceberg metadata
561-
removed_keys = current_table.properties.keys() - updated_staged_table.properties.keys()
562-
563-
# Sync HMS parameters: Iceberg metadata is the source of truth, HMS parameters are
564-
# a projection of Iceberg state plus any HMS-only properties.
565-
# Start with existing HMS params, remove deleted Iceberg properties, then apply Iceberg values.
566-
merged_params = dict(hive_table.parameters or {})
567-
for key in removed_keys:
568-
merged_params.pop(key, None)
569-
merged_params.update(new_parameters)
570-
hive_table.parameters = merged_params
572+
deleted_iceberg_properties = current_table.properties.keys() - updated_staged_table.properties.keys()
573+
574+
# Merge: preserve HMS-native properties, remove deleted Iceberg properties, apply new Iceberg properties
575+
existing_hms_parameters = dict(hive_table.parameters or {})
576+
for key in deleted_iceberg_properties:
577+
existing_hms_parameters.pop(key, None)
578+
existing_hms_parameters.update(new_iceberg_properties)
579+
hive_table.parameters = existing_hms_parameters
571580

572581
# Update hive's schema and properties
573582
hive_table.sd = _construct_hive_storage_descriptor(

tests/integration/test_reads.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,66 @@ def test_hive_critical_properties_always_from_iceberg(catalog: Catalog) -> None:
286286
assert new_metadata_location != original_metadata_location, "metadata_location should be updated!"
287287

288288

289+
@pytest.mark.integration
290+
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive")])
291+
def test_hive_native_properties_cannot_be_deleted_via_iceberg(catalog: Catalog) -> None:
292+
"""Test that HMS-native properties (set outside Iceberg) cannot be deleted via Iceberg.
293+
294+
HMS-native properties are not visible to Iceberg, so remove_properties fails with KeyError.
295+
However, if you first SET an HMS property via Iceberg (making it tracked in Iceberg metadata),
296+
it can then be deleted via Iceberg.
297+
"""
298+
table = create_table(catalog)
299+
hive_client: _HiveClient = _HiveClient(catalog.properties["uri"])
300+
301+
# Set an HMS-native property directly (not through Iceberg)
302+
with hive_client as open_client:
303+
hive_table = open_client.get_table(*TABLE_NAME)
304+
hive_table.parameters["hms_native_prop"] = "native_value"
305+
open_client.alter_table(TABLE_NAME[0], TABLE_NAME[1], hive_table)
306+
307+
# Verify the HMS-native property exists in HMS
308+
with hive_client as open_client:
309+
hive_table = open_client.get_table(*TABLE_NAME)
310+
assert hive_table.parameters.get("hms_native_prop") == "native_value"
311+
312+
# Refresh the Iceberg table to get the latest state
313+
table.refresh()
314+
315+
# Verify the HMS-native property is NOT visible in Iceberg
316+
assert "hms_native_prop" not in table.properties
317+
318+
# Attempt to remove the HMS-native property via Iceberg - this should fail
319+
# because the property is not tracked in Iceberg metadata (not visible to Iceberg)
320+
with pytest.raises(KeyError):
321+
table.transaction().remove_properties("hms_native_prop").commit_transaction()
322+
323+
# HMS-native property should still exist (cannot be deleted via Iceberg)
324+
with hive_client as open_client:
325+
hive_table = open_client.get_table(*TABLE_NAME)
326+
assert hive_table.parameters.get("hms_native_prop") == "native_value", (
327+
"HMS-native property should still exist since Iceberg removal failed!"
328+
)
329+
330+
# Now SET the same property via Iceberg (this makes it tracked in Iceberg metadata)
331+
table.transaction().set_properties({"hms_native_prop": "iceberg_value"}).commit_transaction()
332+
333+
# Verify it's updated in both places
334+
with hive_client as open_client:
335+
hive_table = open_client.get_table(*TABLE_NAME)
336+
assert hive_table.parameters.get("hms_native_prop") == "iceberg_value"
337+
338+
# Now we CAN delete it via Iceberg (because it's now tracked in Iceberg metadata)
339+
table.transaction().remove_properties("hms_native_prop").commit_transaction()
340+
341+
# Property should be deleted from HMS
342+
with hive_client as open_client:
343+
hive_table = open_client.get_table(*TABLE_NAME)
344+
assert hive_table.parameters.get("hms_native_prop") is None, (
345+
"Property should be deletable after being SET via Iceberg (making it tracked)!"
346+
)
347+
348+
289349
@pytest.mark.integration
290350
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
291351
def test_table_properties_dict(catalog: Catalog) -> None:

0 commit comments

Comments
 (0)