Skip to content
Merged
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 21 additions & 12 deletions pyiceberg/catalog/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,23 +551,32 @@ def commit_table(

if hive_table and current_table:
# Table exists, update it.
new_parameters = _construct_parameters(

# Note on table properties:
# - Iceberg table properties are stored in both HMS and Iceberg metadata JSON.
# - Updates are reflected in both locations.
# - Existing HMS table properties (set by external systems like Hive/Spark) are preserved.
#
# While it is possible to modify HMS table properties through this API, it is not recommended:
# - Mixing HMS-specific properties into Iceberg metadata can cause confusion.
# - New/updated HMS table properties will also be stored in Iceberg metadata (even though they are HMS-specific).
# - HMS-native properties (set outside Iceberg) cannot be deleted, since they are not visible to Iceberg.
# (However, if you first SET an HMS property via Iceberg, it becomes tracked in Iceberg metadata
# and can then be deleted via Iceberg, which removes it from both Iceberg metadata and HMS.)
new_iceberg_properties = _construct_parameters(
metadata_location=updated_staged_table.metadata_location,
previous_metadata_location=current_table.metadata_location,
metadata_properties=updated_staged_table.properties,
)

# Detect properties that were removed from Iceberg metadata
removed_keys = current_table.properties.keys() - updated_staged_table.properties.keys()

# Sync HMS parameters: Iceberg metadata is the source of truth, HMS parameters are
# a projection of Iceberg state plus any HMS-only properties.
# Start with existing HMS params, remove deleted Iceberg properties, then apply Iceberg values.
merged_params = dict(hive_table.parameters or {})
for key in removed_keys:
merged_params.pop(key, None)
merged_params.update(new_parameters)
hive_table.parameters = merged_params
deleted_iceberg_properties = current_table.properties.keys() - updated_staged_table.properties.keys()
Comment thread
kevinjqliu marked this conversation as resolved.

# Merge: preserve HMS-native properties, remove deleted Iceberg properties, apply new Iceberg properties
existing_hms_parameters = dict(hive_table.parameters or {})
for key in deleted_iceberg_properties:
existing_hms_parameters.pop(key, None)
existing_hms_parameters.update(new_iceberg_properties)
hive_table.parameters = existing_hms_parameters

# Update hive's schema and properties
hive_table.sd = _construct_hive_storage_descriptor(
Expand Down