HIVE_KERBEROS_SERVICE_NAME = "hive.kerberos-service-name"
HIVE_KERBEROS_SERVICE_NAME_DEFAULT = "hive"

# Catalog property that toggles acquiring a Hive Metastore lock around table commits.
# When disabled, commit_table skips the lock()/unlock() round-trips entirely.
LOCK_ENABLED = "lock-enabled"
DEFAULT_LOCK_ENABLED = True

# Catalog properties controlling the wait/retry behavior while a requested HMS lock
# is in the WAITING state (consumed by _wait_for_lock via the retry machinery).
LOCK_CHECK_MIN_WAIT_TIME = "lock-check-min-wait-time"
LOCK_CHECK_MAX_WAIT_TIME = "lock-check-max-wait-time"
LOCK_CHECK_RETRIES = "lock-check-retries"
@@ -301,6 +304,7 @@ def __init__(self, name: str, **properties: str):
301304 super ().__init__ (name , ** properties )
302305 self ._client = self ._create_hive_client (properties )
303306
307+ self ._lock_enabled = property_as_bool (properties , LOCK_ENABLED , DEFAULT_LOCK_ENABLED )
304308 self ._lock_check_min_wait_time = property_as_float (properties , LOCK_CHECK_MIN_WAIT_TIME , DEFAULT_LOCK_CHECK_MIN_WAIT_TIME )
305309 self ._lock_check_max_wait_time = property_as_float (properties , LOCK_CHECK_MAX_WAIT_TIME , DEFAULT_LOCK_CHECK_MAX_WAIT_TIME )
306310 self ._lock_check_retries = property_as_float (
@@ -499,6 +503,91 @@ def _do_wait_for_lock() -> LockResponse:
499503
500504 return _do_wait_for_lock ()
501505
    def _do_commit(
        self, open_client: Client, table_identifier: Identifier, database_name: str, table_name: str,
        requirements: tuple[TableRequirement, ...], updates: tuple[TableUpdate, ...],
    ) -> CommitTableResponse:
        """Perform the actual commit logic (get table, update, write metadata, alter/create in HMS).

        This method contains the core commit logic, separated from locking concerns.
        Callers are responsible for any HMS lock handling; this method assumes it may
        act on the table (see commit_table, which acquires the lock when enabled).

        Args:
            open_client: An already-open Hive Metastore Thrift client.
            database_name: HMS database containing the table.
            table_name: HMS name of the table being committed.
            requirements: Conditions validated against the current table metadata.
            updates: Metadata updates to apply on top of the current metadata.

        Returns:
            CommitTableResponse carrying the committed metadata and its new location.
        """
        hive_table: HiveTable | None
        current_table: Table | None
        # Load the current state; a missing table means this commit creates it.
        try:
            hive_table = self._get_hive_table(open_client, database_name, table_name)
            current_table = self._convert_hive_into_iceberg(hive_table)
        except NoSuchTableError:
            hive_table = None
            current_table = None

        updated_staged_table = self._update_and_stage_table(current_table, table_identifier, requirements, updates)
        if current_table and updated_staged_table.metadata == current_table.metadata:
            # no changes, do nothing
            return CommitTableResponse(metadata=current_table.metadata, metadata_location=current_table.metadata_location)
        # Persist the new metadata JSON before pointing HMS at it.
        self._write_metadata(
            metadata=updated_staged_table.metadata,
            io=updated_staged_table.io,
            metadata_path=updated_staged_table.metadata_location,
        )

        if hive_table and current_table:
            # Table exists, update it.

            # Note on table properties:
            # - Iceberg table properties are stored in both HMS and Iceberg metadata JSON.
            # - Updates are reflected in both locations
            # - Existing HMS table properties (set by external systems like Hive/Spark) are preserved.
            #
            # While it is possible to modify HMS table properties through this API, it is not recommended:
            # - Mixing HMS-specific properties in Iceberg metadata can cause confusion
            # - New/updated HMS table properties will also be stored in Iceberg metadata (even though it is HMS-specific)
            # - HMS-native properties (set outside Iceberg) cannot be deleted since they are not visible to Iceberg
            #   (However, if you first SET an HMS property via Iceberg, it becomes tracked in Iceberg metadata,
            #   and can then be deleted via Iceberg - which removes it from both Iceberg metadata and HMS)
            new_iceberg_properties = _construct_parameters(
                metadata_location=updated_staged_table.metadata_location,
                previous_metadata_location=current_table.metadata_location,
                metadata_properties=updated_staged_table.properties,
            )
            # Detect properties that were removed from Iceberg metadata
            deleted_iceberg_properties = current_table.properties.keys() - updated_staged_table.properties.keys()

            # Merge: preserve HMS-native properties, remove deleted Iceberg properties, apply new Iceberg properties
            existing_hms_parameters = dict(hive_table.parameters or {})
            for key in deleted_iceberg_properties:
                existing_hms_parameters.pop(key, None)
            existing_hms_parameters.update(new_iceberg_properties)
            hive_table.parameters = existing_hms_parameters

            # Update hive's schema and properties
            hive_table.sd = _construct_hive_storage_descriptor(
                updated_staged_table.schema(),
                updated_staged_table.location(),
                property_as_bool(self.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT),
            )
            # DO_NOT_UPDATE_STATS prevents HMS from recomputing table statistics on alter.
            open_client.alter_table_with_environment_context(
                dbname=database_name,
                tbl_name=table_name,
                new_tbl=hive_table,
                environment_context=EnvironmentContext(properties={DO_NOT_UPDATE_STATS: DO_NOT_UPDATE_STATS_DEFAULT}),
            )
        else:
            # Table does not exist, create it.
            hive_table = self._convert_iceberg_into_hive(
                StagedTable(
                    identifier=(database_name, table_name),
                    metadata=updated_staged_table.metadata,
                    metadata_location=updated_staged_table.metadata_location,
                    io=updated_staged_table.io,
                    catalog=self,
                )
            )
            self._create_hive_table(open_client, hive_table)

        return CommitTableResponse(
            metadata=updated_staged_table.metadata, metadata_location=updated_staged_table.metadata_location
        )
502591 def commit_table (
503592 self , table : Table , requirements : tuple [TableRequirement , ...], updates : tuple [TableUpdate , ...]
504593 ) -> CommitTableResponse :
@@ -521,95 +610,23 @@ def commit_table(
521610 # commit to hive
522611 # https://github.com/apache/hive/blob/master/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift#L1232
523612 with self ._client as open_client :
524- lock : LockResponse = open_client .lock (self ._create_lock_request (database_name , table_name ))
613+ if self ._lock_enabled :
614+ lock : LockResponse = open_client .lock (self ._create_lock_request (database_name , table_name ))
525615
526- try :
527- if lock .state != LockState .ACQUIRED :
528- if lock .state == LockState .WAITING :
529- self ._wait_for_lock (database_name , table_name , lock .lockid , open_client )
530- else :
531- raise CommitFailedException (f"Failed to acquire lock for { table_identifier } , state: { lock .state } " )
532-
533- hive_table : HiveTable | None
534- current_table : Table | None
535616 try :
536- hive_table = self ._get_hive_table (open_client , database_name , table_name )
537- current_table = self ._convert_hive_into_iceberg (hive_table )
538- except NoSuchTableError :
539- hive_table = None
540- current_table = None
541-
542- updated_staged_table = self ._update_and_stage_table (current_table , table_identifier , requirements , updates )
543- if current_table and updated_staged_table .metadata == current_table .metadata :
544- # no changes, do nothing
545- return CommitTableResponse (metadata = current_table .metadata , metadata_location = current_table .metadata_location )
546- self ._write_metadata (
547- metadata = updated_staged_table .metadata ,
548- io = updated_staged_table .io ,
549- metadata_path = updated_staged_table .metadata_location ,
550- )
551-
552- if hive_table and current_table :
553- # Table exists, update it.
554-
555- # Note on table properties:
556- # - Iceberg table properties are stored in both HMS and Iceberg metadata JSON.
557- # - Updates are reflected in both locations
558- # - Existing HMS table properties (set by external systems like Hive/Spark) are preserved.
559- #
560- # While it is possible to modify HMS table properties through this API, it is not recommended:
561- # - Mixing HMS-specific properties in Iceberg metadata can cause confusion
562- # - New/updated HMS table properties will also be stored in Iceberg metadata (even though it is HMS-specific)
563- # - HMS-native properties (set outside Iceberg) cannot be deleted since they are not visible to Iceberg
564- # (However, if you first SET an HMS property via Iceberg, it becomes tracked in Iceberg metadata,
565- # and can then be deleted via Iceberg - which removes it from both Iceberg metadata and HMS)
566- new_iceberg_properties = _construct_parameters (
567- metadata_location = updated_staged_table .metadata_location ,
568- previous_metadata_location = current_table .metadata_location ,
569- metadata_properties = updated_staged_table .properties ,
570- )
571- # Detect properties that were removed from Iceberg metadata
572- deleted_iceberg_properties = current_table .properties .keys () - updated_staged_table .properties .keys ()
573-
574- # Merge: preserve HMS-native properties, remove deleted Iceberg properties, apply new Iceberg properties
575- existing_hms_parameters = dict (hive_table .parameters or {})
576- for key in deleted_iceberg_properties :
577- existing_hms_parameters .pop (key , None )
578- existing_hms_parameters .update (new_iceberg_properties )
579- hive_table .parameters = existing_hms_parameters
580-
581- # Update hive's schema and properties
582- hive_table .sd = _construct_hive_storage_descriptor (
583- updated_staged_table .schema (),
584- updated_staged_table .location (),
585- property_as_bool (self .properties , HIVE2_COMPATIBLE , HIVE2_COMPATIBLE_DEFAULT ),
586- )
587- open_client .alter_table_with_environment_context (
588- dbname = database_name ,
589- tbl_name = table_name ,
590- new_tbl = hive_table ,
591- environment_context = EnvironmentContext (properties = {DO_NOT_UPDATE_STATS : DO_NOT_UPDATE_STATS_DEFAULT }),
592- )
593- else :
594- # Table does not exist, create it.
595- hive_table = self ._convert_iceberg_into_hive (
596- StagedTable (
597- identifier = (database_name , table_name ),
598- metadata = updated_staged_table .metadata ,
599- metadata_location = updated_staged_table .metadata_location ,
600- io = updated_staged_table .io ,
601- catalog = self ,
602- )
603- )
604- self ._create_hive_table (open_client , hive_table )
605- except WaitingForLockException as e :
606- raise CommitFailedException (f"Failed to acquire lock for { table_identifier } , state: { lock .state } " ) from e
607- finally :
608- open_client .unlock (UnlockRequest (lockid = lock .lockid ))
609-
610- return CommitTableResponse (
611- metadata = updated_staged_table .metadata , metadata_location = updated_staged_table .metadata_location
612- )
617+ if lock .state != LockState .ACQUIRED :
618+ if lock .state == LockState .WAITING :
619+ self ._wait_for_lock (database_name , table_name , lock .lockid , open_client )
620+ else :
621+ raise CommitFailedException (f"Failed to acquire lock for { table_identifier } , state: { lock .state } " )
622+
623+ return self ._do_commit (open_client , table_identifier , database_name , table_name , requirements , updates )
624+ except WaitingForLockException as e :
625+ raise CommitFailedException (f"Failed to acquire lock for { table_identifier } , state: { lock .state } " ) from e
626+ finally :
627+ open_client .unlock (UnlockRequest (lockid = lock .lockid ))
628+ else :
629+ return self ._do_commit (open_client , table_identifier , database_name , table_name , requirements , updates )
613630
614631 def load_table (self , identifier : str | Identifier ) -> Table :
615632 """Load the table's metadata and return the table instance.
0 commit comments