4040 ICEBERG ,
4141 LOCATION ,
4242 METADATA_LOCATION ,
43+ PREVIOUS_METADATA_LOCATION ,
4344 TABLE_TYPE ,
4445 Catalog ,
4546 Identifier ,
4647 Properties ,
4748 PropertiesUpdateSummary ,
4849)
4950from pyiceberg .exceptions import (
51+ CommitFailedException ,
5052 NamespaceAlreadyExistsError ,
5153 NamespaceNotEmptyError ,
5254 NoSuchIcebergTableError ,
5961from pyiceberg .partitioning import UNPARTITIONED_PARTITION_SPEC , PartitionSpec
6062from pyiceberg .schema import Schema
6163from pyiceberg .serializers import FromInputFile
62- from pyiceberg .table import CommitTableRequest , CommitTableResponse , Table
64+ from pyiceberg .table import CommitTableRequest , CommitTableResponse , Table , update_table_metadata
6365from pyiceberg .table .metadata import new_table_metadata
6466from pyiceberg .table .sorting import UNSORTED_SORT_ORDER , SortOrder
6567from pyiceberg .typedef import EMPTY_DICT
6668
67-
68- def _construct_parameters (metadata_location : str ) -> Properties :
69- return {TABLE_TYPE : ICEBERG .upper (), METADATA_LOCATION : metadata_location }
70-
71-
72- def _construct_create_table_input (table_name : str , metadata_location : str , properties : Properties ) -> TableInputTypeDef :
# Whether Glue should skip archiving the old table version when a commit creates a new one.
# By default, Glue archives every old table version after an UpdateTable call, but it caps the
# number of archived versions per table (the limit can be raised). For streaming use cases with
# many commits, keeping this value set to true is recommended.
73+ GLUE_SKIP_ARCHIVE = "glue.skip-archive"
74+ GLUE_SKIP_ARCHIVE_DEFAULT = True
75+
76+
def _construct_parameters(
    metadata_location: str, glue_table: Optional[TableTypeDef] = None, prev_metadata_location: Optional[str] = None
) -> Properties:
    """Build the Glue ``Parameters`` map for an Iceberg table.

    Args:
        metadata_location: Location of the table metadata file to record.
        glue_table: Existing Glue table whose ``Parameters`` are carried over, if any.
        prev_metadata_location: Location of the previous metadata file; recorded only when truthy.

    Returns:
        A new dict holding the carried-over parameters plus the table type, the metadata
        location and, when given, the previous metadata location.
    """
    # Copy the existing parameters: updating the dict returned by ``get`` in place would
    # silently modify the caller's Glue table.
    existing_parameters = glue_table.get("Parameters") if glue_table else None
    new_parameters = dict(existing_parameters or {})
    new_parameters.update({TABLE_TYPE: ICEBERG.upper(), METADATA_LOCATION: metadata_location})
    if prev_metadata_location:
        new_parameters[PREVIOUS_METADATA_LOCATION] = prev_metadata_location
    return new_parameters
85+
86+
87+ def _construct_table_input (
88+ table_name : str ,
89+ metadata_location : str ,
90+ properties : Properties ,
91+ glue_table : Optional [TableTypeDef ] = None ,
92+ prev_metadata_location : Optional [str ] = None ,
93+ ) -> TableInputTypeDef :
7394 table_input : TableInputTypeDef = {
7495 "Name" : table_name ,
7596 "TableType" : EXTERNAL_TABLE ,
76- "Parameters" : _construct_parameters (metadata_location ),
97+ "Parameters" : _construct_parameters (metadata_location , glue_table , prev_metadata_location ),
7798 }
7899
79100 if "Description" in properties :
@@ -177,6 +198,28 @@ def _create_glue_table(self, database_name: str, table_name: str, table_input: T
177198 except self .glue .exceptions .EntityNotFoundException as e :
178199 raise NoSuchNamespaceError (f"Database { database_name } does not exist" ) from e
179200
def _update_glue_table(self, database_name: str, table_name: str, table_input: TableInputTypeDef, version_id: str) -> None:
    """Update a Glue table, passing the Glue version id the update is based on.

    Args:
        database_name: Name of the Glue database containing the table.
        table_name: Name of the table; used only in error messages.
        table_input: Table definition sent to Glue as ``TableInput``.
        version_id: Glue table version id sent as ``VersionId``.

    Raises:
        NoSuchTableError: If Glue raises ``EntityNotFoundException``.
        CommitFailedException: If Glue raises ``ConcurrentModificationException``.
    """
    # The property may be configured as a string (e.g. "false"); Glue needs a real boolean,
    # and a non-empty string would otherwise always count as true.
    skip_archive = self.properties.get(GLUE_SKIP_ARCHIVE, GLUE_SKIP_ARCHIVE_DEFAULT)
    if isinstance(skip_archive, str):
        skip_archive = skip_archive.strip().lower() == "true"

    try:
        self.glue.update_table(
            DatabaseName=database_name,
            TableInput=table_input,
            SkipArchive=skip_archive,
            VersionId=version_id,
        )
    except self.glue.exceptions.EntityNotFoundException as e:
        raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name} (Glue table version {version_id})") from e
    except self.glue.exceptions.ConcurrentModificationException as e:
        raise CommitFailedException(
            f"Cannot commit {database_name}.{table_name} because Glue detected concurrent update to table version {version_id}"
        ) from e
215+
def _get_glue_table(self, database_name: str, table_name: str) -> TableTypeDef:
    """Fetch a table from Glue and return the ``Table`` entry of the response.

    Args:
        database_name: Name of the Glue database containing the table.
        table_name: Name of the table to fetch.

    Returns:
        The value stored under ``"Table"`` in the Glue ``get_table`` response.

    Raises:
        NoSuchTableError: If Glue raises ``EntityNotFoundException``.
    """
    try:
        response = self.glue.get_table(DatabaseName=database_name, Name=table_name)
    except self.glue.exceptions.EntityNotFoundException as e:
        raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name}") from e
    return response["Table"]
222+
180223 def create_table (
181224 self ,
182225 identifier : Union [str , Identifier ],
@@ -215,7 +258,7 @@ def create_table(
215258 io = load_file_io (properties = self .properties , location = metadata_location )
216259 self ._write_metadata (metadata , io , metadata_location )
217260
218- table_input = _construct_create_table_input (table_name , metadata_location , properties )
261+ table_input = _construct_table_input (table_name , metadata_location , properties )
219262 database_name , table_name = self .identifier_to_database_and_table (identifier )
220263 self ._create_glue_table (database_name = database_name , table_name = table_name , table_input = table_input )
221264
@@ -247,8 +290,52 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
247290
248291 Raises:
249292 NoSuchTableError: If a table with the given identifier does not exist.
293+ CommitFailedException: If the commit failed.
250294 """
251- raise NotImplementedError
295+ identifier_tuple = self .identifier_to_tuple_without_catalog (
296+ tuple (table_request .identifier .namespace .root + [table_request .identifier .name ])
297+ )
298+ database_name , table_name = self .identifier_to_database_and_table (identifier_tuple )
299+
300+ current_glue_table = self ._get_glue_table (database_name = database_name , table_name = table_name )
301+ glue_table_version_id = current_glue_table .get ("VersionId" )
302+ if not glue_table_version_id :
303+ raise CommitFailedException (f"Cannot commit { database_name } .{ table_name } because Glue table version id is missing" )
304+ current_table = self ._convert_glue_to_iceberg (glue_table = current_glue_table )
305+ base_metadata = current_table .metadata
306+
307+ # Validate the update requirements
308+ for requirement in table_request .requirements :
309+ requirement .validate (base_metadata )
310+
311+ updated_metadata = update_table_metadata (base_metadata , table_request .updates )
312+ if updated_metadata == base_metadata :
313+ # no changes, do nothing
314+ return CommitTableResponse (metadata = base_metadata , metadata_location = current_table .metadata_location )
315+
316+ # write new metadata
317+ new_metadata_version = self ._parse_metadata_version (current_table .metadata_location ) + 1
318+ new_metadata_location = self ._get_metadata_location (current_table .metadata .location , new_metadata_version )
319+ self ._write_metadata (updated_metadata , current_table .io , new_metadata_location )
320+
321+ update_table_input = _construct_table_input (
322+ table_name = table_name ,
323+ metadata_location = new_metadata_location ,
324+ properties = current_table .properties ,
325+ glue_table = current_glue_table ,
326+ prev_metadata_location = current_table .metadata_location ,
327+ )
328+
329+ # Pass `version_id` to implement optimistic locking: it ensures updates are rejected if concurrent
330+ # modifications occur. See more details at https://iceberg.apache.org/docs/latest/aws/#optimistic-locking
331+ self ._update_glue_table (
332+ database_name = database_name ,
333+ table_name = table_name ,
334+ table_input = update_table_input ,
335+ version_id = glue_table_version_id ,
336+ )
337+
338+ return CommitTableResponse (metadata = updated_metadata , metadata_location = new_metadata_location )
252339
253340 def load_table (self , identifier : Union [str , Identifier ]) -> Table :
254341 """Load the table's metadata and returns the table instance.
@@ -267,12 +354,8 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table:
267354 """
268355 identifier_tuple = self .identifier_to_tuple_without_catalog (identifier )
269356 database_name , table_name = self .identifier_to_database_and_table (identifier_tuple , NoSuchTableError )
270- try :
271- load_table_response = self .glue .get_table (DatabaseName = database_name , Name = table_name )
272- except self .glue .exceptions .EntityNotFoundException as e :
273- raise NoSuchTableError (f"Table does not exist: { database_name } .{ table_name } " ) from e
274357
275- return self ._convert_glue_to_iceberg (load_table_response [ "Table" ] )
358+ return self ._convert_glue_to_iceberg (self . _get_glue_table ( database_name = database_name , table_name = table_name ) )
276359
277360 def drop_table (self , identifier : Union [str , Identifier ]) -> None :
278361 """Drop a table.
0 commit comments