1717
1818
1919import logging
20+ from types import TracebackType
2021from typing import (
2122 TYPE_CHECKING ,
2223 Any ,
5556from pyiceberg .serializers import FromInputFile , ToOutputFile
5657from pyiceberg .table import (
5758 CommitTableResponse ,
59+ CreateTableTransaction ,
60+ StagedTable ,
5861 Table ,
5962)
6063from pyiceberg .table .metadata import TableMetadata
@@ -314,6 +317,47 @@ def add_glue_catalog_id(params: dict[str, str], **kwargs: Any) -> None:
314317 event_system .register ("provide-client-params.glue" , add_glue_catalog_id )
315318
316319
class _S3TablesCreateTableTransaction(CreateTableTransaction):
    """CreateTableTransaction that cleans up the S3 Tables staging table on failure.

    When ``create_table_transaction`` pre-creates a Glue table entry for an S3 Tables
    federated database, that entry must be deleted if the transaction is never committed
    (e.g. an exception inside the ``with`` block) or if the commit itself fails.

    Cleanup is best-effort (a failed delete is logged, never raised) and runs at most
    once per transaction, guarded by ``_staging_table_needs_cleanup``.
    """

    def __init__(self, staged_table: StagedTable, catalog: "GlueCatalog", database_name: str, table_name: str):
        """Wrap *staged_table* and remember which Glue entry to delete on failure.

        Args:
            staged_table: The staged table forwarded to ``CreateTableTransaction``.
            catalog: Catalog whose ``glue`` client is used to delete the staging entry.
            database_name: Glue database holding the staging entry.
            table_name: Name of the staging entry.
        """
        super().__init__(staged_table)
        self._catalog = catalog
        self._database_name = database_name
        self._table_name = table_name
        # True until either the commit succeeds or a cleanup attempt has been made.
        self._staging_table_needs_cleanup = True

    def commit_transaction(self) -> Table:
        """Commit the transaction, deleting the staging entry if the commit raises.

        Returns:
            The table returned by the parent ``commit_transaction``.

        Raises:
            BaseException: Whatever the parent commit raised, re-raised unchanged
                after the cleanup attempt.
        """
        try:
            result = super().commit_transaction()
        except BaseException:
            # Catch BaseException (not only Exception) so that an interrupted commit,
            # e.g. KeyboardInterrupt or SystemExit, does not leak the staging entry.
            self._cleanup_staging_table()
            raise
        # Commit succeeded; the staging table is now the real table.
        self._staging_table_needs_cleanup = False
        return result

    def __exit__(self, exctype: type[BaseException] | None, excinst: BaseException | None, exctb: TracebackType | None) -> None:
        """Run the parent ``__exit__`` and then clean up if no commit succeeded.

        The cleanup sits in ``finally`` so it also runs when the parent ``__exit__``
        raises. It is a no-op once a commit has succeeded or a cleanup attempt has
        already been made.
        """
        try:
            super().__exit__(exctype, excinst, exctb)
        finally:
            self._cleanup_staging_table()

    def _cleanup_staging_table(self) -> None:
        """Delete the staging Glue entry once; log instead of raising on failure."""
        if not self._staging_table_needs_cleanup:
            return
        # Clear the flag before the call so a failed delete is never retried.
        self._staging_table_needs_cleanup = False
        try:
            self._catalog.glue.delete_table(DatabaseName=self._database_name, Name=self._table_name)
        except Exception:
            # Best-effort: the traceback is attached only when DEBUG logging is enabled.
            logger.warning(
                f"Failed to clean up S3 Tables staging table {self._database_name}.{self._table_name}",
                exc_info=logger.isEnabledFor(logging.DEBUG),
            )
360+
317361class GlueCatalog (MetastoreCatalog ):
318362 glue : "GlueClient"
319363
@@ -601,6 +645,82 @@ def create_table(
601645 catalog = self ,
602646 )
603647
def create_table_transaction(
    self,
    identifier: str | Identifier,
    schema: Union[Schema, "pa.Schema"],
    location: str | None = None,
    partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
    sort_order: SortOrder = UNSORTED_SORT_ORDER,
    properties: Properties = EMPTY_DICT,
) -> CreateTableTransaction:
    """Create a CreateTableTransaction.

    For S3 Tables federated databases, storage must be allocated before the table
    metadata can be built, because S3 Tables manages the table location. This
    override pre-creates a minimal Glue table entry, reads back the managed location
    from its ``StorageDescriptor``, and builds the staged table at that location.
    The commit path in ``commit_table`` will find the existing Glue table and update
    it with the final metadata pointer.

    For non-S3 Tables databases, this delegates to the base class.

    Args:
        identifier: Table identifier, resolved to a database and table name.
        schema: Schema of the table to create.
        location: Must be ``None`` for S3 Tables databases; forwarded to the base
            class otherwise.
        partition_spec: Partition spec for the new table.
        sort_order: Sort order for the new table.
        properties: Table properties for the new table.

    Returns:
        The base-class transaction for regular databases, or an
        ``_S3TablesCreateTableTransaction`` for S3 Tables databases.

    Raises:
        ValueError: If ``location`` is given for an S3 Tables database, or if the
            pre-created Glue entry carries no storage location.
    """
    database_name, table_name = self.identifier_to_database_and_table(identifier)

    # Regular Glue databases need no staging entry: use the inherited behaviour.
    if not self._is_s3tables_database(database_name):
        return super().create_table_transaction(
            identifier=identifier,
            schema=schema,
            location=location,
            partition_spec=partition_spec,
            sort_order=sort_order,
            properties=properties,
        )

    if location is not None:
        raise ValueError(
            f"Cannot specify a location for S3 Tables table {database_name}.{table_name}. "
            "S3 Tables manages the storage location automatically."
        )

    # Create a minimal table in Glue so S3 Tables allocates storage.
    minimal_table_input = {
        "Name": table_name,
        "Parameters": {"format": "ICEBERG"},
    }
    self._create_glue_table(
        database_name=database_name,
        table_name=table_name,
        table_input=minimal_table_input,
    )

    try:
        # Read the entry back to learn the managed storage location.
        staging_entry = self._get_glue_table(database_name=database_name, table_name=table_name)
        managed_location = staging_entry.get("StorageDescriptor", {}).get("Location")
        if not managed_location:
            raise ValueError(f"S3 Tables did not assign a storage location for {database_name}.{table_name}")

        staged_table = self._create_staged_table(
            identifier=identifier,
            schema=schema,
            location=managed_location,
            partition_spec=partition_spec,
            sort_order=sort_order,
            properties=properties,
        )
    except Exception:
        # Staging failed after the Glue entry was created: remove it (best-effort)
        # and let the original error propagate.
        try:
            self.glue.delete_table(DatabaseName=database_name, Name=table_name)
        except Exception:
            logger.warning(
                f"Failed to clean up S3 Tables table {database_name}.{table_name}",
                exc_info=logger.isEnabledFor(logging.DEBUG),
            )
        raise
    else:
        # From here on the returned transaction owns cleanup of the staging entry.
        return _S3TablesCreateTableTransaction(staged_table, self, database_name, table_name)
723+
604724 def register_table (self , identifier : str | Identifier , metadata_location : str ) -> Table :
605725 """Register a new table using existing metadata.
606726
@@ -649,7 +769,12 @@ def commit_table(
649769 try :
650770 current_glue_table = self ._get_glue_table (database_name = database_name , table_name = table_name )
651771 glue_table_version_id = current_glue_table .get ("VersionId" )
652- current_table = self ._convert_glue_to_iceberg (glue_table = current_glue_table )
772+ # A staging table (pre-created by create_table_transaction for S3 Tables)
773+ # exists in Glue but has no metadata_location yet — skip loading Iceberg metadata.
774+ if current_glue_table .get ("Parameters" , {}).get (METADATA_LOCATION ):
775+ current_table = self ._convert_glue_to_iceberg (glue_table = current_glue_table )
776+ else :
777+ current_table = None
653778 except NoSuchTableError :
654779 current_glue_table = None
655780 glue_table_version_id = None
@@ -669,8 +794,9 @@ def commit_table(
669794 metadata_path = updated_staged_table .metadata_location ,
670795 )
671796
672- if current_table :
673- # table exists, update the table
797+ if current_glue_table is not None :
798+ # Glue table exists — either a fully committed table or a staging table
799+ # pre-created by create_table_transaction. Update it with the metadata pointer.
674800 if not glue_table_version_id :
675801 raise CommitFailedException (
676802 f"Cannot commit { database_name } .{ table_name } because Glue table version id is missing"
@@ -684,7 +810,7 @@ def commit_table(
684810 properties = updated_staged_table .properties ,
685811 metadata = updated_staged_table .metadata ,
686812 glue_table = current_glue_table ,
687- prev_metadata_location = current_table .metadata_location ,
813+ prev_metadata_location = current_table .metadata_location if current_table else None ,
688814 )
689815 self ._update_glue_table (
690816 database_name = database_name ,
0 commit comments