@@ -428,6 +428,44 @@ def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT)
428428 for data_file in data_files :
429429 update_snapshot .append_data_file (data_file )
430430
def merge_append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> None:
    """
    Shorthand API for merge-appending a PyArrow table to a table transaction.

    The dataframe is validated against the transaction's table metadata, cast to
    the table's Arrow schema when the two differ, written out as data files and
    registered through the ``merge_append`` snapshot producer.

    Args:
        df: The Arrow dataframe that will be appended to the table
        snapshot_properties: Custom properties to be added to the snapshot summary

    Raises:
        ModuleNotFoundError: If PyArrow is not installed.
        ValueError: If ``df`` is not a PyArrow table, or if the partition spec
            contains a transform that cannot be written using PyArrow.
            Errors raised by ``_check_schema_compatible`` are propagated as-is.
    """
    try:
        import pyarrow as pa
    except ModuleNotFoundError as e:
        raise ModuleNotFoundError("For writes PyArrow needs to be installed") from e

    if not isinstance(df, pa.Table):
        raise ValueError(f"Expected PyArrow table, got: {df}")

    # Resolve spec and schema from the same metadata object, read once, so that
    # validation and the written data files cannot disagree with each other.
    # The transaction's metadata is used (rather than ``self._table``) so that
    # changes staged earlier in this transaction are taken into account.
    table_metadata = self.table_metadata

    if unsupported_partitions := [
        field for field in table_metadata.spec().fields if not field.transform.supports_pyarrow_transform
    ]:
        raise ValueError(
            f"Not all partition types are supported for writes. Following partitions cannot be written using pyarrow: {unsupported_partitions}."
        )

    table_schema = table_metadata.schema()
    _check_schema_compatible(table_schema, other_schema=df.schema)
    # cast if the two schemas are compatible but not equal
    table_arrow_schema = table_schema.as_arrow()
    if table_arrow_schema != df.schema:
        df = df.cast(table_arrow_schema)

    with self.update_snapshot(snapshot_properties=snapshot_properties).merge_append() as update_snapshot:
        # skip writing data files if the dataframe is empty
        if df.shape[0] > 0:
            data_files = _dataframe_to_data_files(
                table_metadata=table_metadata, write_uuid=update_snapshot.commit_uuid, df=df, io=self._table.io
            )
            for data_file in data_files:
                update_snapshot.append_data_file(data_file)

431469 def overwrite (
432470 self , df : pa .Table , overwrite_filter : BooleanExpression = ALWAYS_TRUE , snapshot_properties : Dict [str , str ] = EMPTY_DICT
433471 ) -> None :
@@ -1352,6 +1390,17 @@ def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT)
13521390 with self .transaction () as tx :
13531391 tx .append (df = df , snapshot_properties = snapshot_properties )
13541392
def merge_append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> None:
    """
    Shorthand API for merge-appending a PyArrow table to the table.

    Opens a transaction on this table and delegates to the transaction's
    ``merge_append`` inside the transaction's context.

    Args:
        df: The Arrow dataframe whose rows will be appended to the table
        snapshot_properties: Custom properties to be added to the snapshot summary
    """
    transaction = self.transaction()
    with transaction as txn:
        txn.merge_append(df=df, snapshot_properties=snapshot_properties)
1403+
13551404 def overwrite (
13561405 self , df : pa .Table , overwrite_filter : BooleanExpression = ALWAYS_TRUE , snapshot_properties : Dict [str , str ] = EMPTY_DICT
13571406 ) -> None :
0 commit comments