@@ -483,7 +483,7 @@ def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT)
483483 f"Not all partition types are supported for writes. Following partitions cannot be written using pyarrow: {unsupported_partitions}."
484484 )
485485
486- _check_schema_compatible(self._table.schema(), other_schema=df.schema)
486+ # _check_schema_compatible(self._table.schema(), other_schema=df.schema)
487487
488488 with self.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot:
489489 # skip writing data files if the dataframe is empty
@@ -520,7 +520,7 @@ def overwrite(
520520 if len(self._table.spec().fields) > 0:
521521 raise ValueError("Cannot write to partitioned tables")
522522
523- _check_schema_compatible(self._table.schema(), other_schema=df.schema)
523+ # _check_schema_compatible(self._table.schema(), other_schema=df.schema)
524524
525525 with self.update_snapshot(snapshot_properties=snapshot_properties).overwrite() as update_snapshot:
526526 # skip writing data files if the dataframe is empty
@@ -2904,7 +2904,7 @@ def _dataframe_to_data_files(
29042904 Returns:
29052905 An iterable that supplies datafiles that represent the table.
29062906 """
2907- from pyiceberg.io.pyarrow import bin_pack_arrow_table, write_file
2907+ from pyiceberg.io.pyarrow import bin_pack_arrow_table, pyarrow_to_schema, write_file
29082908
29092909 counter = itertools.count(0)
29102910 write_uuid = write_uuid or uuid.uuid4()
@@ -2914,6 +2914,9 @@ def _dataframe_to_data_files(
29142914 default=TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT,
29152915 )
29162916
2917+ # projects schema to match the pyarrow table
2918+ write_schema = pyarrow_to_schema(df.schema, name_mapping=table_metadata.schema().name_mapping)
2919+
29172920 if len(table_metadata.spec().fields) > 0:
29182921 partitions = _determine_partitions(spec=table_metadata.spec(), schema=table_metadata.schema(), arrow_table=df)
29192922 yield from write_file(
@@ -2925,7 +2928,7 @@ def _dataframe_to_data_files(
29252928 task_id=next(counter),
29262929 record_batches=batches,
29272930 partition_key=partition.partition_key,
2928- schema=table_metadata.schema(),
2931+ schema=write_schema,
29292932 )
29302933 for partition in partitions
29312934 for batches in bin_pack_arrow_table(partition.arrow_table_partition, target_file_size)
@@ -2936,7 +2939,7 @@ def _dataframe_to_data_files(
29362939 io=io,
29372940 table_metadata=table_metadata,
29382941 tasks=iter([
2939- WriteTask(write_uuid=write_uuid, task_id=next(counter), record_batches=batches, schema=table_metadata.schema())
2942+ WriteTask(write_uuid=write_uuid, task_id=next(counter), record_batches=batches, schema=write_schema)
29402943 for batches in bin_pack_arrow_table(df, target_file_size)
29412944 ]),
29422945 )
0 commit comments