Skip to content

Commit ac271a2

Browse files
committed
Project PyArrow table schema
1 parent 65fd00a commit ac271a2

File tree

1 file changed

+8
-5
lines changed

1 file changed

+8
-5
lines changed

pyiceberg/table/__init__.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT)
482482
f"Not all partition types are supported for writes. Following partitions cannot be written using pyarrow: {unsupported_partitions}."
483483
)
484484

485-
_check_schema_compatible(self._table.schema(), other_schema=df.schema)
485+
# _check_schema_compatible(self._table.schema(), other_schema=df.schema)
486486

487487
with self.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot:
488488
# skip writing data files if the dataframe is empty
@@ -519,7 +519,7 @@ def overwrite(
519519
if len(self._table.spec().fields) > 0:
520520
raise ValueError("Cannot write to partitioned tables")
521521

522-
_check_schema_compatible(self._table.schema(), other_schema=df.schema)
522+
# _check_schema_compatible(self._table.schema(), other_schema=df.schema)
523523

524524
with self.update_snapshot(snapshot_properties=snapshot_properties).overwrite() as update_snapshot:
525525
# skip writing data files if the dataframe is empty
@@ -2885,7 +2885,7 @@ def _dataframe_to_data_files(
28852885
Returns:
28862886
An iterable that supplies datafiles that represent the table.
28872887
"""
2888-
from pyiceberg.io.pyarrow import bin_pack_arrow_table, write_file
2888+
from pyiceberg.io.pyarrow import bin_pack_arrow_table, pyarrow_to_schema, write_file
28892889

28902890
counter = itertools.count(0)
28912891
write_uuid = write_uuid or uuid.uuid4()
@@ -2895,6 +2895,9 @@ def _dataframe_to_data_files(
28952895
default=TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT,
28962896
)
28972897

2898+
# projects schema to match the pyarrow table
2899+
write_schema = pyarrow_to_schema(df.schema, name_mapping=table_metadata.schema().name_mapping)
2900+
28982901
if len(table_metadata.spec().fields) > 0:
28992902
partitions = _determine_partitions(spec=table_metadata.spec(), schema=table_metadata.schema(), arrow_table=df)
29002903
yield from write_file(
@@ -2906,7 +2909,7 @@ def _dataframe_to_data_files(
29062909
task_id=next(counter),
29072910
record_batches=batches,
29082911
partition_key=partition.partition_key,
2909-
schema=table_metadata.schema(),
2912+
schema=write_schema,
29102913
)
29112914
for partition in partitions
29122915
for batches in bin_pack_arrow_table(partition.arrow_table_partition, target_file_size)
@@ -2917,7 +2920,7 @@ def _dataframe_to_data_files(
29172920
io=io,
29182921
table_metadata=table_metadata,
29192922
tasks=iter([
2920-
WriteTask(write_uuid=write_uuid, task_id=next(counter), record_batches=batches, schema=table_metadata.schema())
2923+
WriteTask(write_uuid=write_uuid, task_id=next(counter), record_batches=batches, schema=write_schema)
29212924
for batches in bin_pack_arrow_table(df, target_file_size)
29222925
]),
29232926
)

0 commit comments

Comments (0)