Skip to content

Commit 3126608

Browse files
committed
project pyarrow table schema
1 parent eded507 commit 3126608

File tree

1 file changed

+8
-5
lines changed

(1 file changed: 8 additions, 5 deletions)

pyiceberg/table/__init__.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,7 @@ def append(self, df: pa.Table, snapshot_properties: Dict[str, str] = EMPTY_DICT)
483483
f"Not all partition types are supported for writes. Following partitions cannot be written using pyarrow: {unsupported_partitions}."
484484
)
485485

486-
_check_schema_compatible(self._table.schema(), other_schema=df.schema)
486+
# _check_schema_compatible(self._table.schema(), other_schema=df.schema)
487487

488488
with self.update_snapshot(snapshot_properties=snapshot_properties).fast_append() as update_snapshot:
489489
# skip writing data files if the dataframe is empty
@@ -520,7 +520,7 @@ def overwrite(
520520
if len(self._table.spec().fields) > 0:
521521
raise ValueError("Cannot write to partitioned tables")
522522

523-
_check_schema_compatible(self._table.schema(), other_schema=df.schema)
523+
# _check_schema_compatible(self._table.schema(), other_schema=df.schema)
524524

525525
with self.update_snapshot(snapshot_properties=snapshot_properties).overwrite() as update_snapshot:
526526
# skip writing data files if the dataframe is empty
@@ -2904,7 +2904,7 @@ def _dataframe_to_data_files(
29042904
Returns:
29052905
An iterable that supplies datafiles that represent the table.
29062906
"""
2907-
from pyiceberg.io.pyarrow import bin_pack_arrow_table, write_file
2907+
from pyiceberg.io.pyarrow import bin_pack_arrow_table, pyarrow_to_schema, write_file
29082908

29092909
counter = itertools.count(0)
29102910
write_uuid = write_uuid or uuid.uuid4()
@@ -2914,6 +2914,9 @@ def _dataframe_to_data_files(
29142914
default=TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT,
29152915
)
29162916

2917+
# projects schema to match the pyarrow table
2918+
write_schema = pyarrow_to_schema(df.schema, name_mapping=table_metadata.schema().name_mapping)
2919+
29172920
if len(table_metadata.spec().fields) > 0:
29182921
partitions = _determine_partitions(spec=table_metadata.spec(), schema=table_metadata.schema(), arrow_table=df)
29192922
yield from write_file(
@@ -2925,7 +2928,7 @@ def _dataframe_to_data_files(
29252928
task_id=next(counter),
29262929
record_batches=batches,
29272930
partition_key=partition.partition_key,
2928-
schema=table_metadata.schema(),
2931+
schema=write_schema,
29292932
)
29302933
for partition in partitions
29312934
for batches in bin_pack_arrow_table(partition.arrow_table_partition, target_file_size)
@@ -2936,7 +2939,7 @@ def _dataframe_to_data_files(
29362939
io=io,
29372940
table_metadata=table_metadata,
29382941
tasks=iter([
2939-
WriteTask(write_uuid=write_uuid, task_id=next(counter), record_batches=batches, schema=table_metadata.schema())
2942+
WriteTask(write_uuid=write_uuid, task_id=next(counter), record_batches=batches, schema=write_schema)
29402943
for batches in bin_pack_arrow_table(df, target_file_size)
29412944
]),
29422945
)

0 commit comments

Comments (0)