Skip to content

Commit 06dd647

Browse files
committed
write with sanitized column names
1 parent 4148edb commit 06dd647

2 files changed

Lines changed: 4 additions & 1 deletion

File tree

pyiceberg/io/pyarrow.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1761,7 +1761,7 @@ def data_file_statistics_from_parquet_metadata(
1761 1761

1762 1762

1763 1763
def write_file(io: FileIO, table_metadata: TableMetadata, tasks: Iterator[WriteTask]) -> Iterator[DataFile]:
1764-
schema = table_metadata.schema()
1764+
schema = sanitize_column_names(table_metadata.schema())
1765 1765
arrow_file_schema = schema.as_arrow()
1766 1766
parquet_writer_kwargs = _get_parquet_writer_kwargs(table_metadata.properties)
1767 1767

pyiceberg/table/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,7 @@
92 92
SchemaWithPartnerVisitor,
93 93
assign_fresh_schema_ids,
94 94
promote,
95+
sanitize_column_names,
95 96
visit,
96 97
visit_with_partner,
97 98
)
@@ -2702,6 +2703,8 @@ def _dataframe_to_data_files(
2702 2703
property_name=TableProperties.WRITE_TARGET_FILE_SIZE_BYTES,
2703 2704
default=TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT,
2704 2705
)
2706+
sanitized_arrow_schema = sanitize_column_names(table_metadata.schema()).as_arrow()
2707+
df = df.rename_columns(sanitized_arrow_schema.names)
2705 2708

2706 2709
if len(table_metadata.spec().fields) > 0:
2707 2710
partitions = _determine_partitions(spec=table_metadata.spec(), schema=table_metadata.schema(), arrow_table=df)

0 commit comments

Comments
 (0)