|
26 | 26 | Union, |
27 | 27 | ) |
28 | 28 |
|
29 | | -from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType, visit |
| 29 | +from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType, visit, ICEBERG_FIELD_NAME_PROP, FIELD_ID_PROP, make_compatible_name, _valid_avro_name |
30 | 30 | from pyiceberg.types import ( |
31 | 31 | BinaryType, |
32 | 32 | BooleanType, |
@@ -225,13 +225,13 @@ def _convert_field(self, field: Dict[str, Any]) -> NestedField: |
225 | 225 | Returns: |
226 | 226 | The Iceberg equivalent field. |
227 | 227 | """ |
228 | | - if "field-id" not in field: |
229 | | - raise ValueError(f"Cannot convert field, missing field-id: {field}") |
| 228 | + if FIELD_ID_PROP not in field: |
| 229 | + raise ValueError(f"Cannot convert field, missing {FIELD_ID_PROP}: {field}") |
230 | 230 |
|
231 | 231 | plain_type, required = self._resolve_union(field["type"]) |
232 | 232 |
|
233 | 233 | return NestedField( |
234 | | - field_id=field["field-id"], |
| 234 | + field_id=field[FIELD_ID_PROP], |
235 | 235 | name=field["name"], |
236 | 236 | field_type=self._convert_schema(plain_type), |
237 | 237 | required=required, |
@@ -524,12 +524,19 @@ def field(self, field: NestedField, field_result: AvroType) -> AvroType: |
524 | 524 | if isinstance(field_result, dict) and field_result.get("type") == "record": |
525 | 525 | field_result["name"] = f"r{field.field_id}" |
526 | 526 |
|
| 527 | + orig_field_name = field.name |
| 528 | + is_valid_field_name = _valid_avro_name(orig_field_name) |
| 529 | + field_name = orig_field_name if is_valid_field_name else make_compatible_name(orig_field_name) |
| 530 | + |
527 | 531 | result = { |
528 | | - "name": field.name, |
529 | | - "field-id": field.field_id, |
| 532 | + "name": field_name, |
| 533 | + FIELD_ID_PROP: field.field_id, |
530 | 534 | "type": field_result if field.required else ["null", field_result], |
531 | 535 | } |
532 | 536 |
|
| 537 | + if not is_valid_field_name: |
| 538 | + result[ICEBERG_FIELD_NAME_PROP] = orig_field_name |
| 539 | + |
533 | 540 | if field.write_default is not None: |
534 | 541 | result["default"] = field.write_default |
535 | 542 | elif field.optional: |
@@ -564,8 +571,8 @@ def map(self, map_type: MapType, key_result: AvroType, value_result: AvroType) - |
564 | 571 | "type": "record", |
565 | 572 | "name": f"k{self.last_map_key_field_id}_v{self.last_map_value_field_id}", |
566 | 573 | "fields": [ |
567 | | - {"name": "key", "type": key_result, "field-id": self.last_map_key_field_id}, |
568 | | - {"name": "value", "type": value_result, "field-id": self.last_map_value_field_id}, |
| 574 | + {"name": "key", "type": key_result, FIELD_ID_PROP: self.last_map_key_field_id}, |
| 575 | + {"name": "value", "type": value_result, FIELD_ID_PROP: self.last_map_value_field_id}, |
569 | 576 | ], |
570 | 577 | }, |
571 | 578 | "logicalType": "map", |
|
0 commit comments