Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,9 @@ def visit_uuid(self, _: UUIDType) -> pa.DataType:
return pa.uuid()

def visit_unknown(self, _: UnknownType) -> pa.DataType:
    """Return the PyArrow null type for an Iceberg UnknownType.

    Note: UnknownType can be promoted to any primitive type in V3+ tables
    per the Iceberg spec.
    """
    null_type = pa.null()
    return null_type

def visit_binary(self, _: BinaryType) -> pa.DataType:
Expand Down Expand Up @@ -1358,6 +1361,8 @@ def primitive(self, primitive: pa.DataType) -> PrimitiveType:
primitive = cast(pa.FixedSizeBinaryType, primitive)
return FixedType(primitive.byte_width)
elif pa.types.is_null(primitive):
# PyArrow null type (pa.null()) is converted to Iceberg UnknownType
# UnknownType can be promoted to any primitive type in V3+ tables per the Iceberg spec
return UnknownType()
elif isinstance(primitive, pa.UuidType):
return UUIDType()
Expand Down
21 changes: 19 additions & 2 deletions pyiceberg/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1692,6 +1692,15 @@ def _(file_type: FixedType, read_type: IcebergType) -> IcebergType:
raise ResolveError(f"Cannot promote {file_type} to {read_type}")


@promote.register(UnknownType)
def _(file_type: UnknownType, read_type: IcebergType) -> IcebergType:
    """Resolve promotion when the file type is UnknownType.

    Per the V3 spec, "Unknown" can be promoted to any primitive type, so the
    requested ``read_type`` is returned as-is when it is a PrimitiveType.

    Raises:
        ResolveError: If ``read_type`` is not a PrimitiveType.
    """
    # Guard clause: anything that is not a primitive is rejected up front.
    if not isinstance(read_type, PrimitiveType):
        raise ResolveError(f"Cannot promote {file_type} to {read_type}")
    return read_type


def _check_schema_compatible(requested_schema: Schema, provided_schema: Schema) -> None:
"""
Check if the `provided_schema` is compatible with `requested_schema`.
Expand Down Expand Up @@ -1760,8 +1769,16 @@ def _is_field_compatible(self, lhs: NestedField) -> bool:
promote(rhs.field_type, lhs.field_type)
self.rich_table.add_row("✅", str(lhs), str(rhs))
return True
except ResolveError:
self.rich_table.add_row("❌", str(lhs), str(rhs))
except ResolveError as e:
# UnknownType can only be promoted to Primitive types
if isinstance(rhs.field_type, UnknownType):
if isinstance(lhs.field_type, (ListType, MapType, StructType)):
Comment thread
kris-gaudel marked this conversation as resolved.
Outdated
error_msg = f"PyArrow null type (UnknownType) cannot be promoted to non-primitive type {lhs.field_type}. UnknownType can only be promoted to primitive types (string, int, boolean, etc.) in V3+ tables."
else:
error_msg = f"PyArrow null type (UnknownType) cannot be promoted to {lhs.field_type}. This may be due to table format version limitations (V1/V2 tables don't support UnknownType promotion)."
Comment thread
kris-gaudel marked this conversation as resolved.
Outdated
self.rich_table.add_row("❌", str(lhs), f"{str(rhs)} - {error_msg}")
else:
self.rich_table.add_row("❌", str(lhs), str(rhs))
return False

def schema(self, schema: Schema, struct_result: Callable[[], bool]) -> bool:
Expand Down
Loading