Skip to content

Commit 3a7ed01

Browse files
committed
Requires pyarrow 21 for geometry type
1 parent 34265fb commit 3a7ed01

File tree

15 files changed

+377
-3
lines changed

15 files changed

+377
-3
lines changed

pyiceberg/avro/reader.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,16 @@ def skip(self, decoder: BinaryDecoder) -> None:
260260
decoder.skip_bytes()
261261

262262

263+
class GeographyReader(BinaryReader):
264+
"""Reads a geography from the stream.
265+
"""
266+
267+
268+
class GeometryReader(BinaryReader):
269+
"""Reads a geometry from the stream.
270+
"""
271+
272+
263273
@dataclass(frozen=True, init=False)
264274
class DecimalReader(Reader):
265275
"""Reads a value as a decimal.

pyiceberg/avro/resolver.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
DoubleReader,
3636
FixedReader,
3737
FloatReader,
38+
GeographyReader,
39+
GeometryReader,
3840
IntegerReader,
3941
ListReader,
4042
MapReader,
@@ -60,6 +62,8 @@
6062
DoubleWriter,
6163
FixedWriter,
6264
FloatWriter,
65+
GeographyWriter,
66+
GeometryWriter,
6367
IntegerWriter,
6468
ListWriter,
6569
MapWriter,
@@ -96,6 +100,8 @@
96100
FloatType,
97101
IcebergType,
98102
IntegerType,
103+
GeographyType,
104+
GeometryType,
99105
ListType,
100106
LongType,
101107
MapType,
@@ -181,6 +187,12 @@ def visit_float(self, float_type: FloatType) -> Writer:
181187
def visit_double(self, double_type: DoubleType) -> Writer:
182188
return DoubleWriter()
183189

190+
def visit_geography(self, geography_type: GeographyType) -> Writer:
191+
return GeographyWriter()
192+
193+
def visit_geometry(self, geometry_type: GeometryType) -> Writer:
194+
return GeometryWriter()
195+
184196
def visit_date(self, date_type: DateType) -> Writer:
185197
return DateWriter()
186198

@@ -338,6 +350,12 @@ def visit_decimal(self, decimal_type: DecimalType, partner: Optional[IcebergType
338350
def visit_date(self, date_type: DateType, partner: Optional[IcebergType]) -> Writer:
339351
return DateWriter()
340352

353+
def visit_geography(self, geography_type: GeographyType, partner: Optional[IcebergType]) -> Writer:
354+
return GeographyWriter()
355+
356+
def visit_geometry(self, geometry_type: GeometryType, partner: Optional[IcebergType]) -> Writer:
357+
return GeometryWriter()
358+
341359
def visit_time(self, time_type: TimeType, partner: Optional[IcebergType]) -> Writer:
342360
return TimeWriter()
343361

@@ -504,6 +522,12 @@ def visit_fixed(self, fixed_type: FixedType, partner: Optional[IcebergType]) ->
504522
def visit_binary(self, binary_type: BinaryType, partner: Optional[IcebergType]) -> Reader:
505523
return BinaryReader()
506524

525+
def visit_geography(self, geography_type: GeographyType, partner: Optional[IcebergType]) -> Reader:
526+
return GeographyReader()
527+
528+
def visit_geometry(self, geometry_type: GeometryType, partner: Optional[IcebergType]) -> Reader:
529+
return GeometryReader()
530+
507531
def visit_unknown(self, unknown_type: UnknownType, partner: Optional[IcebergType]) -> Reader:
508532
return UnknownReader()
509533

pyiceberg/avro/writer.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,18 @@ def write(self, encoder: BinaryEncoder, val: Any) -> None:
161161
encoder.write_bytes(val)
162162

163163

164+
@dataclass(frozen=True)
165+
class GeographyWriter(Writer):
166+
def write(self, encoder: BinaryEncoder, val: Any) -> None:
167+
encoder.write_bytes(val)
168+
169+
170+
@dataclass(frozen=True)
171+
class GeometryWriter(Writer):
172+
def write(self, encoder: BinaryEncoder, val: Any) -> None:
173+
encoder.write_bytes(val)
174+
175+
164176
@dataclass(frozen=True)
165177
class DecimalWriter(Writer):
166178
precision: int = dataclassfield()

pyiceberg/conversions.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@
5151
DoubleType,
5252
FixedType,
5353
FloatType,
54+
GeometryType,
55+
GeographyType,
5456
IntegerType,
5557
LongType,
5658
PrimitiveType,
@@ -167,6 +169,8 @@ def _(_: UUIDType, value_str: str) -> uuid.UUID:
167169

168170
@partition_to_py.register(FixedType)
169171
@partition_to_py.register(BinaryType)
172+
@partition_to_py.register(GeographyType)
173+
@partition_to_py.register(GeometryType)
170174
@handle_none
171175
def _(_: PrimitiveType, value_str: str) -> bytes:
172176
return bytes(value_str, UTF8)
@@ -275,6 +279,8 @@ def _(_: UUIDType, value: Union[uuid.UUID, bytes]) -> bytes:
275279

276280
@to_bytes.register(BinaryType)
277281
@to_bytes.register(FixedType)
282+
@to_bytes.register(GeographyType)
283+
@to_bytes.register(GeometryType)
278284
def _(_: PrimitiveType, value: bytes) -> bytes:
279285
return value
280286

@@ -355,6 +361,8 @@ def _(_: StringType, b: bytes) -> str:
355361

356362
@from_bytes.register(BinaryType)
357363
@from_bytes.register(FixedType)
364+
@from_bytes.register(GeographyType)
365+
@from_bytes.register(GeometryType)
358366
@from_bytes.register(UUIDType)
359367
def _(_: PrimitiveType, b: bytes) -> bytes:
360368
return b
@@ -453,6 +461,8 @@ def _(t: FixedType, b: bytes) -> str:
453461

454462

455463
@to_json.register(BinaryType)
464+
@to_json.register(GeographyType)
465+
@to_json.register(GeometryType)
456466
def _(_: BinaryType, b: bytes) -> str:
457467
"""Python bytes serializes into hexadecimal encoded string."""
458468
return codecs.encode(b, "hex").decode(UTF8)
@@ -580,6 +590,16 @@ def _(_: BinaryType, val: Union[bytes, str]) -> bytes:
580590
return val
581591

582592

593+
@from_json.register(GeographyType)
594+
def _(_: GeographyType, val: bytes) -> bytes:
595+
return val
596+
597+
598+
@from_json.register(GeometryType)
599+
def _(_: GeometryType, val: bytes) -> bytes:
600+
return val
601+
602+
583603
@from_json.register(DecimalType)
584604
def _(_: DecimalType, val: str) -> Decimal:
585605
"""Convert JSON string into a Python Decimal."""

pyiceberg/io/pyarrow.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@
164164
DoubleType,
165165
FixedType,
166166
FloatType,
167+
GeographyType,
168+
GeometryType,
167169
IcebergType,
168170
IntegerType,
169171
ListType,
@@ -802,6 +804,12 @@ def visit_unknown(self, _: UnknownType) -> pa.DataType:
802804
def visit_binary(self, _: BinaryType) -> pa.DataType:
803805
return pa.large_binary()
804806

807+
def visit_geography(self, _: GeographyType) -> pa.DataType:
808+
return pa.large_binary()
809+
810+
def visit_geometry(self, _: GeometryType) -> pa.DataType:
811+
return pa.large_binary()
812+
805813

806814
def _convert_scalar(value: Any, iceberg_type: IcebergType) -> pa.scalar:
807815
if not isinstance(iceberg_type, PrimitiveType):
@@ -2064,6 +2072,12 @@ def visit_uuid(self, uuid_type: UUIDType) -> str:
20642072
def visit_binary(self, binary_type: BinaryType) -> str:
20652073
return "BYTE_ARRAY"
20662074

2075+
def visit_geography(self, geography_type: GeographyType) -> str:
2076+
return "BYTE_ARRAY"
2077+
2078+
def visit_geometry(self, geometry_type: GeometryType) -> str:
2079+
return "BYTE_ARRAY"
2080+
20672081
def visit_unknown(self, unknown_type: UnknownType) -> str:
20682082
return "UNKNOWN"
20692083

pyiceberg/schema.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@
4848
DoubleType,
4949
FixedType,
5050
FloatType,
51+
GeographyType,
52+
GeometryType,
5153
IcebergType,
5254
IntegerType,
5355
ListType,
@@ -556,6 +558,10 @@ def primitive(self, primitive: PrimitiveType, primitive_partner: Optional[P]) ->
556558
return self.visit_fixed(primitive, primitive_partner)
557559
elif isinstance(primitive, BinaryType):
558560
return self.visit_binary(primitive, primitive_partner)
561+
elif isinstance(primitive, GeographyType):
562+
return self.visit_geography(primitive, primitive_partner)
563+
elif isinstance(primitive, GeometryType):
564+
return self.visit_geometry(primitive, primitive_partner)
559565
elif isinstance(primitive, UnknownType):
560566
return self.visit_unknown(primitive, primitive_partner)
561567
else:
@@ -625,6 +631,14 @@ def visit_fixed(self, fixed_type: FixedType, partner: Optional[P]) -> T:
625631
def visit_binary(self, binary_type: BinaryType, partner: Optional[P]) -> T:
626632
"""Visit a BinaryType."""
627633

634+
@abstractmethod
635+
def visit_geography(self, date_type: GeographyType, partner: Optional[P]) -> T:
636+
"""Visit a GeographyType."""
637+
638+
@abstractmethod
639+
def visit_geometry(self, date_type: GeometryType, partner: Optional[P]) -> T:
640+
"""Visit a GeometryType."""
641+
628642
@abstractmethod
629643
def visit_unknown(self, unknown_type: UnknownType, partner: Optional[P]) -> T:
630644
"""Visit a UnknownType."""
@@ -750,6 +764,10 @@ def primitive(self, primitive: PrimitiveType) -> T:
750764
return self.visit_uuid(primitive)
751765
elif isinstance(primitive, BinaryType):
752766
return self.visit_binary(primitive)
767+
elif isinstance(primitive, GeographyType):
768+
return self.visit_geography(primitive)
769+
elif isinstance(primitive, GeometryType):
770+
return self.visit_geometry(primitive)
753771
elif isinstance(primitive, UnknownType):
754772
return self.visit_unknown(primitive)
755773
else:
@@ -819,6 +837,14 @@ def visit_uuid(self, uuid_type: UUIDType) -> T:
819837
def visit_binary(self, binary_type: BinaryType) -> T:
820838
"""Visit a BinaryType."""
821839

840+
@abstractmethod
841+
def visit_geography(self, geography_type: GeographyType) -> T:
842+
"""Visit a GeographyType."""
843+
844+
@abstractmethod
845+
def visit_geometry(self, geometry_type: GeometryType) -> T:
846+
"""Visit a GeometryType."""
847+
822848
@abstractmethod
823849
def visit_unknown(self, unknown_type: UnknownType) -> T:
824850
"""Visit a UnknownType."""
@@ -1665,9 +1691,13 @@ def _(file_type: StringType, read_type: IcebergType) -> IcebergType:
16651691

16661692

16671693
@promote.register(BinaryType)
1694+
@promote.register(GeographyType)
1695+
@promote.register(GeometryType)
16681696
def _(file_type: BinaryType, read_type: IcebergType) -> IcebergType:
16691697
if isinstance(read_type, StringType):
16701698
return read_type
1699+
elif isinstance(read_type, (GeographyType, GeometryType)):
1700+
return read_type
16711701
else:
16721702
raise ResolveError(f"Cannot promote an binary to {read_type}")
16731703

pyiceberg/transforms.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@
7272
DateType,
7373
DecimalType,
7474
FixedType,
75+
GeographyType,
76+
GeometryType,
7577
IcebergType,
7678
IntegerType,
7779
LongType,
@@ -368,7 +370,7 @@ def hash_func(v: Any) -> int:
368370
def hash_func(v: Any) -> int:
369371
return mmh3.hash(decimal_to_bytes(v))
370372

371-
elif isinstance(source, (StringType, FixedType, BinaryType)):
373+
elif isinstance(source, (StringType, FixedType, BinaryType, GeographyType, GeometryType)):
372374

373375
def hash_func(v: Any) -> int:
374376
return mmh3.hash(v)

0 commit comments

Comments
 (0)