Skip to content

Commit a8cc06c

Browse files
committed
Use the same regex as in Iceberg (Java)
1 parent 7024d54 commit a8cc06c

File tree

2 files changed

+15
-6
lines changed

2 files changed

+15
-6
lines changed

pyiceberg/types.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@
6262
from pyiceberg.utils.singleton import Singleton
6363

6464
DECIMAL_REGEX = re.compile(r"decimal\((\d+),\s*(\d+)\)")
65-
GEOGRAPHY_REGEX = re.compile(r"""geography(\(([a-zA-Z0-9:]+)(,\s*([a-zA-Z]+))?\))?""")
66-
GEOMETRY_REGEX = re.compile(r"""geometry(\(([a-zA-Z0-9:]+)\))?""")
65+
GEOGRAPHY_REGEX = re.compile(r"""geography\s*(?:\(\s*([^,]*?)\s*(?:,\s*(\w*)\s*)?\))?""", re.IGNORECASE)
66+
GEOMETRY_REGEX = re.compile(r"""geometry\s*(?:\(\s*([^)]*?)\s*\))?""", re.IGNORECASE)
6767
FIXED = "fixed"
6868
FIXED_PARSER = ParseNumberFromBrackets(FIXED)
6969

@@ -92,10 +92,13 @@ def _parse_geography_type(geography: Any) -> Tuple[str, GeographyType.EdgeAlgori
9292
if isinstance(geography, str):
9393
matches = GEOGRAPHY_REGEX.search(geography)
9494
if matches:
95+
crs = None
9596
edge_algorithm = None
96-
if matches.group(4):
97-
edge_algorithm = GeographyType.EdgeAlgorithm(matches.group(4))
98-
return matches.group(2), edge_algorithm
97+
if matches.group(1):
98+
crs = matches.group(1)
99+
if matches.group(2):
100+
edge_algorithm = GeographyType.EdgeAlgorithm(matches.group(2).lower())
101+
return crs, edge_algorithm
99102
else:
100103
raise ValidationError(f"Could not parse {geography} into a GeographyType")
101104
elif isinstance(geography, dict):
@@ -107,7 +110,10 @@ def _parse_geometry_type(geometry: Any) -> str:
107110
if isinstance(geometry, str):
108111
matches = GEOMETRY_REGEX.search(geometry)
109112
if matches:
110-
return matches.group(2)
113+
crs = None
114+
if matches.group(1):
115+
crs = matches.group(1)
116+
return crs
111117
else:
112118
raise ValidationError(f"Could not parse {geometry} into a GeometryType")
113119
elif isinstance(geometry, dict):

tests/test_types.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,8 @@ def test_serialization_geography() -> None:
507507

508508
def test_deserialization_geography() -> None:
509509
assert GeographyType.model_validate_json('"geography"') == GeographyType()
510+
assert GeographyType.model_validate_json('"geography(OGC:CRS84, thomas)"') == GeographyType(crs="OGC:CRS84", edge_algorithm=GeographyType.EdgeAlgorithm.THOMAS)
511+
assert GeographyType.model_validate_json('"geography(OGC:CRS84)"') == GeographyType(crs="OGC:CRS84", edge_algorithm=None)
510512

511513

512514
def test_str_geography() -> None:
@@ -523,6 +525,7 @@ def test_serialization_geometry() -> None:
523525

524526
def test_deserialization_geometry() -> None:
525527
assert GeometryType.model_validate_json('"geometry"') == GeometryType()
528+
assert GeometryType.model_validate_json('"geometry(OGC:CRS84)"') == GeometryType(crs="OGC:CRS84")
526529

527530

528531
def test_str_geometry() -> None:

0 commit comments

Comments
 (0)