Skip to content

Commit d02397a

Browse files
authored
[GH-3021] Python Box3DType UDT bindings (#3022)
1 parent 6fed0da commit d02397a

6 files changed

Lines changed: 109 additions & 12 deletions

File tree

python/sedona/spark/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
from sedona.spark.sql.st_predicates import *
5858
from sedona.spark.sql.types import (
5959
Box2DType,
60+
Box3DType,
6061
GeometryType,
6162
GeographyType,
6263
RasterType,
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
19+
class Box3D:
    """Axis-aligned planar bounding box in three dimensions.

    The box is described by six floats: the lower corner
    (``xmin``, ``ymin``, ``zmin``) and the upper corner
    (``xmax``, ``ymax``, ``zmax``).

    A missing box is modelled as ``None`` (SQL NULL) on the column, not as
    a special in-band value, following PostGIS ``box3d`` semantics.
    Geometries that carry no Z dimension contribute ``z = 0``. Instances are
    expected to hold valid, finite bounds; this class only coerces its inputs
    to ``float`` and does not validate them. Inverted bounds
    (``xmin > xmax`` and so on) are rejected by the Box3D predicates, because
    Z has no wraparound convention.
    """

    __slots__ = ("xmin", "ymin", "zmin", "xmax", "ymax", "zmax")

    def __init__(
        self,
        xmin: float,
        ymin: float,
        zmin: float,
        xmax: float,
        ymax: float,
        zmax: float,
    ):
        """Store the six bounds, converting each one with ``float()``.

        Raises whatever ``float()`` raises for a value it cannot convert.
        """
        # The positional order of ``bounds`` matches ``__slots__``, so each
        # raw value lands in the slot of the same name.
        bounds = (xmin, ymin, zmin, xmax, ymax, zmax)
        for slot, raw in zip(Box3D.__slots__, bounds):
            setattr(self, slot, float(raw))

    def __eq__(self, other: object) -> bool:
        """Return True when ``other`` is a Box3D with six equal bounds.

        Returns ``NotImplemented`` for any non-Box3D operand so Python can
        try the reflected comparison.
        """
        if isinstance(other, Box3D):
            # Compare coordinate by coordinate (not tuple-to-tuple) so that
            # float ``==`` semantics apply to every bound, NaN included.
            return all(
                getattr(self, slot) == getattr(other, slot)
                for slot in Box3D.__slots__
            )
        return NotImplemented

    def __hash__(self) -> int:
        """Hash the six bounds as a tuple, consistent with ``__eq__``."""
        return hash(tuple(getattr(self, slot) for slot in Box3D.__slots__))

    def __repr__(self) -> str:
        """Return ``Box3D(xmin, ymin, zmin, xmax, ymax, zmax)``."""
        return "Box3D({}, {}, {}, {}, {}, {})".format(
            self.xmin,
            self.ymin,
            self.zmin,
            self.xmax,
            self.ymax,
            self.zmax,
        )

python/sedona/spark/sql/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from sedona.spark.sql.st_predicates import *
3333
from sedona.spark.sql.types import (
3434
Box2DType,
35+
Box3DType,
3536
GeometryType,
3637
GeographyType,
3738
RasterType,

python/sedona/spark/sql/types.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
from sedona.spark.utils import geometry_serde
4242
from sedona.spark.core.geom.geography import Geography
4343
from sedona.spark.core.geom.box2d import Box2D
44+
from sedona.spark.core.geom.box3d import Box3D
4445

4546

4647
class GeometryType(UserDefinedType):
@@ -124,6 +125,39 @@ def scalaUDT(cls):
124125
return "org.apache.spark.sql.sedona_sql.UDT.Box2DUDT"
125126

126127

128+
class Box3DType(UserDefinedType):
    """User-defined SQL type that maps ``Box3D`` objects to a six-double struct.

    The struct fields are, in order, ``xmin``, ``ymin``, ``zmin``, ``xmax``,
    ``ymax`` and ``zmax``; all are non-nullable doubles.
    """

    @classmethod
    def sqlType(cls):
        """Return the struct schema used to store a box."""
        field_names = ("xmin", "ymin", "zmin", "xmax", "ymax", "zmax")
        fields = [
            StructField(field_name, DoubleType(), nullable=False)
            for field_name in field_names
        ]
        return StructType(fields)

    def serialize(self, obj):
        """Flatten ``obj`` into a tuple of its six bounds, in schema order."""
        lower = (obj.xmin, obj.ymin, obj.zmin)
        upper = (obj.xmax, obj.ymax, obj.zmax)
        return lower + upper

    def deserialize(self, datum):
        """Build a ``Box3D`` from the first six positional entries of ``datum``."""
        # Index explicitly rather than slicing or unpacking, so ``datum`` only
        # needs to support integer ``__getitem__``.
        xmin, ymin, zmin = datum[0], datum[1], datum[2]
        xmax, ymax, zmax = datum[3], datum[4], datum[5]
        return Box3D(xmin, ymin, zmin, xmax, ymax, zmax)

    @classmethod
    def module(cls):
        """Return the dotted name of the Python module that defines this type."""
        return "sedona.spark.sql.types"

    def needConversion(self):
        """Report that values of this type need conversion (always ``True``)."""
        return True

    @classmethod
    def scalaUDT(cls):
        """Return the fully qualified name of the paired Scala UDT class."""
        return "org.apache.spark.sql.sedona_sql.UDT.Box3DUDT"
159+
160+
127161
class RasterType(UserDefinedType):
128162

129163
@classmethod

python/tests/sql/test_dataframe_api.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from tests.test_base import TestBase
2828

2929
from sedona.spark.core.geom.box2d import Box2D
30+
from sedona.spark.core.geom.box3d import Box3D
3031
from sedona.spark.core.geom.geography import Geography
3132
from sedona.spark.sql import st_aggregates as sta
3233
from sedona.spark.sql import st_constructors as stc
@@ -181,9 +182,9 @@
181182
stc.ST_3DMakeBox,
182183
("a", "b"),
183184
"two_points",
184-
# Box3DType has no Python UDT yet; cast to STRING uses Box3D.toString for comparison.
185-
"CAST(geom AS STRING)",
186-
"BOX3D(0.0 0.0 0.0, 3.0 0.0 4.0)",
185+
"",
186+
# two_points has a=(0,0,0), b=(3,0,4).
187+
Box3D(0.0, 0.0, 0.0, 3.0, 0.0, 4.0),
187188
),
188189
(
189190
stc.ST_GeomFromBox2D,
@@ -559,10 +560,9 @@
559560
stf.ST_Box3D,
560561
("line",),
561562
"linestring_geom",
562-
# Box3DType has no Python UDT yet; cast to STRING uses Box3D.toString for comparison.
563-
"CAST(geom AS STRING)",
563+
"",
564564
# linestring_geom is 2D so Z folds to 0 per PostGIS semantics.
565-
"BOX3D(0.0 0.0 0.0, 5.0 0.0 0.0)",
565+
Box3D(0.0, 0.0, 0.0, 5.0, 0.0, 0.0),
566566
),
567567
(
568568
stf.ST_Envelope,
@@ -1359,10 +1359,9 @@
13591359
sta.ST_3DExtent,
13601360
("geom",),
13611361
"exploded_points",
1362-
# Box3DType has no Python UDT yet; cast to STRING uses Box3D.toString for comparison.
1363-
"CAST(geom AS STRING)",
1362+
"",
13641363
# 2D inputs fold Z=0 per PostGIS semantics.
1365-
"BOX3D(0.0 0.0 0.0, 1.0 1.0 0.0)",
1364+
Box3D(0.0, 0.0, 0.0, 1.0, 1.0, 0.0),
13661365
),
13671366
# Test aliases for *_Aggr functions with *_Agg suffix
13681367
(

spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/UDT/Box3DUDT.scala

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,7 @@ class Box3DUDT extends UserDefinedType[Box3D] {
4040
StructField("ymax", DoubleType, nullable = false),
4141
StructField("zmax", DoubleType, nullable = false)))
4242

43-
// No `pyUDT` override yet — the Python `Box3DType` class is intentionally out of scope for
44-
// Phase 1 (see #2973). It will be added together with the Python bindings follow-up, the
45-
// same way Box2D paired `Box2DUDT.pyUDT` with `python/sedona/spark/sql/types.py::Box2DType`.
43+
override def pyUDT: String = "sedona.spark.sql.types.Box3DType"
4644

4745
override def userClass: Class[Box3D] = classOf[Box3D]
4846

0 commit comments

Comments
 (0)