Skip to content

Commit b5c1635

Browse files
authored
[GH-2973] Box3D foundation: value class + UDT + Catalyst plumbing (#2978)
1 parent 2396878 commit b5c1635

6 files changed

Lines changed: 369 additions & 4 deletions

File tree

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.sedona.common.geometryObjects;
20+
21+
import java.io.Serializable;
22+
import java.util.Objects;
23+
import org.locationtech.jts.geom.Coordinate;
24+
import org.locationtech.jts.geom.Geometry;
25+
26+
/**
27+
* Planar 3D bounding box with min/max X, Y, and Z. Storage order matches PostGIS {@code box3d}:
28+
* {@code xmin, ymin, zmin, xmax, ymax, zmax}.
29+
*
30+
* <p>Absence is represented by SQL NULL at the column level rather than an in-band sentinel.
31+
* Geometries that lack a Z dimension are treated as having {@code z = 0} (matching PostGIS), so the
32+
* bbox of an XY geometry has {@code zmin == zmax == 0} rather than NaN. Predicates require ordered
33+
* bounds ({@code xmin <= xmax}, {@code ymin <= ymax}, {@code zmin <= zmax}); inverted Z has no
34+
* defined planar meaning and there is no wraparound convention for the Z axis.
35+
*/
36+
public final class Box3D implements Serializable {
37+
38+
private final double xmin;
39+
private final double ymin;
40+
private final double zmin;
41+
private final double xmax;
42+
private final double ymax;
43+
private final double zmax;
44+
45+
public Box3D(double xmin, double ymin, double zmin, double xmax, double ymax, double zmax) {
46+
this.xmin = xmin;
47+
this.ymin = ymin;
48+
this.zmin = zmin;
49+
this.xmax = xmax;
50+
this.ymax = ymax;
51+
this.zmax = zmax;
52+
}
53+
54+
/**
55+
* Returns the 3D bbox of {@code geometry}, or {@code null} for null/empty geometry. Z values that
56+
* are NaN (i.e. the coordinate has no Z dimension) are treated as 0, matching PostGIS's
57+
* convention where flat XY geometries get a degenerate Z extent at 0.
58+
*/
59+
public static Box3D fromGeometry(Geometry geometry) {
60+
if (geometry == null || geometry.isEmpty()) {
61+
return null;
62+
}
63+
double xMin = Double.POSITIVE_INFINITY;
64+
double yMin = Double.POSITIVE_INFINITY;
65+
double zMin = Double.POSITIVE_INFINITY;
66+
double xMax = Double.NEGATIVE_INFINITY;
67+
double yMax = Double.NEGATIVE_INFINITY;
68+
double zMax = Double.NEGATIVE_INFINITY;
69+
boolean sawZ = false;
70+
for (Coordinate c : geometry.getCoordinates()) {
71+
xMin = Math.min(xMin, c.x);
72+
xMax = Math.max(xMax, c.x);
73+
yMin = Math.min(yMin, c.y);
74+
yMax = Math.max(yMax, c.y);
75+
double z = c.getZ();
76+
if (Double.isNaN(z)) {
77+
// PostGIS-compatible: missing Z is folded into the 0 plane on each coord.
78+
zMin = Math.min(zMin, 0.0);
79+
zMax = Math.max(zMax, 0.0);
80+
} else {
81+
sawZ = true;
82+
zMin = Math.min(zMin, z);
83+
zMax = Math.max(zMax, z);
84+
}
85+
}
86+
// If the geometry has no Z at any coordinate, collapse to z=0.
87+
if (!sawZ) {
88+
zMin = 0.0;
89+
zMax = 0.0;
90+
}
91+
return new Box3D(xMin, yMin, zMin, xMax, yMax, zMax);
92+
}
93+
94+
public double getXMin() {
95+
return xmin;
96+
}
97+
98+
public double getYMin() {
99+
return ymin;
100+
}
101+
102+
public double getZMin() {
103+
return zmin;
104+
}
105+
106+
public double getXMax() {
107+
return xmax;
108+
}
109+
110+
public double getYMax() {
111+
return ymax;
112+
}
113+
114+
public double getZMax() {
115+
return zmax;
116+
}
117+
118+
/**
119+
* Returns the union of {@code this} and {@code other}. {@code other == null} is treated as a
120+
* no-op, returning {@code this}.
121+
*/
122+
public Box3D expandToInclude(Box3D other) {
123+
if (other == null) {
124+
return this;
125+
}
126+
return new Box3D(
127+
Math.min(xmin, other.xmin),
128+
Math.min(ymin, other.ymin),
129+
Math.min(zmin, other.zmin),
130+
Math.max(xmax, other.xmax),
131+
Math.max(ymax, other.ymax),
132+
Math.max(zmax, other.zmax));
133+
}
134+
135+
@Override
136+
public boolean equals(Object o) {
137+
if (this == o) return true;
138+
if (!(o instanceof Box3D)) return false;
139+
Box3D other = (Box3D) o;
140+
return Double.compare(xmin, other.xmin) == 0
141+
&& Double.compare(ymin, other.ymin) == 0
142+
&& Double.compare(zmin, other.zmin) == 0
143+
&& Double.compare(xmax, other.xmax) == 0
144+
&& Double.compare(ymax, other.ymax) == 0
145+
&& Double.compare(zmax, other.zmax) == 0;
146+
}
147+
148+
@Override
149+
public int hashCode() {
150+
return Objects.hash(xmin, ymin, zmin, xmax, ymax, zmax);
151+
}
152+
153+
@Override
154+
public String toString() {
155+
return "BOX3D(" + xmin + " " + ymin + " " + zmin + ", " + xmax + " " + ymax + " " + zmax + ")";
156+
}
157+
}
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.spark.sql.sedona_sql.UDT
20+
21+
import org.apache.sedona.common.geometryObjects.Box3D
22+
import org.apache.spark.sql.catalyst.InternalRow
23+
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
24+
import org.apache.spark.sql.types._
25+
import org.json4s.JsonAST.JValue
26+
import org.json4s.JsonDSL._
27+
28+
/**
 * Spark SQL user-defined type for [[Box3D]]. The value is stored as a struct of six non-nullable
 * doubles laid out in PostGIS `box3d` order: `xmin`, `ymin`, `zmin`, `xmax`, `ymax`, `zmax`.
 */
class Box3DUDT extends UserDefinedType[Box3D] {

  /** Storage schema: one non-nullable `DoubleType` field per bound, in storage order. */
  override def sqlType: DataType = StructType(
    Seq("xmin", "ymin", "zmin", "xmax", "ymax", "zmax")
      .map(fieldName => StructField(fieldName, DoubleType, nullable = false)))

  // `pyUDT` is deliberately not overridden yet: the Python `Box3DType` class is out of scope for
  // Phase 1 (see #2973) and is planned to arrive with the Python bindings follow-up, mirroring
  // how `Box2DUDT.pyUDT` was paired with `python/sedona/spark/sql/types.py::Box2DType`.

  override def userClass: Class[Box3D] = classOf[Box3D]

  /** Packs the six bounds of `obj` into a six-slot row, in storage order. */
  override def serialize(obj: Box3D): InternalRow =
    new GenericInternalRow(
      Array[Any](obj.getXMin, obj.getYMin, obj.getZMin, obj.getXMax, obj.getYMax, obj.getZMax))

  /**
   * Rebuilds a [[Box3D]] from a row holding the six bounds at ordinals 0 to 5. Only
   * `InternalRow` input is matched; anything else falls through the match.
   */
  override def deserialize(datum: Any): Box3D = datum match {
    case fields: InternalRow =>
      val xmin = fields.getDouble(0)
      val ymin = fields.getDouble(1)
      val zmin = fields.getDouble(2)
      val xmax = fields.getDouble(3)
      val ymax = fields.getDouble(4)
      val zmax = fields.getDouble(5)
      new Box3D(xmin, ymin, zmin, xmax, ymax, zmax)
  }

  /** JSON form of the type, with any trailing `$` stripped from the `class` entry. */
  override private[sql] def jsonValue: JValue =
    super.jsonValue.mapField {
      case ("class", _) => "class" -> this.getClass.getName.stripSuffix("$")
      case other: Any => other
    }

  /** Every `Box3DUDT` instance is equal to every other one. */
  override def equals(other: Any): Boolean = other match {
    case _: Box3DUDT => true
    case _ => false
  }

  override def hashCode(): Int = userClass.hashCode()

  override def toString: String = "Box3DUDT"
}
87+
88+
/**
 * Singleton instance of the `Box3DUDT` class. Calling `Box3DUDT()` returns a fresh, non-singleton
 * `Box3DUDT`.
 */
case object Box3DUDT extends Box3DUDT with scala.Serializable {

  /** Creates a new `Box3DUDT` instance. */
  def apply(): Box3DUDT = new Box3DUDT
}

spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/UDT/UdtRegistratorWrapper.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
package org.apache.spark.sql.sedona_sql.UDT
2020

2121
import org.apache.sedona.common.S2Geography.Geography
22-
import org.apache.sedona.common.geometryObjects.Box2D
22+
import org.apache.sedona.common.geometryObjects.{Box2D, Box3D}
2323
import org.apache.spark.sql.types.UDTRegistration
2424
import org.locationtech.jts.geom.Geometry
2525
import org.locationtech.jts.index.SpatialIndex
@@ -30,6 +30,7 @@ object UdtRegistratorWrapper {
3030
registerIfNotExists(classOf[Geometry].getName, classOf[GeometryUDT].getName)
3131
registerIfNotExists(classOf[Geography].getName, classOf[GeographyUDT].getName)
3232
registerIfNotExists(classOf[Box2D].getName, classOf[Box2DUDT].getName)
33+
registerIfNotExists(classOf[Box3D].getName, classOf[Box3DUDT].getName)
3334
registerIfNotExists(classOf[SpatialIndex].getName, classOf[IndexUDT].getName)
3435
}
3536

spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/InferredExpression.scala

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ package org.apache.spark.sql.sedona_sql.expressions
2020

2121
import org.apache.commons.lang3.StringUtils
2222
import org.apache.sedona.common.S2Geography.Geography
23-
import org.apache.sedona.common.geometryObjects.Box2D
23+
import org.apache.sedona.common.geometryObjects.{Box2D, Box3D}
2424
import org.apache.spark.sql.catalyst.InternalRow
2525
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
2626
import org.apache.spark.sql.catalyst.expressions.{Expression, ImplicitCastInputTypes}
2727
import org.apache.spark.sql.catalyst.util.ArrayData
28-
import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, GeographyUDT, GeometryUDT}
28+
import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, Box3DUDT, GeographyUDT, GeometryUDT}
2929
import org.apache.spark.sql.sedona_sql.expressions.implicits._
3030
import org.apache.spark.sql.types._
3131
import org.apache.spark.unsafe.types.UTF8String
@@ -167,6 +167,8 @@ object InferrableType {
167167
new InferrableType[Array[Geography]] {}
168168
implicit val box2DInstance: InferrableType[Box2D] =
169169
new InferrableType[Box2D] {}
170+
implicit val box3DInstance: InferrableType[Box3D] =
171+
new InferrableType[Box3D] {}
170172
implicit val javaDoubleInstance: InferrableType[java.lang.Double] =
171173
new InferrableType[java.lang.Double] {}
172174
implicit val javaIntegerInstance: InferrableType[java.lang.Integer] =
@@ -219,6 +221,8 @@ object InferredTypes {
219221
expr.toGeographyArray(input)
220222
} else if (t =:= typeOf[Box2D]) { expr => input =>
221223
expr.toBox2D(input)
224+
} else if (t =:= typeOf[Box3D]) { expr => input =>
225+
expr.toBox3D(input)
222226
} else if (InferredRasterExpression.isRasterType(t)) {
223227
InferredRasterExpression.rasterExtractor
224228
} else if (t =:= typeOf[Array[Double]]) { expr => input =>
@@ -279,6 +283,14 @@ object InferredTypes {
279283
} else {
280284
null
281285
}
286+
} else if (t =:= typeOf[Box3D]) {
287+
val udt = Box3DUDT
288+
output =>
289+
if (output != null) {
290+
udt.serialize(output.asInstanceOf[Box3D])
291+
} else {
292+
null
293+
}
282294
} else if (InferredRasterExpression.isRasterType(t)) {
283295
InferredRasterExpression.rasterSerializer
284296
} else if (t =:= typeOf[String]) { output =>
@@ -347,6 +359,8 @@ object InferredTypes {
347359
DataTypes.createArrayType(GeographyUDT())
348360
} else if (t =:= typeOf[Box2D]) {
349361
Box2DUDT()
362+
} else if (t =:= typeOf[Box3D]) {
363+
Box3DUDT()
350364
} else if (InferredRasterExpression.isRasterType(t)) {
351365
InferredRasterExpression.rasterUDT
352366
} else if (InferredRasterExpression.isRasterArrayType(t)) {

spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/implicits.scala

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
package org.apache.spark.sql.sedona_sql.expressions
2020

2121
import org.apache.sedona.common.S2Geography.{Geography, GeographyWKBSerializer}
22-
import org.apache.sedona.common.geometryObjects.Box2D
22+
import org.apache.sedona.common.geometryObjects.{Box2D, Box3D}
2323
import org.apache.sedona.sql.utils.GeometrySerializer
2424
import org.apache.spark.sql.catalyst.InternalRow
2525
import org.apache.spark.sql.catalyst.expressions.Expression
@@ -87,6 +87,25 @@ object implicits {
8787
}
8888
}
8989

90+
/**
 * Evaluates the wrapped expression against `input` and returns the result as a [[Box3D]].
 *
 * A `SerdeAware` expression is evaluated without serialization and its result is cast directly.
 * Any other expression is evaluated normally; an `InternalRow` result is read as six doubles at
 * ordinals 0 to 5 (`xmin`, `ymin`, `zmin`, `xmax`, `ymax`, `zmax`), and every other result,
 * including `null`, yields `null`.
 */
def toBox3D(input: InternalRow): Box3D = inputExpression match {
  case aware: SerdeAware =>
    aware.evalWithoutSerialization(input).asInstanceOf[Box3D]
  case plain =>
    plain.eval(input) match {
      case fields: InternalRow =>
        val xmin = fields.getDouble(0)
        val ymin = fields.getDouble(1)
        val zmin = fields.getDouble(2)
        val xmax = fields.getDouble(3)
        val ymax = fields.getDouble(4)
        val zmax = fields.getDouble(5)
        new Box3D(xmin, ymin, zmin, xmax, ymax, zmax)
      case _ => null
    }
}
108+
90109
def toGeographyArray(input: InternalRow): Array[Geography] = {
91110
inputExpression match {
92111
case aware: SerdeAware =>

0 commit comments

Comments
 (0)