Skip to content

Commit 2e3b3a8

Browse files
authored
[GH-3013] Box3D aggregate: ST_3DExtent (#3015)
1 parent bd909f7 commit 2e3b3a8

7 files changed

Lines changed: 184 additions & 1 deletion

File tree

python/sedona/spark/sql/st_aggregates.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,22 @@ def ST_Extent(geometry: ColumnOrName) -> Column:
5656
return _call_aggregate_function("ST_Extent", geometry)
5757

5858

59+
@validate_argument_types
def ST_3DExtent(geometry: ColumnOrName) -> Column:
    """Aggregate Function: Compute the Box3D that encloses every geometry in a column.

    The result is NULL when there are no input rows, or when every row is a
    null or empty geometry. For geometries lacking a Z dimension, each
    coordinate is treated as ``z = 0``, in line with PostGIS ``ST_3DExtent``.

    :param geometry: Geometry column to aggregate.
    :type geometry: ColumnOrName
    :return: Box3D covering the union of the 3D bounding boxes of the geometry column.
    :rtype: Column
    """
    return _call_aggregate_function("ST_3DExtent", geometry)
73+
74+
5975
@validate_argument_types
6076
def ST_Intersection_Aggr(geometry: ColumnOrName) -> Column:
6177
"""Aggregate Function: Get the aggregate intersection of a geometry column.

python/tests/sql/test_dataframe_api.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1355,6 +1355,15 @@
13551355
"",
13561356
Box2D(0.0, 0.0, 1.0, 1.0),
13571357
),
1358+
(
1359+
sta.ST_3DExtent,
1360+
("geom",),
1361+
"exploded_points",
1362+
# Box3DType has no Python UDT yet; cast to STRING uses Box3D.toString for comparison.
1363+
"CAST(geom AS STRING)",
1364+
# 2D inputs fold Z=0 per PostGIS semantics.
1365+
"BOX3D(0.0 0.0 0.0, 1.0 1.0 0.0)",
1366+
),
13581367
# Test aliases for *_Aggr functions with *_Agg suffix
13591368
(
13601369
sta.ST_Envelope_Agg,

spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,7 @@ object Catalog extends AbstractCatalog with Logging {
552552
Seq(
553553
new ST_Envelope_Aggr,
554554
new ST_Extent,
555+
new ST_3DExtent,
555556
new ST_Intersection_Aggr,
556557
new ST_Union_Aggr(),
557558
new ST_Collect_Agg())

spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/AggregateFunctions.scala

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
package org.apache.spark.sql.sedona_sql.expressions
2020

2121
import org.apache.sedona.common.Functions
22-
import org.apache.sedona.common.geometryObjects.Box2D
22+
import org.apache.sedona.common.geometryObjects.{Box2D, Box3D}
2323
import org.apache.spark.sql.{Encoder, Encoders}
2424
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
2525
import org.apache.spark.sql.expressions.Aggregator
@@ -210,6 +210,85 @@ private[apache] class ST_Extent extends Aggregator[Geometry, Option[EnvelopeBuff
210210
def zero: Option[EnvelopeBuffer] = None
211211
}
212212

213+
/**
 * Running 3D bounding box used as the aggregation buffer of the 3D extent aggregate. Each axis is
 * kept as a (min, max) pair of doubles. Per the aggregate's contract, geometries without a Z
 * dimension contribute `z = 0` for each coordinate, matching the PostGIS convention.
 */
case class Envelope3DBuffer(
    minX: Double,
    maxX: Double,
    minY: Double,
    maxY: Double,
    minZ: Double,
    maxZ: Double) {

  /** True when the X range is inverted (`minX > maxX`); such a buffer is ignored by `merge`. */
  def isNull: Boolean = maxX < minX

  /**
   * Combine this buffer with `other`. When either side is null (see `isNull`) the opposite side is
   * returned unchanged; otherwise the result takes the smaller minimum and the larger maximum on
   * every axis.
   */
  def merge(other: Envelope3DBuffer): Envelope3DBuffer = other match {
    case _ if isNull => other
    case that if that.isNull => this
    case that =>
      Envelope3DBuffer(
        minX = math.min(minX, that.minX),
        maxX = math.max(maxX, that.maxX),
        minY = math.min(minY, that.minY),
        maxY = math.max(maxY, that.maxY),
        minZ = math.min(minZ, that.minZ),
        maxZ = math.max(maxZ, that.maxZ))
  }
}
239+
240+
/**
 * Aggregate producing the 3D bounding box ([[Box3D]]) that encloses every geometry of the given
 * column, mirroring PostGIS `ST_3DExtent`. The result is NULL when the input has no rows or when
 * every row is a null/empty geometry. Geometries without a Z dimension are treated as having
 * `z = 0`.
 */
private[apache] class ST_3DExtent extends Aggregator[Geometry, Option[Envelope3DBuffer], Box3D] {

  val outputSerde: ExpressionEncoder[Box3D] = ExpressionEncoder[Box3D]()

  /** The buffer starts out empty: no geometry has contributed yet. */
  def zero: Option[Envelope3DBuffer] = None

  /**
   * Fold one input geometry into the buffer. Null geometries, empty geometries, and geometries for
   * which `Box3D.fromGeometry` yields null leave the buffer untouched.
   */
  def reduce(buffer: Option[Envelope3DBuffer], input: Geometry): Option[Envelope3DBuffer] = {
    val incoming = for {
      geom <- Option(input) if !geom.isEmpty
      box <- Option(Box3D.fromGeometry(geom))
    } yield Envelope3DBuffer(
      box.getXMin,
      box.getXMax,
      box.getYMin,
      box.getYMax,
      box.getZMin,
      box.getZMax)

    incoming match {
      case None => buffer
      case Some(env) => Some(buffer.fold(env)(_.merge(env)))
    }
  }

  /** Combine two partial buffers; an absent side simply yields the other side. */
  def merge(
      buffer1: Option[Envelope3DBuffer],
      buffer2: Option[Envelope3DBuffer]): Option[Envelope3DBuffer] = {
    val combined = for {
      left <- buffer1
      right <- buffer2
    } yield left.merge(right)
    combined.orElse(buffer1).orElse(buffer2)
  }

  /** Convert the final buffer into a [[Box3D]], or null when nothing was aggregated. */
  def finish(reduction: Option[Envelope3DBuffer]): Box3D =
    reduction
      .map(env => new Box3D(env.minX, env.minY, env.minZ, env.maxX, env.maxY, env.maxZ))
      .orNull

  def bufferEncoder: Encoder[Option[Envelope3DBuffer]] =
    Encoders.product[Option[Envelope3DBuffer]]

  def outputEncoder: ExpressionEncoder[Box3D] = outputSerde
}
291+
213292
/**
214293
* Return the polygon intersection of all Polygon in the given column
215294
*/

spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_aggregates.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,16 @@ object st_aggregates {
7373
aggrFunc(col(geometry))
7474
}
7575

76+
/**
 * DataFrame API entry point for the `ST_3DExtent` aggregate.
 *
 * @param geometry
 *   geometry column to aggregate
 * @return
 *   column holding the aggregated result of the `ST_3DExtent` aggregator
 */
def ST_3DExtent(geometry: Column): Column =
  udaf(new ST_3DExtent).apply(geometry)
80+
81+
/**
 * DataFrame API entry point for the `ST_3DExtent` aggregate, addressing the input by column name.
 *
 * @param geometry
 *   name of the geometry column to aggregate
 * @return
 *   column holding the aggregated result of the `ST_3DExtent` aggregator
 */
def ST_3DExtent(geometry: String): Column =
  udaf(new ST_3DExtent).apply(col(geometry))
85+
7686
// Aliases for *_Aggr functions with *_Agg suffix
7787
def ST_Envelope_Agg(geometry: Column): Column = ST_Envelope_Aggr(geometry)
7888

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.sedona.sql
20+
21+
/**
 * SQL-level tests for the `ST_3DExtent` aggregate: mixed 2D/3D input, empty input, and rows that
 * must be skipped (NULL and empty geometries).
 */
class Box3DExtentSuite extends TestBaseScala {

  describe("ST_3DExtent aggregate") {

    it("aggregates 3D bbox over geometry rows, treating XY as Z=0") {
      val row = sparkSession
        .sql("""
          WITH t AS (
            SELECT ST_GeomFromText('POINT(1 1)') AS g UNION ALL
            SELECT ST_GeomFromWKT('POINT Z(5 7 -2)') UNION ALL
            SELECT ST_GeomFromWKT('LINESTRING Z(3 2 4, 6 4 9)')
          )
          SELECT ST_AsText(ST_3DExtent(g)) AS s FROM t
        """)
        .collect()(0)
      assert(row.getString(0) == "BOX3D(1.0 1.0 -2.0, 6.0 7.0 9.0)")
    }

    it("returns NULL on empty input") {
      val v = sparkSession
        .sql("SELECT ST_3DExtent(g) FROM (SELECT ST_GeomFromText(NULL) AS g) WHERE false")
        .collect()
      // A global aggregate (no GROUP BY) yields exactly one row even over zero input rows, so
      // require that row and its NULL value instead of also accepting an empty result.
      assert(v.length == 1)
      assert(v(0).isNullAt(0))
    }

    it("skips NULL and empty geometry rows") {
      val row = sparkSession
        .sql("""
          WITH t AS (
            SELECT ST_GeomFromWKT('POINT Z(5 7 -2)') AS g UNION ALL
            SELECT ST_GeomFromText(NULL) UNION ALL
            SELECT ST_GeomFromText('LINESTRING EMPTY') UNION ALL
            SELECT ST_GeomFromWKT('POINT Z(1 1 1)')
          )
          SELECT ST_AsText(ST_3DExtent(g)) AS s FROM t
        """)
        .collect()(0)
      assert(row.getString(0) == "BOX3D(1.0 1.0 -2.0, 5.0 7.0 1.0)")
    }
  }
}

spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,13 @@ class dataFrameAPITestScala extends TestBaseScala {
685685
assert(!row.getBoolean(1))
686686
}
687687

688+
it("Passed ST_3DExtent") {
  // Three 3D points whose extremes are x in [0, 2], y in [0, 4] and z in [-1, 6].
  val query = "SELECT explode(array(" +
    "ST_PointZ(0.0, 0.0, -1.0), ST_PointZ(2.0, 4.0, 6.0), ST_PointZ(1.0, 1.0, 1.0))) AS geom"
  val extent = sparkSession.sql(query).select(ST_3DExtent("geom")).first().get(0)
  assert(extent.toString == "BOX3D(0.0 0.0 -1.0, 2.0 4.0 6.0)")
}
694+
688695
it("Passed ST_Expand") {
689696
val baseDf = sparkSession.sql(
690697
"SELECT ST_GeomFromWKT('POLYGON ((50 50 1, 50 80 2, 80 80 3, 80 50 2, 50 50 1))') as geom")

0 commit comments

Comments
 (0)