Skip to content

Commit 6fed0da

Browse files
authored
[GH-2983] Box3D SQL parser keyword + Geometry→Box3D cast resolution (#3016)
1 parent 2e3b3a8 commit 6fed0da

11 files changed

Lines changed: 612 additions & 13 deletions

File tree

spark/common/src/main/scala/org/apache/sedona/sql/SedonaSqlExtensions.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import org.apache.sedona.spark.SedonaContext
2222
import org.apache.spark.SparkContext
2323
import org.apache.spark.sql.SparkSessionExtensions
2424
import org.apache.spark.sql.parser.ParserFactory
25-
import org.apache.spark.sql.sedona_sql.optimization.Box2DCastResolutionRule
25+
import org.apache.spark.sql.sedona_sql.optimization.{Box2DCastResolutionRule, Box3DCastResolutionRule}
2626
import org.slf4j.{Logger, LoggerFactory}
2727

2828
class SedonaSqlExtensions extends (SparkSessionExtensions => Unit) {
@@ -42,6 +42,11 @@ class SedonaSqlExtensions extends (SparkSessionExtensions => Unit) {
4242
// refusing arbitrary UDT-to-UDT casts.
4343
e.injectResolutionRule(_ => new Box2DCastResolutionRule)
4444

45+
// Resolve geometry→Box3D casts during analysis. Only the forward direction lands here;
46+
// the inverse cast (`CAST(box3d AS geometry)`) is deferred until Box3D has an
47+
// `ST_GeomFromBox3D` counterpart driven by a concrete consumer.
48+
e.injectResolutionRule(_ => new Box3DCastResolutionRule)
49+
4550
// Inject Sedona SQL parser
4651
if (enableParser) {
4752
// Try to inject the Sedona SQL parser but gracefully handle initialization failures.
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.spark.sql.sedona_sql.optimization
20+
21+
import org.apache.spark.sql.catalyst.expressions.Cast
22+
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
23+
import org.apache.spark.sql.catalyst.rules.Rule
24+
import org.apache.spark.sql.sedona_sql.UDT.{Box3DUDT, GeometryUDT}
25+
import org.apache.spark.sql.sedona_sql.expressions.ST_Box3D
26+
27+
/**
28+
* Analyzer rule that resolves Catalyst casts from Geometry to Box3D. Spark's `Cast.canCast`
29+
* returns `false` for arbitrary UDT-to-UDT casts, so without this rule the analyzer would reject
30+
* `CAST(geom AS box3d)`. We rewrite during analysis (before `CheckAnalysis`) so the downstream
31+
* optimizer and codegen path see the expression tree of an ordinary Sedona expression:
32+
*
33+
* - `CAST(geom AS box3d)` → `ST_Box3D(geom)` (planar 3D bounding box of the geometry;
34+
* geometries without a Z dimension fold into `zmin = zmax = 0` per PostGIS)
35+
*
36+
* The inverse direction (`CAST(box3d AS geometry)`) is intentionally deferred until Box3D has a
37+
* `ST_GeomFromBox3D` counterpart and a concrete consumer has driven the choice of output geometry
38+
* shape. Implicit type coercion is also out of scope here; it requires hooking into Catalyst's
39+
* type coercion rules and is tracked separately.
40+
*/
41+
class Box3DCastResolutionRule extends Rule[LogicalPlan] {

  /**
   * Tells whether `cast` is a Geometry → Box3D cast this rule should rewrite.
   *
   * `resolved` is checked before `dataType` is read, mirroring the original guard order
   * (presumably because the type of an unresolved child is not available yet).
   */
  private def isGeometryToBox3D(cast: Cast): Boolean = {
    val source = cast.child
    source.resolved &&
    source.dataType.isInstanceOf[GeometryUDT] &&
    cast.dataType.isInstanceOf[Box3DUDT]
  }

  /**
   * Replaces every matching `Cast` expression in `plan` with `ST_Box3D` applied to the cast's
   * child. All other expressions are left as they are.
   */
  override def apply(plan: LogicalPlan): LogicalPlan =
    plan.transformAllExpressions {
      case cast: Cast if isGeometryToBox3D(cast) => ST_Box3D(Seq(cast.child))
    }
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.sedona.sql
20+
21+
import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Cast, Expression, Literal}
22+
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Project}
23+
import org.apache.spark.sql.sedona_sql.UDT.{Box3DUDT, GeometryUDT}
24+
import org.apache.spark.sql.sedona_sql.expressions.ST_Box3D
25+
import org.apache.spark.sql.sedona_sql.optimization.Box3DCastResolutionRule
26+
import org.apache.spark.sql.types.LongType
27+
import org.scalatest.funspec.AnyFunSpec
28+
29+
class Box3DCastResolutionRuleSuite extends AnyFunSpec {

  private val rule = new Box3DCastResolutionRule

  /**
   * Wraps `expr` as the single aliased projection ("out") over a local relation exposing `input`,
   * applies the rule under test, and returns the expression found under that alias afterwards.
   */
  private def rewriteProjected(input: AttributeReference, expr: Expression): Expression = {
    val plan: LogicalPlan = Project(Seq(Alias(expr, "out")()), LocalRelation(input))
    val projected = rule(plan).asInstanceOf[Project].projectList
    projected.head.asInstanceOf[Alias].child
  }

  describe("Box3DCastResolutionRule") {
    it("rewrites Cast(geometry-typed expression, Box3DUDT) into ST_Box3D") {
      val geomAttr = AttributeReference("g", GeometryUDT(), nullable = true)()
      val result = rewriteProjected(geomAttr, Cast(geomAttr, Box3DUDT))
      // The cast must be gone, replaced by ST_Box3D over the very same attribute.
      assert(result.isInstanceOf[ST_Box3D])
      assert(result.asInstanceOf[ST_Box3D].inputExpressions == Seq(geomAttr))
      assert(result.dataType.isInstanceOf[Box3DUDT])
    }

    it("leaves Cast(Box3D-typed expression, GeometryUDT) untouched (inverse cast not in scope)") {
      val boxAttr = AttributeReference("b", Box3DUDT, nullable = true)()
      val result = rewriteProjected(boxAttr, Cast(boxAttr, GeometryUDT()))
      assert(result.isInstanceOf[Cast])
    }

    it("leaves unrelated casts untouched") {
      // The geometry attribute only backs the relation; the cast itself is Int → Long.
      val geomAttr = AttributeReference("g", GeometryUDT(), nullable = true)()
      val result = rewriteProjected(geomAttr, Cast(Literal(1), LongType))
      assert(result.isInstanceOf[Cast])
    }
  }
}

spark/spark-3.4/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,20 @@ package org.apache.sedona.sql.parser
2020

2121
import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
2222
import org.apache.spark.sql.execution.SparkSqlAstBuilder
23-
import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, GeometryUDT}
23+
import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, Box3DUDT, GeometryUDT}
2424
import org.apache.spark.sql.types.DataType
2525

2626
class SedonaSqlAstBuilder extends SparkSqlAstBuilder {
2727

2828
/**
29-
* Recognize Sedona UDT names (GEOMETRY, BOX2D) as primitive data types so SQL `CAST(... AS
30-
* geometry)` / `CAST(... AS box2d)` parse to the matching UDT.
29+
* Recognize Sedona UDT names (GEOMETRY, BOX2D, BOX3D) as primitive data types so SQL `CAST(...
30+
* AS geometry)` / `CAST(... AS box2d)` / `CAST(... AS box3d)` parse to the matching UDT.
3131
*/
3232
override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = {
3333
ctx.getText.toUpperCase() match {
3434
case "GEOMETRY" => GeometryUDT()
3535
case "BOX2D" => Box2DUDT
36+
case "BOX3D" => Box3DUDT
3637
case _ => super.visitPrimitiveDataType(ctx)
3738
}
3839
}
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.sedona.sql
20+
21+
import org.apache.sedona.common.geometryObjects.Box3D
22+
import org.apache.spark.sql.functions.{col, expr}
23+
import org.apache.spark.sql.sedona_sql.UDT.Box3DUDT
24+
25+
class Box3DCastSuite extends TestBaseScala {

  /** Clue attached to SQL-syntax tests that are cancelled when the parser extension is off. */
  private val parserRequiredClue =
    "Sedona SQL parser extension is required for `CAST(... AS box3d)` syntax"

  /**
   * Whether `CAST(... AS box3d)` can be parsed in the running session.
   *
   * The SQL syntax only works when Sedona's `SedonaSqlAstBuilder` is active. The test base
   * randomizes `spark.sedona.enableParserExtension` across CI runs and the `SparkContext` is a
   * JVM singleton, so the effective setting may not match this suite's session config. Rather
   * than reading the config, a tiny CAST query is executed once: a `ParseException` means the
   * syntax is unavailable, any other failure is propagated. The result is cached for the suite.
   * DataFrame `.cast(...)` tests do not consult this flag because the resolution rule is always
   * injected.
   */
  private lazy val sqlCastSupported: Boolean =
    scala.util
      .Try {
        sparkSession
          .sql("SELECT CAST(ST_GeomFromText('POINT (0 0)') AS box3d) AS b")
          .collect()
        true
      }
      .recover { case _: org.apache.spark.sql.catalyst.parser.ParseException => false }
      .get

  /** Collects `df` and reads column `b` of its first row as a [[Box3D]]. */
  private def firstBox(df: org.apache.spark.sql.DataFrame): Box3D =
    df.collect().head.getAs[Box3D]("b")

  /** Casts geometry column `g` of `geoms` to Box3D through the DataFrame API. */
  private def castGeometryColumn(geoms: org.apache.spark.sql.DataFrame): Box3D =
    firstBox(geoms.select(col("g").cast(Box3DUDT).alias("b")))

  /** Parses `wkt` into a geometry column and casts it to Box3D through the DataFrame API. */
  private def castWkt(wkt: String): Box3D = {
    import sparkSession.implicits._
    val geoms = Seq(wkt).toDF("wkt").select(expr("ST_GeomFromText(wkt)").alias("g"))
    castGeometryColumn(geoms)
  }

  describe("Geometry → Box3D Catalyst cast") {

    it("DataFrame .cast(Box3DUDT) rewrites to ST_Box3D") {
      val box = castWkt("LINESTRING Z(0 0 -3, 5 10 7)")
      assert(box == new Box3D(0.0, 0.0, -3.0, 5.0, 10.0, 7.0))
    }

    it("DataFrame .cast(Box3DUDT) on XY geometry folds Z = 0") {
      val box = castWkt("LINESTRING (0 0, 5 10)")
      assert(box == new Box3D(0.0, 0.0, 0.0, 5.0, 10.0, 0.0))
    }

    it("DataFrame .cast(Box3DUDT) on NULL geometry returns null") {
      val box = castGeometryColumn(sparkSession.sql("SELECT ST_GeomFromText(NULL) AS g"))
      assert(box == null)
    }

    it("SQL CAST(geom AS box3d) returns the 3D bbox") {
      assume(sqlCastSupported, parserRequiredClue)
      val box = firstBox(
        sparkSession.sql(
          "SELECT CAST(ST_GeomFromText('LINESTRING Z(0 0 -3, 5 10 7)') AS box3d) AS b"))
      assert(box == new Box3D(0.0, 0.0, -3.0, 5.0, 10.0, 7.0))
    }

    it("SQL CAST(geom AS box3d) on XY geometry folds Z = 0") {
      assume(sqlCastSupported, parserRequiredClue)
      val box = firstBox(
        sparkSession.sql("SELECT CAST(ST_GeomFromText('LINESTRING (0 0, 5 10)') AS box3d) AS b"))
      assert(box == new Box3D(0.0, 0.0, 0.0, 5.0, 10.0, 0.0))
    }

    it("SQL CAST(NULL geometry AS box3d) returns null") {
      assume(sqlCastSupported, parserRequiredClue)
      val box = firstBox(sparkSession.sql("SELECT CAST(ST_GeomFromText(NULL) AS box3d) AS b"))
      assert(box == null)
    }
  }
}

spark/spark-3.5/src/main/scala/org/apache/sedona/sql/parser/SedonaSqlAstBuilder.scala

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,19 +20,20 @@ package org.apache.sedona.sql.parser
2020

2121
import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
2222
import org.apache.spark.sql.execution.SparkSqlAstBuilder
23-
import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, GeometryUDT}
23+
import org.apache.spark.sql.sedona_sql.UDT.{Box2DUDT, Box3DUDT, GeometryUDT}
2424
import org.apache.spark.sql.types.DataType
2525

2626
class SedonaSqlAstBuilder extends SparkSqlAstBuilder {
2727

2828
/**
29-
* Recognize Sedona UDT names (GEOMETRY, BOX2D) as primitive data types so SQL `CAST(... AS
30-
* geometry)` / `CAST(... AS box2d)` parse to the matching UDT.
29+
* Recognize Sedona UDT names (GEOMETRY, BOX2D, BOX3D) as primitive data types so SQL `CAST(...
30+
* AS geometry)` / `CAST(... AS box2d)` / `CAST(... AS box3d)` parse to the matching UDT.
3131
*/
3232
override def visitPrimitiveDataType(ctx: PrimitiveDataTypeContext): DataType = {
3333
ctx.getText.toUpperCase() match {
3434
case "GEOMETRY" => GeometryUDT()
3535
case "BOX2D" => Box2DUDT
36+
case "BOX3D" => Box3DUDT
3637
case _ => super.visitPrimitiveDataType(ctx)
3738
}
3839
}

0 commit comments

Comments
 (0)