From fb483e0fcbbe7ef34ae39ef12e0aad7cedfea0a6 Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Fri, 24 Apr 2026 09:41:12 -0700 Subject: [PATCH 1/5] [GH-2830] Adds Geography implementations for ST_Within and ST_DWithin --- .../sedona/common/geography/Functions.java | 10 +++ .../sedona/common/Geography/FunctionTest.java | 46 ++++++++++++++ docs/api/sql/geography/Geography-Functions.md | 1 + .../Geography-Functions/ST_DWithin.md | 62 +++++++++++++++++++ .../sedona_sql/expressions/Predicates.scala | 3 +- .../sql/geography/GeographyFunctionTest.scala | 41 ++++++++++++ 6 files changed, 162 insertions(+), 1 deletion(-) create mode 100644 docs/api/sql/geography/Geography-Functions/ST_DWithin.md diff --git a/common/src/main/java/org/apache/sedona/common/geography/Functions.java b/common/src/main/java/org/apache/sedona/common/geography/Functions.java index d6a0b407b1e..1b7b8acc822 100644 --- a/common/src/main/java/org/apache/sedona/common/geography/Functions.java +++ b/common/src/main/java/org/apache/sedona/common/geography/Functions.java @@ -136,6 +136,16 @@ public static boolean contains(Geography g1, Geography g2) { return pred.S2_contains(toShapeIndex(g1), toShapeIndex(g2), s2Options()); } + /** + * Spherical "distance within" test. Returns true iff the minimum geodesic distance between g1 and + * g2 (in meters) is less than or equal to {@code distanceMeters}. 
+ */ + public static boolean dWithin(Geography g1, Geography g2, double distanceMeters) { + if (g1 == null || g2 == null) return false; + Double d = distance(g1, g2); + return d != null && d <= distanceMeters; + } + /** Return EWKT for geography object */ public static String asEWKT(Geography geography) { return geography.toEWKT(); diff --git a/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java b/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java index ab2daca5df5..cc0b8cfd911 100644 --- a/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java +++ b/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java @@ -222,4 +222,50 @@ public void contains_nullHandling() throws ParseException { assertFalse(Functions.contains(g1, null)); assertFalse(Functions.contains(null, g1)); } + + // ─── Level 3: ST_DWithin ───────────────────────────────────────────────── + + @Test + public void dWithin_twoPointsOneDegreeApart() throws ParseException { + Geography g1 = Constructors.geogFromWKT("POINT (0 0)", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (0 1)", 4326); + // 1° of latitude ≈ 111_195 m on the sphere + assertFalse(Functions.dWithin(g1, g2, 100_000.0)); + assertTrue(Functions.dWithin(g1, g2, 200_000.0)); + } + + @Test + public void dWithin_pointInsidePolygon() throws ParseException { + Geography poly = Constructors.geogFromWKT("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", 4326); + Geography pt = Constructors.geogFromWKT("POINT (0.5 0.5)", 4326); + // Distance is zero when one contains the other; any positive threshold should pass. 
+ assertTrue(Functions.dWithin(poly, pt, 1.0)); + } + + @Test + public void dWithin_boundaryInclusive() throws ParseException { + // sedona-db parity: distance == threshold ⇒ true (inclusive <=) + Geography g1 = Constructors.geogFromWKT("POINT (0 0)", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (0 1)", 4326); + double actual = Functions.distance(g1, g2); + assertTrue(Functions.dWithin(g1, g2, actual)); + assertFalse(Functions.dWithin(g1, g2, actual - 1.0)); + } + + @Test + public void dWithin_antimeridianCrossing() throws ParseException { + // Two points straddling the antimeridian: great-circle distance ~22 km, + // planar distance ~40_000 km — succeeding at 50 km proves we use spherical distance. + Geography g1 = Constructors.geogFromWKT("POINT (179.9 0)", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (-179.9 0)", 4326); + assertTrue(Functions.dWithin(g1, g2, 50_000.0)); + } + + @Test + public void dWithin_nullHandling() throws ParseException { + Geography g = Constructors.geogFromWKT("POINT (0 0)", 4326); + assertFalse(Functions.dWithin(g, null, 1e6)); + assertFalse(Functions.dWithin(null, g, 1e6)); + assertFalse(Functions.dWithin(null, null, 1e6)); + } } diff --git a/docs/api/sql/geography/Geography-Functions.md b/docs/api/sql/geography/Geography-Functions.md index 86b5a12fb5f..c1dcadc588e 100644 --- a/docs/api/sql/geography/Geography-Functions.md +++ b/docs/api/sql/geography/Geography-Functions.md @@ -47,3 +47,4 @@ These functions operate on geography type objects. | [ST_NPoints](Geography-Functions/ST_NPoints.md) | Integer | Return the number of points (vertices) in a geography. | v1.9.0 | | [ST_Distance](Geography-Functions/ST_Distance.md) | Double | Return the minimum geodesic distance between two geographies in meters. | v1.9.0 | | [ST_Contains](Geography-Functions/ST_Contains.md) | Boolean | Test whether geography A fully contains geography B. 
| v1.9.0 | +| [ST_DWithin](Geography-Functions/ST_DWithin.md) | Boolean | Test whether two geographies are within a given geodesic distance (in meters) of each other. | v1.9.1 | diff --git a/docs/api/sql/geography/Geography-Functions/ST_DWithin.md b/docs/api/sql/geography/Geography-Functions/ST_DWithin.md new file mode 100644 index 00000000000..751ee3f8b27 --- /dev/null +++ b/docs/api/sql/geography/Geography-Functions/ST_DWithin.md @@ -0,0 +1,62 @@ + + +# ST_DWithin + +Introduction: Tests whether two geographies are within a given geodesic distance (in meters) of each other on the sphere. The minimum great-circle distance between any two points on the two geographies is compared against the threshold; the test is inclusive (returns true when the minimum distance equals the threshold). + +Format: + +`ST_DWithin (A: Geography, B: Geography, distance: Double)` + +Return type: `Boolean` + +Since: `v1.9.1` + +SQL Example + +```sql +SELECT ST_DWithin( + ST_GeogFromWKT('POINT (0 0)', 4326), + ST_GeogFromWKT('POINT (0 1)', 4326), + 200000.0 +); +``` + +Output: + +``` +true +``` + +The same pair of points with a tighter threshold: + +```sql +SELECT ST_DWithin( + ST_GeogFromWKT('POINT (0 0)', 4326), + ST_GeogFromWKT('POINT (0 1)', 4326), + 100000.0 +); +``` + +Output: + +``` +false +``` diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala index 6affd348b32..8be19ec5736 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala @@ -292,7 +292,8 @@ private[apache] case class ST_OrderingEquals(inputExpressions: Seq[Expression]) private[apache] case class ST_DWithin(inputExpressions: Seq[Expression]) extends InferredExpression( inferrableFunction3(Predicates.dWithin), - 
inferrableFunction4(Predicates.dWithin)) { + inferrableFunction4(Predicates.dWithin), + inferrableFunction3(org.apache.sedona.common.geography.Functions.dWithin)) { protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { copy(inputExpressions = newChildren) diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala index 7f47ae51826..c7af715e60b 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala @@ -153,6 +153,37 @@ class GeographyFunctionTest extends TestBaseScala { .first() assertTrue(!row.getBoolean(0)) } + + it("ST_DWithin true when within threshold") { + val row = sparkSession + .sql(""" + SELECT ST_DWithin( + ST_GeogFromWKT('POINT (0 0)', 4326), + ST_GeogFromWKT('POINT (0 1)', 4326), + 200000.0) AS r + """) + .first() + assertTrue(row.getBoolean(0)) + } + + it("ST_DWithin false when outside threshold") { + val row = sparkSession + .sql(""" + SELECT ST_DWithin( + ST_GeogFromWKT('POINT (0 0)', 4326), + ST_GeogFromWKT('POINT (0 1)', 4326), + 100000.0) AS r + """) + .first() + assertTrue(!row.getBoolean(0)) + } + + it("ST_DWithin null handling") { + val row = sparkSession + .sql("SELECT ST_DWithin(ST_GeogFromWKT('POINT (0 0)', 4326), null, 1.0) AS r") + .first() + assertTrue(row.isNullAt(0)) + } } // ─── DataFrame API ───────────────────────────────────────────────────── @@ -187,6 +218,16 @@ class GeographyFunctionTest extends TestBaseScala { .select(st_predicates.ST_Contains(col("poly"), col("pt")).as("result")) assertTrue(df.first().getBoolean(0)) } + + it("ST_DWithin via DataFrame API") { + val df = sparkSession + .sql("SELECT 'POINT (0 0)' AS a, 'POINT (0 1)' AS b") + .select( + st_constructors.ST_GeogFromWKT(col("a"), lit(4326)).as("a"), + 
st_constructors.ST_GeogFromWKT(col("b"), lit(4326)).as("b")) + .select(st_predicates.ST_DWithin(col("a"), col("b"), lit(200000.0)).as("r")) + assertTrue(df.first().getBoolean(0)) + } } // ─── Serialization round-trip ────────────────────────────────────────── From 21fcf55a22260a43b0029db92f30ae92e49101ff Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Fri, 24 Apr 2026 09:48:42 -0700 Subject: [PATCH 2/5] add svg to the help file --- .../Geography-Functions/ST_DWithin.md | 3 ++ .../ST_DWithin_geography_false.svg | 42 +++++++++++++++++++ .../ST_DWithin_geography_true.svg | 42 +++++++++++++++++++ 3 files changed, 87 insertions(+) create mode 100644 docs/image/ST_DWithin_geography/ST_DWithin_geography_false.svg create mode 100644 docs/image/ST_DWithin_geography/ST_DWithin_geography_true.svg diff --git a/docs/api/sql/geography/Geography-Functions/ST_DWithin.md b/docs/api/sql/geography/Geography-Functions/ST_DWithin.md index 751ee3f8b27..d3c7559d97f 100644 --- a/docs/api/sql/geography/Geography-Functions/ST_DWithin.md +++ b/docs/api/sql/geography/Geography-Functions/ST_DWithin.md @@ -21,6 +21,9 @@ Introduction: Tests whether two geographies are within a given geodesic distance (in meters) of each other on the sphere. The minimum great-circle distance between any two points on the two geographies is compared against the threshold; the test is inclusive (returns true when the minimum distance equals the threshold). 
+![ST_DWithin returning true](../../../../image/ST_DWithin_geography/ST_DWithin_geography_true.svg "ST_DWithin returning true") +![ST_DWithin returning false](../../../../image/ST_DWithin_geography/ST_DWithin_geography_false.svg "ST_DWithin returning false") + Format: `ST_DWithin (A: Geography, B: Geography, distance: Double)` diff --git a/docs/image/ST_DWithin_geography/ST_DWithin_geography_false.svg b/docs/image/ST_DWithin_geography/ST_DWithin_geography_false.svg new file mode 100644 index 00000000000..88a240943a1 --- /dev/null +++ b/docs/image/ST_DWithin_geography/ST_DWithin_geography_false.svg @@ -0,0 +1,42 @@ + + + + + + + + + + + ST_DWithin(A, B, d) + + FALSE + + + + + + + + + + + + + + + d + + + + B + + + + A + + + Geodesic distance between A and B > d (on sphere) + diff --git a/docs/image/ST_DWithin_geography/ST_DWithin_geography_true.svg b/docs/image/ST_DWithin_geography/ST_DWithin_geography_true.svg new file mode 100644 index 00000000000..b2bbeeea2c6 --- /dev/null +++ b/docs/image/ST_DWithin_geography/ST_DWithin_geography_true.svg @@ -0,0 +1,42 @@ + + + + + + + + + + + ST_DWithin(A, B, d) + + TRUE + + + + + + + + + + + + + + + d + + + + B + + + + A + + + Geodesic distance between A and B ≤ d (on sphere) + From 4ff1d92612d9a6efbdaa7107407d16e1e284b32e Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Fri, 24 Apr 2026 10:01:25 -0700 Subject: [PATCH 3/5] implement st_within for geography type --- .../sedona/common/geography/Functions.java | 8 +++ .../sedona/common/Geography/FunctionTest.java | 34 ++++++++++++ docs/api/sql/geography/Geography-Functions.md | 1 + .../Geography-Functions/ST_Within.md | 48 +++++++++++++++++ .../ST_Within_geography_false.svg | 54 +++++++++++++++++++ .../ST_Within_geography_true.svg | 54 +++++++++++++++++++ .../sedona_sql/expressions/Predicates.scala | 12 ++--- .../strategy/join/JoinQueryDetector.scala | 27 +++++----- .../sql/geography/GeographyFunctionTest.scala | 34 ++++++++++++ 9 files changed, 252 insertions(+), 20 deletions(-) 
create mode 100644 docs/api/sql/geography/Geography-Functions/ST_Within.md create mode 100644 docs/image/ST_Within_geography/ST_Within_geography_false.svg create mode 100644 docs/image/ST_Within_geography/ST_Within_geography_true.svg diff --git a/common/src/main/java/org/apache/sedona/common/geography/Functions.java b/common/src/main/java/org/apache/sedona/common/geography/Functions.java index 1b7b8acc822..07feda0e289 100644 --- a/common/src/main/java/org/apache/sedona/common/geography/Functions.java +++ b/common/src/main/java/org/apache/sedona/common/geography/Functions.java @@ -146,6 +146,14 @@ public static boolean dWithin(Geography g1, Geography g2, double distanceMeters) return d != null && d <= distanceMeters; } + /** + * Spherical "within" test. Returns true iff g1 is fully inside g2 on the sphere. OGC convention: + * {@code ST_Within(A, B) == ST_Contains(B, A)}. + */ + public static boolean within(Geography g1, Geography g2) { + return contains(g2, g1); + } + /** Return EWKT for geography object */ public static String asEWKT(Geography geography) { return geography.toEWKT(); diff --git a/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java b/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java index cc0b8cfd911..e524f52916b 100644 --- a/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java +++ b/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java @@ -268,4 +268,38 @@ public void dWithin_nullHandling() throws ParseException { assertFalse(Functions.dWithin(null, g, 1e6)); assertFalse(Functions.dWithin(null, null, 1e6)); } + + // ─── Level 3: ST_Within ────────────────────────────────────────────────── + + @Test + public void within_pointInPolygon() throws ParseException { + Geography pt = Constructors.geogFromWKT("POINT (0.5 0.5)", 4326); + Geography poly = Constructors.geogFromWKT("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", 4326); + assertTrue(Functions.within(pt, poly)); + } + + 
@Test + public void within_pointOutsidePolygon() throws ParseException { + Geography pt = Constructors.geogFromWKT("POINT (2 2)", 4326); + Geography poly = Constructors.geogFromWKT("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", 4326); + assertFalse(Functions.within(pt, poly)); + } + + @Test + public void within_isContainsSwapped() throws ParseException { + // OGC parity: within(A, B) == contains(B, A) for every input pair. + Geography poly = Constructors.geogFromWKT("POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))", 4326); + Geography inside = Constructors.geogFromWKT("POINT (1 1)", 4326); + Geography outside = Constructors.geogFromWKT("POINT (3 3)", 4326); + assertEquals(Functions.contains(poly, inside), Functions.within(inside, poly)); + assertEquals(Functions.contains(poly, outside), Functions.within(outside, poly)); + } + + @Test + public void within_nullHandling() throws ParseException { + Geography g = Constructors.geogFromWKT("POINT (1 1)", 4326); + assertFalse(Functions.within(g, null)); + assertFalse(Functions.within(null, g)); + assertFalse(Functions.within(null, null)); + } } diff --git a/docs/api/sql/geography/Geography-Functions.md b/docs/api/sql/geography/Geography-Functions.md index c1dcadc588e..2304c50fca7 100644 --- a/docs/api/sql/geography/Geography-Functions.md +++ b/docs/api/sql/geography/Geography-Functions.md @@ -48,3 +48,4 @@ These functions operate on geography type objects. | [ST_Distance](Geography-Functions/ST_Distance.md) | Double | Return the minimum geodesic distance between two geographies in meters. | v1.9.0 | | [ST_Contains](Geography-Functions/ST_Contains.md) | Boolean | Test whether geography A fully contains geography B. | v1.9.0 | | [ST_DWithin](Geography-Functions/ST_DWithin.md) | Boolean | Test whether two geographies are within a given geodesic distance (in meters) of each other. | v1.9.1 | +| [ST_Within](Geography-Functions/ST_Within.md) | Boolean | Test whether geography A is fully within geography B. 
| v1.9.1 | diff --git a/docs/api/sql/geography/Geography-Functions/ST_Within.md b/docs/api/sql/geography/Geography-Functions/ST_Within.md new file mode 100644 index 00000000000..1a69777847d --- /dev/null +++ b/docs/api/sql/geography/Geography-Functions/ST_Within.md @@ -0,0 +1,48 @@ + + +# ST_Within + +Introduction: Tests whether geography A is fully within geography B using S2 spherical boolean operations. Returns true if every point of A is inside or on the boundary of B. By OGC convention, `ST_Within(A, B)` is equivalent to `ST_Contains(B, A)`. + +![ST_Within returning true](../../../../image/ST_Within_geography/ST_Within_geography_true.svg "ST_Within returning true") +![ST_Within returning false](../../../../image/ST_Within_geography/ST_Within_geography_false.svg "ST_Within returning false") + +Format: + +`ST_Within (A: Geography, B: Geography)` + +Return type: `Boolean` + +Since: `v1.9.1` + +SQL Example + +```sql +SELECT ST_Within( + ST_GeogFromWKT('POINT (0.5 0.5)'), + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))') +); +``` + +Output: + +``` +true +``` diff --git a/docs/image/ST_Within_geography/ST_Within_geography_false.svg b/docs/image/ST_Within_geography/ST_Within_geography_false.svg new file mode 100644 index 00000000000..45af08d2ad8 --- /dev/null +++ b/docs/image/ST_Within_geography/ST_Within_geography_false.svg @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + ST_Within(A, B) is FALSE + ST_Within(P, B) is FALSE + + + + + + + + + + + + + + + + + + + + + + + B + A + P + + + + Point P (Outside B) + + + Polygon A (Straddles Boundary) + + + Polygon B + diff --git a/docs/image/ST_Within_geography/ST_Within_geography_true.svg b/docs/image/ST_Within_geography/ST_Within_geography_true.svg new file mode 100644 index 00000000000..1af1e2805f9 --- /dev/null +++ b/docs/image/ST_Within_geography/ST_Within_geography_true.svg @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + ST_Within(A, B) is TRUE + ST_Within(P, B) is TRUE + + + + + + + + + + + + + + + + + + + + + + + 
B + A + P + + + + Point P (Interior of B) + + + Polygon A (Fully Inside B) + + + Polygon B + diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala index 8be19ec5736..6ade426f6b9 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala @@ -109,17 +109,15 @@ private[apache] case class ST_Intersects(inputExpressions: Seq[Expression]) } /** - * Test if leftGeometry is full within rightGeometry + * Test if leftGeometry is fully within rightGeometry. Supports both Geometry (JTS) and Geography + * (S2) inputs via InferredExpression dual dispatch. * * @param inputExpressions */ private[apache] case class ST_Within(inputExpressions: Seq[Expression]) - extends ST_Predicate - with CodegenFallback { - - override def evalGeom(leftGeometry: Geometry, rightGeometry: Geometry): Boolean = { - Predicates.within(leftGeometry, rightGeometry) - } + extends InferredExpression( + inferrableFunction2(Predicates.within), + inferrableFunction2(org.apache.sedona.common.geography.Functions.within)) { protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { copy(inputExpressions = newChildren) diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala index 233ed8a806f..a2659b31617 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala @@ -79,16 +79,6 @@ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { SpatialPredicate.INTERSECTS, false, extraCondition)) - case 
ST_Within(Seq(leftShape, rightShape)) => - Some( - JoinQueryDetection( - left, - right, - leftShape, - rightShape, - SpatialPredicate.WITHIN, - false, - extraCondition)) case ST_Covers(Seq(leftShape, rightShape)) => Some( JoinQueryDetection( @@ -208,9 +198,9 @@ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { val queryDetection: Option[JoinQueryDetection] = condition.flatMap { case joinConditionMatcher(predicate, extraCondition) => predicate match { - // ST_Contains is an InferredExpression (not ST_Predicate) so it can't sit inside - // getJoinDetection; it's also the only predicate currently accepting Geography - // inputs and therefore the only one needing the Geography guard. + // ST_Contains and ST_Within are InferredExpressions (not ST_Predicates) so they can't + // sit inside getJoinDetection; they also accept Geography inputs via dual dispatch and + // therefore need the Geography guard (join planning deserializes with GeometrySerializer). case ST_Contains(Seq(leftShape, rightShape)) if !isGeographyInput(leftShape) && !isGeographyInput(rightShape) => Some( @@ -222,6 +212,17 @@ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { SpatialPredicate.CONTAINS, false, extraCondition)) + case ST_Within(Seq(leftShape, rightShape)) + if !isGeographyInput(leftShape) && !isGeographyInput(rightShape) => + Some( + JoinQueryDetection( + left, + right, + leftShape, + rightShape, + SpatialPredicate.WITHIN, + false, + extraCondition)) case pred: ST_Predicate => getJoinDetection(left, right, pred, extraCondition) case pred: RS_Predicate => diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala index c7af715e60b..bc24c2bbc32 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala +++ 
b/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala @@ -184,6 +184,30 @@ class GeographyFunctionTest extends TestBaseScala { .first() assertTrue(row.isNullAt(0)) } + + it("ST_Within point in polygon") { + val row = sparkSession + .sql(""" + SELECT ST_Within( + ST_GeogFromWKT('POINT (0.5 0.5)', 4326), + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))', 4326) + ) AS r + """) + .first() + assertTrue(row.getBoolean(0)) + } + + it("ST_Within point outside polygon") { + val row = sparkSession + .sql(""" + SELECT ST_Within( + ST_GeogFromWKT('POINT (2 2)', 4326), + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))', 4326) + ) AS r + """) + .first() + assertTrue(!row.getBoolean(0)) + } } // ─── DataFrame API ───────────────────────────────────────────────────── @@ -228,6 +252,16 @@ class GeographyFunctionTest extends TestBaseScala { .select(st_predicates.ST_DWithin(col("a"), col("b"), lit(200000.0)).as("r")) assertTrue(df.first().getBoolean(0)) } + + it("ST_Within via DataFrame API") { + val df = sparkSession + .sql("SELECT 'POINT (0.5 0.5)' AS pt, 'POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))' AS poly") + .select( + st_constructors.ST_GeogFromWKT(col("pt"), lit(4326)).as("pt"), + st_constructors.ST_GeogFromWKT(col("poly"), lit(4326)).as("poly")) + .select(st_predicates.ST_Within(col("pt"), col("poly")).as("r")) + assertTrue(df.first().getBoolean(0)) + } } // ─── Serialization round-trip ────────────────────────────────────────── From ae32ee24e2ddb4ba37f0a64b6b3b323604a9e386 Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Fri, 24 Apr 2026 10:13:27 -0700 Subject: [PATCH 4/5] add more unit tests --- .../sedona/common/Geography/FunctionTest.java | 43 ++++++++++++++ .../sql/geography/GeographyFunctionTest.scala | 57 ++++++++++++++++++- 2 files changed, 98 insertions(+), 2 deletions(-) diff --git a/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java 
b/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java index e524f52916b..a0096a162a7 100644 --- a/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java +++ b/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java @@ -269,6 +269,31 @@ public void dWithin_nullHandling() throws ParseException { assertFalse(Functions.dWithin(null, null, 1e6)); } + @Test + public void dWithin_reflexiveZeroThreshold() throws ParseException { + // A point is trivially within distance 0 of itself (distance == 0, threshold == 0, <= is + // inclusive). + Geography g = Constructors.geogFromWKT("POINT (10 20)", 4326); + assertTrue(Functions.dWithin(g, g, 0.0)); + } + + @Test + public void dWithin_negativeDistance() throws ParseException { + // No two geographies can be at a negative geodesic distance, so any negative threshold => + // false. + Geography g1 = Constructors.geogFromWKT("POINT (0 0)", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (0 0)", 4326); + assertFalse(Functions.dWithin(g1, g2, -1.0)); + } + + @Test + public void dWithin_nanDistance() throws ParseException { + // NaN threshold => all comparisons are false. + Geography g1 = Constructors.geogFromWKT("POINT (0 0)", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (0 1)", 4326); + assertFalse(Functions.dWithin(g1, g2, Double.NaN)); + } + // ─── Level 3: ST_Within ────────────────────────────────────────────────── @Test @@ -302,4 +327,22 @@ public void within_nullHandling() throws ParseException { assertFalse(Functions.within(null, g)); assertFalse(Functions.within(null, null)); } + + @Test + public void within_polygonInPolygon() throws ParseException { + Geography inner = Constructors.geogFromWKT("POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))", 4326); + Geography outer = Constructors.geogFromWKT("POLYGON ((0 0, 3 0, 3 3, 0 3, 0 0))", 4326); + assertTrue(Functions.within(inner, outer)); + // Swapped: the outer polygon is NOT within the inner one. 
+ assertFalse(Functions.within(outer, inner)); + } + + @Test + public void within_overlappingNotContained() throws ParseException { + // Two polygons that intersect but neither is contained in the other. + Geography a = Constructors.geogFromWKT("POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))", 4326); + Geography b = Constructors.geogFromWKT("POLYGON ((1 1, 3 1, 3 3, 1 3, 1 1))", 4326); + assertFalse(Functions.within(a, b)); + assertFalse(Functions.within(b, a)); + } } diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala index bc24c2bbc32..4823d9ae743 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala @@ -179,10 +179,52 @@ class GeographyFunctionTest extends TestBaseScala { } it("ST_DWithin null handling") { - val row = sparkSession + // null as second arg + val r1 = sparkSession .sql("SELECT ST_DWithin(ST_GeogFromWKT('POINT (0 0)', 4326), null, 1.0) AS r") .first() - assertTrue(row.isNullAt(0)) + assertTrue(r1.isNullAt(0)) + // null as first arg + val r2 = sparkSession + .sql("SELECT ST_DWithin(null, ST_GeogFromWKT('POINT (0 0)', 4326), 1.0) AS r") + .first() + assertTrue(r2.isNullAt(0)) + // null distance + val r3 = sparkSession + .sql(""" + SELECT ST_DWithin( + ST_GeogFromWKT('POINT (0 0)', 4326), + ST_GeogFromWKT('POINT (0 1)', 4326), + CAST(null AS DOUBLE)) AS r + """) + .first() + assertTrue(r3.isNullAt(0)) + } + + it("ST_DWithin accepts INT distance literal") { + // Catalyst should coerce INT -> DOUBLE for the 3-arg Geography overload. 
+ val row = sparkSession + .sql(""" + SELECT ST_DWithin( + ST_GeogFromWKT('POINT (0 0)', 4326), + ST_GeogFromWKT('POINT (0 1)', 4326), + 200000) AS r + """) + .first() + assertTrue(row.getBoolean(0)) + } + + it("ST_DWithin accepts FLOAT distance literal") { + // CAST to FLOAT forces a narrower type than DOUBLE; Catalyst should widen it. + val row = sparkSession + .sql(""" + SELECT ST_DWithin( + ST_GeogFromWKT('POINT (0 0)', 4326), + ST_GeogFromWKT('POINT (0 1)', 4326), + CAST(200000.5 AS FLOAT)) AS r + """) + .first() + assertTrue(row.getBoolean(0)) } it("ST_Within point in polygon") { @@ -208,6 +250,17 @@ class GeographyFunctionTest extends TestBaseScala { .first() assertTrue(!row.getBoolean(0)) } + + it("ST_Within null handling") { + val r1 = sparkSession + .sql("SELECT ST_Within(ST_GeogFromWKT('POINT (0 0)', 4326), null) AS r") + .first() + assertTrue(r1.isNullAt(0)) + val r2 = sparkSession + .sql("SELECT ST_Within(null, ST_GeogFromWKT('POINT (0 0)', 4326)) AS r") + .first() + assertTrue(r2.isNullAt(0)) + } } // ─── DataFrame API ───────────────────────────────────────────────────── From 622ad3ab2ed4d6b2dd4e5c14712ad846c24cf071 Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Wed, 29 Apr 2026 08:58:22 -0700 Subject: [PATCH 5/5] address copilot comments --- .../Geography-Functions/ST_Within.md | 10 ++- .../strategy/join/JoinQueryDetector.scala | 75 ++++++++++++------- .../sql/geography/GeographyFunctionTest.scala | 16 ++++ 3 files changed, 69 insertions(+), 32 deletions(-) diff --git a/docs/api/sql/geography/Geography-Functions/ST_Within.md b/docs/api/sql/geography/Geography-Functions/ST_Within.md index 1a69777847d..d9058eec114 100644 --- a/docs/api/sql/geography/Geography-Functions/ST_Within.md +++ b/docs/api/sql/geography/Geography-Functions/ST_Within.md @@ -19,7 +19,9 @@ # ST_Within -Introduction: Tests whether geography A is fully within geography B using S2 spherical boolean operations. Returns true if every point of A is inside or on the boundary of B. 
By OGC convention, `ST_Within(A, B)` is equivalent to `ST_Contains(B, A)`. +Introduction: Tests whether geography `A` is fully within geography `B` using S2 spherical boolean operations. Returns true when every point of `A`'s interior lies in `B`'s interior. By OGC convention, `ST_Within(A, B)` is equivalent to `ST_Contains(B, A)`, and shares the same boundary semantics. + +Boundary semantics on the sphere are inherited from S2's boolean operations and depend on each ring's vertex orientation: along an edge that is "owned" by `B`'s boundary the test returns true, and along the opposite edge it returns false. Do not rely on a specific result for points that lie exactly on `B`'s boundary; for predictable behavior, use a strict interior point or expand `B` slightly with `ST_Buffer` before testing. ![ST_Within returning true](../../../../image/ST_Within_geography/ST_Within_geography_true.svg "ST_Within returning true") ![ST_Within returning false](../../../../image/ST_Within_geography/ST_Within_geography_false.svg "ST_Within returning false") @@ -32,12 +34,12 @@ Return type: `Boolean` Since: `v1.9.1` -SQL Example +SQL Example — interior point: ```sql SELECT ST_Within( - ST_GeogFromWKT('POINT (0.5 0.5)'), - ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))') + ST_GeogFromWKT('POINT (0.5 0.5)', 4326), + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))', 4326) ); ``` diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala index a2659b31617..8944d8c7b1b 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala @@ -56,13 +56,37 @@ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { // Geography spatial joins are not supported in this PR — 
TraitJoinQueryBase.toSpatialRDD // deserializes join keys with GeometrySerializer, which would fail on Geography bytes. - // ST_Contains is the only spatial predicate currently wired for Geography (via InferredExpression - // dual dispatch); when either side is GeographyUDT we skip join planning and let Spark evaluate - // the predicate row-by-row. Other ST_Predicates reject Geography inputs at analysis time, so no - // guard is needed there. + // ST_Contains and ST_Within are wired for Geography via InferredExpression dual dispatch; when + // either side is GeographyUDT we skip join planning and let Spark evaluate the predicate + // row-by-row. Other ST_Predicates reject Geography inputs at analysis time, so no guard is + // needed there. private def isGeographyInput(shape: Expression): Boolean = shape.dataType.isInstanceOf[GeographyUDT] + /** + * Build a JoinQueryDetection for an InferredExpression predicate (ST_Contains, ST_Within, ...) + * unless either operand is GeographyUDT, in which case the join is skipped and the predicate + * falls back to row-by-row evaluation. 
+ */ + private def inferredJoinDetection( + left: LogicalPlan, + right: LogicalPlan, + leftShape: Expression, + rightShape: Expression, + spatialPredicate: SpatialPredicate, + extraCondition: Option[Expression]): Option[JoinQueryDetection] = + if (isGeographyInput(leftShape) || isGeographyInput(rightShape)) None + else + Some( + JoinQueryDetection( + left, + right, + leftShape, + rightShape, + spatialPredicate, + isGeography = false, + extraCondition)) + private def getJoinDetection( left: LogicalPlan, right: LogicalPlan, @@ -199,30 +223,25 @@ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { case joinConditionMatcher(predicate, extraCondition) => predicate match { // ST_Contains and ST_Within are InferredExpressions (not ST_Predicates) so they can't - // sit inside getJoinDetection; they also accept Geography inputs via dual dispatch and - // therefore need the Geography guard (join planning deserializes with GeometrySerializer). - case ST_Contains(Seq(leftShape, rightShape)) - if !isGeographyInput(leftShape) && !isGeographyInput(rightShape) => - Some( - JoinQueryDetection( - left, - right, - leftShape, - rightShape, - SpatialPredicate.CONTAINS, - false, - extraCondition)) - case ST_Within(Seq(leftShape, rightShape)) - if !isGeographyInput(leftShape) && !isGeographyInput(rightShape) => - Some( - JoinQueryDetection( - left, - right, - leftShape, - rightShape, - SpatialPredicate.WITHIN, - false, - extraCondition)) + // sit inside getJoinDetection; they also accept Geography inputs via dual dispatch. + // inferredJoinDetection applies the Geography guard so the partition/range planner + // doesn't try to deserialize Geography bytes via GeometrySerializer. 
+ case ST_Contains(Seq(leftShape, rightShape)) => + inferredJoinDetection( + left, + right, + leftShape, + rightShape, + SpatialPredicate.CONTAINS, + extraCondition) + case ST_Within(Seq(leftShape, rightShape)) => + inferredJoinDetection( + left, + right, + leftShape, + rightShape, + SpatialPredicate.WITHIN, + extraCondition) case pred: ST_Predicate => getJoinDetection(left, right, pred, extraCondition) case pred: RS_Predicate => diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala index 4823d9ae743..d832129754d 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala @@ -251,6 +251,22 @@ class GeographyFunctionTest extends TestBaseScala { assertTrue(!row.getBoolean(0)) } + it("ST_Within point on polygon boundary (S2-owned edge)") { + // S2 boolean ownership of an edge depends on vertex orientation; for the unit-square + // ring (0 0, 1 0, 1 1, 0 1) the left edge midpoint (0, 0.5) is reported as 'within'. + // The test locks in this current behavior so a future S2/library change won't silently + // flip it. Boundary semantics in general are documented as implementation-defined. + val row = sparkSession + .sql(""" + SELECT ST_Within( + ST_GeogFromWKT('POINT (0 0.5)', 4326), + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))', 4326) + ) AS r + """) + .first() + assertTrue(row.getBoolean(0)) + } + it("ST_Within null handling") { val r1 = sparkSession .sql("SELECT ST_Within(ST_GeogFromWKT('POINT (0 0)', 4326), null) AS r")