diff --git a/common/src/main/java/org/apache/sedona/common/geography/Functions.java b/common/src/main/java/org/apache/sedona/common/geography/Functions.java index 0d767736f46..fa8160d61eb 100644 --- a/common/src/main/java/org/apache/sedona/common/geography/Functions.java +++ b/common/src/main/java/org/apache/sedona/common/geography/Functions.java @@ -312,6 +312,24 @@ public static boolean equals(Geography g1, Geography g2) { return pred.S2_equals(toShapeIndex(g1), toShapeIndex(g2), s2Options()); } + /** + * Spherical "distance within" test. Returns true iff the minimum geodesic distance between g1 and + * g2 (in meters) is at most {@code distanceMeters}; false if either input or distance is null. + */ + public static boolean dWithin(Geography g1, Geography g2, double distanceMeters) { + if (g1 == null || g2 == null) return false; + Double d = distance(g1, g2); + return d != null && d <= distanceMeters; + } + + /** + * Spherical "within" test. Returns true iff g1 is fully inside g2 on the sphere. OGC convention: + * {@code ST_Within(A, B) == ST_Contains(B, A)}.
+ */ + public static boolean within(Geography g1, Geography g2) { + return contains(g2, g1); + } + /** Return EWKT for geography object */ public static String asEWKT(Geography geography) { return geography.toEWKT(); diff --git a/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java b/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java index 6f01bb35399..47daccb721e 100644 --- a/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java +++ b/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java @@ -496,6 +496,129 @@ public void contains_nullHandling() throws ParseException { assertFalse(Functions.contains(null, g1)); } + // ─── Level 3: ST_DWithin ───────────────────────────────────────────────── + + @Test + public void dWithin_twoPointsOneDegreeApart() throws ParseException { + Geography g1 = Constructors.geogFromWKT("POINT (0 0)", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (0 1)", 4326); + // 1° of latitude ≈ 111_195 m on the sphere + assertFalse(Functions.dWithin(g1, g2, 100_000.0)); + assertTrue(Functions.dWithin(g1, g2, 200_000.0)); + } + + @Test + public void dWithin_pointInsidePolygon() throws ParseException { + Geography poly = Constructors.geogFromWKT("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", 4326); + Geography pt = Constructors.geogFromWKT("POINT (0.5 0.5)", 4326); + // Distance is zero when one contains the other; any positive threshold should pass. 
+ assertTrue(Functions.dWithin(poly, pt, 1.0)); + } + + @Test + public void dWithin_boundaryInclusive() throws ParseException { + // distance == threshold ⇒ true (inclusive <=) + Geography g1 = Constructors.geogFromWKT("POINT (0 0)", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (0 1)", 4326); + double actual = Functions.distance(g1, g2); + assertTrue(Functions.dWithin(g1, g2, actual)); + assertFalse(Functions.dWithin(g1, g2, actual - 1.0)); + } + + @Test + public void dWithin_antimeridianCrossing() throws ParseException { + // Two points straddling the antimeridian: great-circle distance ~22 km, + // planar distance ~40_000 km — succeeding at 50 km proves we use spherical distance. + Geography g1 = Constructors.geogFromWKT("POINT (179.9 0)", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (-179.9 0)", 4326); + assertTrue(Functions.dWithin(g1, g2, 50_000.0)); + } + + @Test + public void dWithin_nullHandling() throws ParseException { + Geography g = Constructors.geogFromWKT("POINT (0 0)", 4326); + assertFalse(Functions.dWithin(g, null, 1e6)); + assertFalse(Functions.dWithin(null, g, 1e6)); + assertFalse(Functions.dWithin(null, null, 1e6)); + } + + @Test + public void dWithin_reflexiveZeroThreshold() throws ParseException { + // A point is trivially within distance 0 of itself (distance == 0, threshold == 0, <= is + // inclusive). + Geography g = Constructors.geogFromWKT("POINT (10 20)", 4326); + assertTrue(Functions.dWithin(g, g, 0.0)); + } + + @Test + public void dWithin_negativeDistance() throws ParseException { + // No two geographies can be at a negative geodesic distance, so any negative threshold => + // false. + Geography g1 = Constructors.geogFromWKT("POINT (0 0)", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (0 0)", 4326); + assertFalse(Functions.dWithin(g1, g2, -1.0)); + } + + @Test + public void dWithin_nanDistance() throws ParseException { + // NaN threshold => all comparisons are false. 
+ Geography g1 = Constructors.geogFromWKT("POINT (0 0)", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (0 1)", 4326); + assertFalse(Functions.dWithin(g1, g2, Double.NaN)); + } + + // ─── Level 3: ST_Within ────────────────────────────────────────────────── + + @Test + public void within_pointInPolygon() throws ParseException { + Geography pt = Constructors.geogFromWKT("POINT (0.5 0.5)", 4326); + Geography poly = Constructors.geogFromWKT("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", 4326); + assertTrue(Functions.within(pt, poly)); + } + + @Test + public void within_pointOutsidePolygon() throws ParseException { + Geography pt = Constructors.geogFromWKT("POINT (2 2)", 4326); + Geography poly = Constructors.geogFromWKT("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", 4326); + assertFalse(Functions.within(pt, poly)); + } + + @Test + public void within_isContainsSwapped() throws ParseException { + // OGC parity: within(A, B) == contains(B, A) for every input pair. + Geography poly = Constructors.geogFromWKT("POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))", 4326); + Geography inside = Constructors.geogFromWKT("POINT (1 1)", 4326); + Geography outside = Constructors.geogFromWKT("POINT (3 3)", 4326); + assertEquals(Functions.contains(poly, inside), Functions.within(inside, poly)); + assertEquals(Functions.contains(poly, outside), Functions.within(outside, poly)); + } + + @Test + public void within_nullHandling() throws ParseException { + Geography g = Constructors.geogFromWKT("POINT (1 1)", 4326); + assertFalse(Functions.within(g, null)); + assertFalse(Functions.within(null, g)); + assertFalse(Functions.within(null, null)); + } + + @Test + public void within_polygonInPolygon() throws ParseException { + Geography inner = Constructors.geogFromWKT("POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))", 4326); + Geography outer = Constructors.geogFromWKT("POLYGON ((0 0, 3 0, 3 3, 0 3, 0 0))", 4326); + assertTrue(Functions.within(inner, outer)); + // Swapped: the outer polygon is NOT within the inner one. 
+ assertFalse(Functions.within(outer, inner)); + } + + @Test + public void within_overlappingNotContained() throws ParseException { + // Two polygons that intersect but neither is contained in the other. + Geography a = Constructors.geogFromWKT("POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))", 4326); + Geography b = Constructors.geogFromWKT("POLYGON ((1 1, 3 1, 3 3, 1 3, 1 1))", 4326); + assertFalse(Functions.within(a, b)); + assertFalse(Functions.within(b, a)); + } + // ─── Level 4: ST_Buffer ────────────────────────────────────────────────── @Test diff --git a/docs/api/sql/geography/Geography-Functions.md b/docs/api/sql/geography/Geography-Functions.md index bf22fa40f98..907f55f7c9b 100644 --- a/docs/api/sql/geography/Geography-Functions.md +++ b/docs/api/sql/geography/Geography-Functions.md @@ -53,4 +53,6 @@ These functions operate on geography type objects. | [ST_Distance](Geography-Functions/ST_Distance.md) | Double | Return the minimum geodesic distance between two geographies in meters. | v1.9.0 | | [ST_Length](Geography-Functions/ST_Length.md) | Double | Return the spherical length of a geography in meters, summed along great-circle edges. | v1.9.1 | | [ST_Contains](Geography-Functions/ST_Contains.md) | Boolean | Test whether geography A fully contains geography B. | v1.9.0 | +| [ST_DWithin](Geography-Functions/ST_DWithin.md) | Boolean | Test whether two geographies are within a given geodesic distance (in meters) of each other. | v1.9.1 | +| [ST_Within](Geography-Functions/ST_Within.md) | Boolean | Test whether geography A is fully within geography B. | v1.9.1 | | [ST_Equals](Geography-Functions/ST_Equals.md) | Boolean | Test whether two geographies are spatially equal. 
| v1.9.1 | diff --git a/docs/api/sql/geography/Geography-Functions/ST_DWithin.md b/docs/api/sql/geography/Geography-Functions/ST_DWithin.md new file mode 100644 index 00000000000..d3c7559d97f --- /dev/null +++ b/docs/api/sql/geography/Geography-Functions/ST_DWithin.md @@ -0,0 +1,65 @@ + + +# ST_DWithin + +Introduction: Tests whether two geographies are within a given geodesic distance (in meters) of each other on the sphere. The minimum great-circle distance between any two points on the two geographies is compared against the threshold; the test is inclusive (returns true when the minimum distance equals the threshold). + +![ST_DWithin returning true](../../../../image/ST_DWithin_geography/ST_DWithin_geography_true.svg "ST_DWithin returning true") +![ST_DWithin returning false](../../../../image/ST_DWithin_geography/ST_DWithin_geography_false.svg "ST_DWithin returning false") + +Format: + +`ST_DWithin (A: Geography, B: Geography, distance: Double)` + +Return type: `Boolean` + +Since: `v1.9.1` + +SQL Example + +```sql +SELECT ST_DWithin( + ST_GeogFromWKT('POINT (0 0)', 4326), + ST_GeogFromWKT('POINT (0 1)', 4326), + 200000.0 +); +``` + +Output: + +``` +true +``` + +The same pair of points with a tighter threshold: + +```sql +SELECT ST_DWithin( + ST_GeogFromWKT('POINT (0 0)', 4326), + ST_GeogFromWKT('POINT (0 1)', 4326), + 100000.0 +); +``` + +Output: + +``` +false +``` diff --git a/docs/api/sql/geography/Geography-Functions/ST_Within.md b/docs/api/sql/geography/Geography-Functions/ST_Within.md new file mode 100644 index 00000000000..d9058eec114 --- /dev/null +++ b/docs/api/sql/geography/Geography-Functions/ST_Within.md @@ -0,0 +1,50 @@ + + +# ST_Within + +Introduction: Tests whether geography `A` is fully within geography `B` using S2 spherical boolean operations. Returns true when every point of `A`'s interior lies in `B`'s interior. By OGC convention, `ST_Within(A, B)` is equivalent to `ST_Contains(B, A)`, and shares the same boundary semantics. 
+ +Boundary semantics on the sphere are inherited from S2's boolean operations and depend on each ring's vertex orientation: along an edge that is "owned" by `B`'s boundary the test returns true, and along the opposite edge it returns false. Do not rely on a specific result for points that lie exactly on `B`'s boundary; for predictable behavior, use a strict interior point or expand `B` slightly with `ST_Buffer` before testing. + +![ST_Within returning true](../../../../image/ST_Within_geography/ST_Within_geography_true.svg "ST_Within returning true") +![ST_Within returning false](../../../../image/ST_Within_geography/ST_Within_geography_false.svg "ST_Within returning false") + +Format: + +`ST_Within (A: Geography, B: Geography)` + +Return type: `Boolean` + +Since: `v1.9.1` + +SQL Example — interior point: + +```sql +SELECT ST_Within( + ST_GeogFromWKT('POINT (0.5 0.5)', 4326), + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))', 4326) +); +``` + +Output: + +``` +true +``` diff --git a/docs/image/ST_DWithin_geography/ST_DWithin_geography_false.svg b/docs/image/ST_DWithin_geography/ST_DWithin_geography_false.svg new file mode 100644 index 00000000000..88a240943a1 --- /dev/null +++ b/docs/image/ST_DWithin_geography/ST_DWithin_geography_false.svg @@ -0,0 +1,42 @@ + + + + + + + + + + + ST_DWithin(A, B, d) + + FALSE + + + + + + + + + + + + + + + d + + + + B + + + + A + + + Geodesic distance between A and B > d (on sphere) + diff --git a/docs/image/ST_DWithin_geography/ST_DWithin_geography_true.svg b/docs/image/ST_DWithin_geography/ST_DWithin_geography_true.svg new file mode 100644 index 00000000000..b2bbeeea2c6 --- /dev/null +++ b/docs/image/ST_DWithin_geography/ST_DWithin_geography_true.svg @@ -0,0 +1,42 @@ + + + + + + + + + + + ST_DWithin(A, B, d) + + TRUE + + + + + + + + + + + + + + + d + + + + B + + + + A + + + Geodesic distance between A and B ≤ d (on sphere) + diff --git a/docs/image/ST_Within_geography/ST_Within_geography_false.svg 
b/docs/image/ST_Within_geography/ST_Within_geography_false.svg new file mode 100644 index 00000000000..45af08d2ad8 --- /dev/null +++ b/docs/image/ST_Within_geography/ST_Within_geography_false.svg @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + ST_Within(A, B) is FALSE + ST_Within(P, B) is FALSE + + + + + + + + + + + + + + + + + + + + + + + B + A + P + + + + Point P (Outside B) + + + Polygon A (Straddles Boundary) + + + Polygon B + diff --git a/docs/image/ST_Within_geography/ST_Within_geography_true.svg b/docs/image/ST_Within_geography/ST_Within_geography_true.svg new file mode 100644 index 00000000000..1af1e2805f9 --- /dev/null +++ b/docs/image/ST_Within_geography/ST_Within_geography_true.svg @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + ST_Within(A, B) is TRUE + ST_Within(P, B) is TRUE + + + + + + + + + + + + + + + + + + + + + + + B + A + P + + + + Point P (Interior of B) + + + Polygon A (Fully Inside B) + + + Polygon B + diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala index de94ac5c23e..b7ca9f51f6e 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala @@ -109,17 +109,15 @@ private[apache] case class ST_Intersects(inputExpressions: Seq[Expression]) } /** - * Test if leftGeometry is full within rightGeometry + * Test if leftGeometry is fully within rightGeometry. Supports both Geometry (JTS) and Geography + * (S2) inputs via InferredExpression dual dispatch. 
* * @param inputExpressions */ private[apache] case class ST_Within(inputExpressions: Seq[Expression]) - extends ST_Predicate - with CodegenFallback { - - override def evalGeom(leftGeometry: Geometry, rightGeometry: Geometry): Boolean = { - Predicates.within(leftGeometry, rightGeometry) - } + extends InferredExpression( + inferrableFunction2(Predicates.within), + inferrableFunction2(org.apache.sedona.common.geography.Functions.within)) { protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { copy(inputExpressions = newChildren) @@ -289,7 +287,8 @@ private[apache] case class ST_OrderingEquals(inputExpressions: Seq[Expression]) private[apache] case class ST_DWithin(inputExpressions: Seq[Expression]) extends InferredExpression( inferrableFunction3(Predicates.dWithin), - inferrableFunction4(Predicates.dWithin)) { + inferrableFunction4(Predicates.dWithin), + inferrableFunction3(org.apache.sedona.common.geography.Functions.dWithin)) { protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { copy(inputExpressions = newChildren) diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala index 8acae32e2b1..14a93da7266 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala @@ -61,13 +61,37 @@ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { // * ST_Contains — broadcast joins route GeographyUDT inputs through a dedicated index/refine // path (see SpatialIndexExec.geographyShape / BroadcastIndexJoinExec.geographyShape). The // partition/range path still falls back to row-by-row evaluation. 
- // * ST_Equals — no broadcast index path yet (the Geography refiner is ST_Contains-specific), - // so we gate Geography inputs at the matcher and let Spark evaluate the predicate - // row-by-row. + // * ST_Within / ST_Equals — no broadcast index path yet (the Geography refiner is + // ST_Contains-specific), so we gate Geography inputs at the matcher (via + // `inferredJoinDetection`) and let Spark evaluate the predicate row-by-row. // Other ST_Predicates reject Geography inputs at analysis time, so no guard is needed there. private def isGeographyInput(shape: Expression): Boolean = shape.dataType.isInstanceOf[GeographyUDT] + /** + * Build a JoinQueryDetection for an InferredExpression predicate (currently ST_Within and + * ST_Equals). Returns None when either operand is GeographyUDT, so the join is skipped and the + * predicate falls back to row-by-row evaluation. + */ + private def inferredJoinDetection( + left: LogicalPlan, + right: LogicalPlan, + leftShape: Expression, + rightShape: Expression, + spatialPredicate: SpatialPredicate, + extraCondition: Option[Expression]): Option[JoinQueryDetection] = + if (isGeographyInput(leftShape) || isGeographyInput(rightShape)) None + else + Some( + JoinQueryDetection( + left, + right, + leftShape, + rightShape, + spatialPredicate, + isGeography = false, + extraCondition)) + private def getJoinDetection( left: LogicalPlan, right: LogicalPlan, @@ -84,16 +108,6 @@ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { SpatialPredicate.INTERSECTS, false, extraCondition)) - case ST_Within(Seq(leftShape, rightShape)) => - Some( - JoinQueryDetection( - left, - right, - leftShape, - rightShape, - SpatialPredicate.WITHIN, - false, - extraCondition)) case ST_Covers(Seq(leftShape, rightShape)) => Some( JoinQueryDetection( @@ -203,9 +217,9 @@ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { val queryDetection: Option[JoinQueryDetection] = condition.flatMap { case joinConditionMatcher(predicate,
extraCondition) => predicate match { - // ST_Contains / ST_Equals are InferredExpression (not ST_Predicate) so they can't - // sit inside getJoinDetection; they're also the only predicates currently accepting - // Geography inputs. + // ST_Contains / ST_Equals / ST_Within are InferredExpression (not ST_Predicate) so + // they can't sit inside getJoinDetection; they also accept Geography inputs (as does + // ST_DWithin), so each case below decides how Geography operands are handled. // // ST_Contains: when either operand is GeographyUDT we still detect the join here and // set `geographyShape = true`; planBroadcastJoin will route the work to the @@ -224,20 +238,25 @@ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { isGeography = false, extraCondition, geographyShape = geographyShape)) - // ST_Equals on Geography has no broadcast index path yet (the Geography refiner is - // ST_Contains-specific), so gate Geography inputs and let them fall back to - // row-by-row evaluation. - case ST_Equals(Seq(leftShape, rightShape)) - if !isGeographyInput(leftShape) && !isGeographyInput(rightShape) => - Some( - JoinQueryDetection( - left, - right, - leftShape, - rightShape, - SpatialPredicate.EQUALS, - isGeography = false, - extraCondition)) + // ST_Within / ST_Equals on Geography have no broadcast index path yet (the Geography + // refiner is ST_Contains-specific), so gate Geography inputs and let them fall back + // to row-by-row evaluation.
+ case ST_Within(Seq(leftShape, rightShape)) => + inferredJoinDetection( + left, + right, + leftShape, + rightShape, + SpatialPredicate.WITHIN, + extraCondition) + case ST_Equals(Seq(leftShape, rightShape)) => + inferredJoinDetection( + left, + right, + leftShape, + rightShape, + SpatialPredicate.EQUALS, + extraCondition) case pred: ST_Predicate => getJoinDetection(left, right, pred, extraCondition) case pred: RS_Predicate => diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala index 7629e271f64..ac3277eb5c1 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala @@ -230,6 +230,18 @@ class GeographyFunctionTest extends TestBaseScala { assertTrue(!row.getBoolean(0)) } + it("ST_DWithin true when within threshold") { + val row = sparkSession + .sql(""" + SELECT ST_DWithin( + ST_GeogFromWKT('POINT (0 0)', 4326), + ST_GeogFromWKT('POINT (0 1)', 4326), + 200000.0) AS r + """) + .first() + assertTrue(row.getBoolean(0)) + } + it("ST_Equals same point") { val row = sparkSession .sql(""" @@ -242,6 +254,18 @@ class GeographyFunctionTest extends TestBaseScala { assertTrue(row.getBoolean(0)) } + it("ST_DWithin false when outside threshold") { + val row = sparkSession + .sql(""" + SELECT ST_DWithin( + ST_GeogFromWKT('POINT (0 0)', 4326), + ST_GeogFromWKT('POINT (0 1)', 4326), + 100000.0) AS r + """) + .first() + assertTrue(!row.getBoolean(0)) + } + it("ST_Equals different points") { val row = sparkSession .sql(""" @@ -254,6 +278,67 @@ class GeographyFunctionTest extends TestBaseScala { assertTrue(!row.getBoolean(0)) } + it("ST_DWithin null handling") { + // null as second arg + val r1 = sparkSession + .sql("SELECT ST_DWithin(ST_GeogFromWKT('POINT (0 0)', 4326), null, 1.0) AS r") + .first() + 
assertTrue(r1.isNullAt(0)) + // null as first arg + val r2 = sparkSession + .sql("SELECT ST_DWithin(null, ST_GeogFromWKT('POINT (0 0)', 4326), 1.0) AS r") + .first() + assertTrue(r2.isNullAt(0)) + // null distance + val r3 = sparkSession + .sql(""" + SELECT ST_DWithin( + ST_GeogFromWKT('POINT (0 0)', 4326), + ST_GeogFromWKT('POINT (0 1)', 4326), + CAST(null AS DOUBLE)) AS r + """) + .first() + assertTrue(r3.isNullAt(0)) + } + + it("ST_DWithin accepts INT distance literal") { + // Catalyst should coerce INT -> DOUBLE for the 3-arg Geography overload. + val row = sparkSession + .sql(""" + SELECT ST_DWithin( + ST_GeogFromWKT('POINT (0 0)', 4326), + ST_GeogFromWKT('POINT (0 1)', 4326), + 200000) AS r + """) + .first() + assertTrue(row.getBoolean(0)) + } + + it("ST_DWithin accepts FLOAT distance literal") { + // CAST to FLOAT forces a narrower type than DOUBLE; Catalyst should widen it. + val row = sparkSession + .sql(""" + SELECT ST_DWithin( + ST_GeogFromWKT('POINT (0 0)', 4326), + ST_GeogFromWKT('POINT (0 1)', 4326), + CAST(200000.5 AS FLOAT)) AS r + """) + .first() + assertTrue(row.getBoolean(0)) + } + + it("ST_Within point in polygon") { + val row = sparkSession + .sql(""" + SELECT ST_Within( + ST_GeogFromWKT('POINT (0.5 0.5)', 4326), + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))', 4326) + ) AS r + """) + .first() + assertTrue(row.getBoolean(0)) + } + it("ST_Equals same polygon") { val row = sparkSession .sql(""" @@ -266,6 +351,18 @@ class GeographyFunctionTest extends TestBaseScala { assertTrue(row.getBoolean(0)) } + it("ST_Within point outside polygon") { + val row = sparkSession + .sql(""" + SELECT ST_Within( + ST_GeogFromWKT('POINT (2 2)', 4326), + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))', 4326) + ) AS r + """) + .first() + assertTrue(!row.getBoolean(0)) + } + it("ST_Equals different polygons") { val row = sparkSession .sql(""" @@ -277,6 +374,33 @@ class GeographyFunctionTest extends TestBaseScala { .first() assertTrue(!row.getBoolean(0)) } + 
+ it("ST_Within point on polygon boundary returns a Boolean (semantics implementation-defined)") { + // S2 boolean ownership of an edge depends on vertex orientation, so the result for points + // exactly on the boundary is intentionally not asserted. We only verify the call completes + // and returns a non-null Boolean — the docs steer users toward ST_Buffer for predictable + // boundary handling. + val row = sparkSession + .sql(""" + SELECT ST_Within( + ST_GeogFromWKT('POINT (0 0.5)', 4326), + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))', 4326) + ) AS r + """) + .first() + assertTrue(!row.isNullAt(0)) + } + + it("ST_Within null handling") { + val r1 = sparkSession + .sql("SELECT ST_Within(ST_GeogFromWKT('POINT (0 0)', 4326), null) AS r") + .first() + assertTrue(r1.isNullAt(0)) + val r2 = sparkSession + .sql("SELECT ST_Within(null, ST_GeogFromWKT('POINT (0 0)', 4326)) AS r") + .first() + assertTrue(r2.isNullAt(0)) + } } // ─── Level 4: ST_Buffer ──────────────────────────────────────────────── @@ -397,6 +521,26 @@ class GeographyFunctionTest extends TestBaseScala { assertTrue(df.first().getBoolean(0)) } + it("ST_DWithin via DataFrame API") { + val df = sparkSession + .sql("SELECT 'POINT (0 0)' AS a, 'POINT (0 1)' AS b") + .select( + st_constructors.ST_GeogFromWKT(col("a"), lit(4326)).as("a"), + st_constructors.ST_GeogFromWKT(col("b"), lit(4326)).as("b")) + .select(st_predicates.ST_DWithin(col("a"), col("b"), lit(200000.0)).as("r")) + assertTrue(df.first().getBoolean(0)) + } + + it("ST_Within via DataFrame API") { + val df = sparkSession + .sql("SELECT 'POINT (0.5 0.5)' AS pt, 'POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))' AS poly") + .select( + st_constructors.ST_GeogFromWKT(col("pt"), lit(4326)).as("pt"), + st_constructors.ST_GeogFromWKT(col("poly"), lit(4326)).as("poly")) + .select(st_predicates.ST_Within(col("pt"), col("poly")).as("r")) + assertTrue(df.first().getBoolean(0)) + } + it("ST_Equals via DataFrame API") { val df = sparkSession .sql("SELECT 'POLYGON 
((0 0, 1 0, 1 1, 0 1, 0 0))' AS a, 'POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))' AS b")