From 75b133e0a9f81081114d1004a28bf8399ba768f8 Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Wed, 22 Apr 2026 10:04:46 -0700 Subject: [PATCH 1/4] [GH-2830] Adds Geography dual-dispatch to ST_Intersects --- .../sedona/common/geography/Functions.java | 7 +++ .../sedona/common/Geography/FunctionTest.java | 29 ++++++++++++ docs/api/sql/geography/Geography-Functions.md | 1 + .../Geography-Functions/ST_Intersects.md | 45 +++++++++++++++++++ .../sedona_sql/expressions/Predicates.scala | 9 ++-- .../strategy/join/JoinQueryDetector.scala | 39 ++++++++-------- .../sql/geography/GeographyFunctionTest.scala | 24 ++++++++++ 7 files changed, 129 insertions(+), 25 deletions(-) create mode 100644 docs/api/sql/geography/Geography-Functions/ST_Intersects.md diff --git a/common/src/main/java/org/apache/sedona/common/geography/Functions.java b/common/src/main/java/org/apache/sedona/common/geography/Functions.java index e3166fa9e0f..378ada96e09 100644 --- a/common/src/main/java/org/apache/sedona/common/geography/Functions.java +++ b/common/src/main/java/org/apache/sedona/common/geography/Functions.java @@ -130,6 +130,13 @@ public static boolean contains(Geography g1, Geography g2) { return pred.S2_contains(toShapeIndex(g1), toShapeIndex(g2), s2Options()); } + /** Spherical intersection test using S2 boolean operations. */ + public static boolean intersects(Geography g1, Geography g2) { + if (g1 == null || g2 == null) return false; + Predicates pred = new Predicates(); + return pred.S2_intersects(toShapeIndex(g1), toShapeIndex(g2), s2Options()); + } + /** Return EWKT for geography object */ public static String asEWKT(Geography geography) { return geography.toEWKT(); diff --git a/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java b/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java index 6787c5ad876..dbaaccfb047 100644 --- a/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java +++ b/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java @@ -181,6 +181,35 @@ public void contains_pointOutsidePolygon() throws ParseException { assertFalse(Functions.contains(g1, g2)); } + @Test + public void intersects_overlappingPolygons() throws ParseException { + Geography g1 = Constructors.geogFromWKT("POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))", 4326); + Geography g2 = Constructors.geogFromWKT("POLYGON ((1 1, 3 1, 3 3, 1 3, 1 1))", 4326); + assertTrue(Functions.intersects(g1, g2)); + } + + @Test + public void intersects_disjointPolygons() throws ParseException { + Geography g1 = Constructors.geogFromWKT("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", 4326); + Geography g2 = Constructors.geogFromWKT("POLYGON ((10 10, 11 10, 11 11, 10 11, 10 10))", 4326); + assertFalse(Functions.intersects(g1, g2)); + } + + @Test + public void intersects_pointInPolygon() throws ParseException { + Geography g1 = Constructors.geogFromWKT("POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (1 1)", 4326); + assertTrue(Functions.intersects(g1, g2)); + } + + @Test + public void intersects_nullHandling() throws ParseException { + Geography g = Constructors.geogFromWKT("POINT (1 1)", 4326); + assertFalse(Functions.intersects(g, null)); + assertFalse(Functions.intersects(null, g)); + assertFalse(Functions.intersects(null, null)); + } + @Test public void contains_nullHandling() throws ParseException { Geography g1 = Constructors.geogFromWKT("POINT (1 1)", 4326); diff --git a/docs/api/sql/geography/Geography-Functions.md b/docs/api/sql/geography/Geography-Functions.md index 7648b0c0a1e..1d9db213e17 100644 --- a/docs/api/sql/geography/Geography-Functions.md +++ b/docs/api/sql/geography/Geography-Functions.md @@ -46,3 +46,4 @@ These functions operate on geography type objects. | [ST_NPoints](Geography-Functions/ST_NPoints.md) | Integer | Return the number of points (vertices) in a geography. | v1.9.0 | | [ST_Distance](Geography-Functions/ST_Distance.md) | Double | Return the minimum geodesic distance between two geographies in meters. | v1.9.0 | | [ST_Contains](Geography-Functions/ST_Contains.md) | Boolean | Test whether geography A fully contains geography B. | v1.9.0 | +| [ST_Intersects](Geography-Functions/ST_Intersects.md) | Boolean | Test whether two geographies intersect. | v1.9.1 | diff --git a/docs/api/sql/geography/Geography-Functions/ST_Intersects.md b/docs/api/sql/geography/Geography-Functions/ST_Intersects.md new file mode 100644 index 00000000000..2512b3971f8 --- /dev/null +++ b/docs/api/sql/geography/Geography-Functions/ST_Intersects.md @@ -0,0 +1,45 @@ + + +# ST_Intersects + +Introduction: Tests whether two geography objects intersect using S2 spherical boolean operations. Returns true if A and B share any portion of space. + +Format: + +`ST_Intersects (A: Geography, B: Geography)` + +Return type: `Boolean` + +Since: `v1.9.1` + +SQL Example + +```sql +SELECT ST_Intersects( + ST_GeogFromWKT('POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))'), + ST_GeogFromWKT('POLYGON ((1 1, 3 1, 3 3, 1 3, 1 1))') +); +``` + +Output: + +``` +true +``` diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala index 6affd348b32..d34b66bba78 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Predicates.scala @@ -96,12 +96,9 @@ private[apache] case class ST_Contains(inputExpressions: Seq[Expression]) * @param inputExpressions */ private[apache] case class ST_Intersects(inputExpressions: Seq[Expression]) - extends ST_Predicate - with CodegenFallback { - - override def evalGeom(leftGeometry: Geometry, rightGeometry: Geometry): Boolean = { - Predicates.intersects(leftGeometry, rightGeometry) - } + extends InferredExpression( + inferrableFunction2(Predicates.intersects), + inferrableFunction2(org.apache.sedona.common.geography.Functions.intersects)) { protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { copy(inputExpressions = newChildren) diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala index 233ed8a806f..43548e92c11 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/strategy/join/JoinQueryDetector.scala @@ -54,12 +54,12 @@ case class JoinQueryDetection( */ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { - // Geography spatial joins are not supported in this PR — TraitJoinQueryBase.toSpatialRDD - // deserializes join keys with GeometrySerializer, which would fail on Geography bytes. - // ST_Contains is the only spatial predicate currently wired for Geography (via InferredExpression - // dual dispatch); when either side is GeographyUDT we skip join planning and let Spark evaluate - // the predicate row-by-row. Other ST_Predicates reject Geography inputs at analysis time, so no - // guard is needed there. + // Geography spatial joins are not supported — TraitJoinQueryBase.toSpatialRDD deserializes + // join keys with GeometrySerializer, which would fail on Geography bytes. Spatial predicates + // wired for Geography via InferredExpression dual dispatch (ST_Contains, ST_Intersects, ...) + // are handled in the top-level predicate match with an isGeographyInput guard; when either + // side is GeographyUDT we skip join planning and let Spark evaluate the predicate row-by-row. + // Other ST_Predicates reject Geography inputs at analysis time, so no guard is needed there. private def isGeographyInput(shape: Expression): Boolean = shape.dataType.isInstanceOf[GeographyUDT] @@ -69,16 +69,6 @@ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { predicate: ST_Predicate, extraCondition: Option[Expression] = None): Option[JoinQueryDetection] = { predicate match { - case ST_Intersects(Seq(leftShape, rightShape)) => - Some( - JoinQueryDetection( - left, - right, - leftShape, - rightShape, - SpatialPredicate.INTERSECTS, - false, - extraCondition)) case ST_Within(Seq(leftShape, rightShape)) => Some( JoinQueryDetection( @@ -208,9 +198,9 @@ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { val queryDetection: Option[JoinQueryDetection] = condition.flatMap { case joinConditionMatcher(predicate, extraCondition) => predicate match { - // ST_Contains is an InferredExpression (not ST_Predicate) so it can't sit inside - // getJoinDetection; it's also the only predicate currently accepting Geography - // inputs and therefore the only one needing the Geography guard. + // ST_Contains / ST_Intersects are InferredExpression (not ST_Predicate) so they can't + // sit inside getJoinDetection; they're also the only predicates currently accepting + // Geography inputs and therefore the only ones needing the Geography guard. case ST_Contains(Seq(leftShape, rightShape)) if !isGeographyInput(leftShape) && !isGeographyInput(rightShape) => Some( @@ -222,6 +212,17 @@ class JoinQueryDetector(sparkSession: SparkSession) extends SparkStrategy { SpatialPredicate.CONTAINS, false, extraCondition)) + case ST_Intersects(Seq(leftShape, rightShape)) + if !isGeographyInput(leftShape) && !isGeographyInput(rightShape) => + Some( + JoinQueryDetection( + left, + right, + leftShape, + rightShape, + SpatialPredicate.INTERSECTS, + false, + extraCondition)) case pred: ST_Predicate => getJoinDetection(left, right, pred, extraCondition) case pred: RS_Predicate => diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala index 7fe76730d27..ea8f7a034d4 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/geography/GeographyFunctionTest.scala @@ -141,6 +141,30 @@ class GeographyFunctionTest extends TestBaseScala { .first() assertTrue(!row.getBoolean(0)) } + + it("ST_Intersects overlapping polygons") { + val row = sparkSession + .sql(""" + SELECT ST_Intersects( + ST_GeogFromWKT('POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))', 4326), + ST_GeogFromWKT('POLYGON ((1 1, 3 1, 3 3, 1 3, 1 1))', 4326) + ) AS result + """) + .first() + assertTrue(row.getBoolean(0)) + } + + it("ST_Intersects disjoint polygons") { + val row = sparkSession + .sql(""" + SELECT ST_Intersects( + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))', 4326), + ST_GeogFromWKT('POLYGON ((10 10, 11 10, 11 11, 10 11, 10 10))', 4326) + ) AS result + """) + .first() + assertTrue(!row.getBoolean(0)) + } } // ─── DataFrame API ───────────────────────────────────────────────────── From e3144723344325290e199f7db4e01316fad7401f Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Wed, 22 Apr 2026 10:27:18 -0700 Subject: [PATCH 2/4] add fast-path for st_intersects optimization --- .../sedona/common/S2Geography/Predicates.java | 16 ++++++++++ .../sedona/common/geography/Functions.java | 22 ++++++++++++- .../sedona/common/Geography/FunctionTest.java | 32 +++++++++++++++++++ 3 files changed, 69 insertions(+), 1 deletion(-) diff --git a/common/src/main/java/org/apache/sedona/common/S2Geography/Predicates.java b/common/src/main/java/org/apache/sedona/common/S2Geography/Predicates.java index e1df0da0318..7751a74534c 100644 --- a/common/src/main/java/org/apache/sedona/common/S2Geography/Predicates.java +++ b/common/src/main/java/org/apache/sedona/common/S2Geography/Predicates.java @@ -29,6 +29,22 @@ public boolean S2_intersects( return S2BooleanOperation.intersects(geo1.shapeIndex, geo2.shapeIndex, options); } + /** + * Fast intersects between a single point and a ShapeIndex. Avoids building a ShapeIndex for the + * point side — only the complex geometry needs an index. Uses S2ClosestEdgeQuery with + * includeInteriors=true (default) so a point in a polygon interior returns distance 0. + */ + public static boolean S2_intersectsPointWithIndex(S2Point point, ShapeIndexGeography geo) { + S2ClosestEdgeQuery query = S2ClosestEdgeQuery.builder().build(geo.shapeIndex); + S2ClosestEdgeQuery.PointTarget target = + new S2ClosestEdgeQuery.PointTarget<>(point); + Optional result = query.findClosestEdge(target); + if (!result.isPresent()) { + return false; + } + return ((S1ChordAngle) result.get().distance()).getLength2() == 0.0; + } + public boolean S2_equals( ShapeIndexGeography geo1, ShapeIndexGeography geo2, S2BooleanOperation.Options options) { return S2BooleanOperation.equals(geo1.shapeIndex, geo2.shapeIndex, options); diff --git a/common/src/main/java/org/apache/sedona/common/geography/Functions.java b/common/src/main/java/org/apache/sedona/common/geography/Functions.java index 378ada96e09..7ff0d1a23c7 100644 --- a/common/src/main/java/org/apache/sedona/common/geography/Functions.java +++ b/common/src/main/java/org/apache/sedona/common/geography/Functions.java @@ -130,9 +130,29 @@ public static boolean contains(Geography g1, Geography g2) { return pred.S2_contains(toShapeIndex(g1), toShapeIndex(g2), s2Options()); } - /** Spherical intersection test using S2 boolean operations. */ + /** + * Spherical intersection test using S2 boolean operations. Takes fast paths for point-to-point + * and point-to-complex inputs backed by WKBGeography, avoiding ShapeIndex construction on the + * point side. + */ public static boolean intersects(Geography g1, Geography g2) { if (g1 == null || g2 == null) return false; + if (g1 instanceof WKBGeography && g2 instanceof WKBGeography) { + WKBGeography w1 = (WKBGeography) g1; + WKBGeography w2 = (WKBGeography) g2; + // Fast path: point-to-point intersects iff the points are equal + if (w1.isPoint() && w2.isPoint()) { + return w1.extractPoint().equalsPoint(w2.extractPoint()); + } + // Fast path: point-to-complex uses PointTarget (avoids building ShapeIndex for point side) + if (w1.isPoint()) { + return Predicates.S2_intersectsPointWithIndex(w1.extractPoint(), toShapeIndex(w2)); + } + if (w2.isPoint()) { + return Predicates.S2_intersectsPointWithIndex(w2.extractPoint(), toShapeIndex(w1)); + } + } + // General path via ShapeIndex Predicates pred = new Predicates(); return pred.S2_intersects(toShapeIndex(g1), toShapeIndex(g2), s2Options()); } diff --git a/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java b/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java index dbaaccfb047..529a23b65f4 100644 --- a/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java +++ b/common/src/test/java/org/apache/sedona/common/Geography/FunctionTest.java @@ -202,6 +202,38 @@ public void intersects_pointInPolygon() throws ParseException { assertTrue(Functions.intersects(g1, g2)); } + @Test + public void intersects_pointToPoint_samePoint() throws ParseException { + // Exercises the point-to-point fast path (no ShapeIndex built on either side) + Geography g1 = Constructors.geogFromWKT("POINT (1 2)", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (1 2)", 4326); + assertTrue(Functions.intersects(g1, g2)); + } + + @Test + public void intersects_pointToPoint_differentPoints() throws ParseException { + Geography g1 = Constructors.geogFromWKT("POINT (1 2)", 4326); + Geography g2 = Constructors.geogFromWKT("POINT (3 4)", 4326); + assertFalse(Functions.intersects(g1, g2)); + } + + @Test + public void intersects_pointOnLinestring() throws ParseException { + // Exercises the point-to-complex fast path + Geography line = Constructors.geogFromWKT("LINESTRING (0 0, 2 0)", 4326); + Geography pt = Constructors.geogFromWKT("POINT (1 0)", 4326); + assertTrue(Functions.intersects(line, pt)); + assertTrue(Functions.intersects(pt, line)); + } + + @Test + public void intersects_pointOffLinestring() throws ParseException { + Geography line = Constructors.geogFromWKT("LINESTRING (0 0, 2 0)", 4326); + Geography pt = Constructors.geogFromWKT("POINT (5 5)", 4326); + assertFalse(Functions.intersects(line, pt)); + assertFalse(Functions.intersects(pt, line)); + } + @Test public void intersects_nullHandling() throws ParseException { Geography g = Constructors.geogFromWKT("POINT (1 1)", 4326); From f7f848becb30f657ca9c4fe73e2c268630c41105 Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Wed, 22 Apr 2026 12:03:43 -0700 Subject: [PATCH 3/4] address copilot comments --- docs/api/sql/geography/Geography-Functions.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/api/sql/geography/Geography-Functions.md b/docs/api/sql/geography/Geography-Functions.md index 1d9db213e17..74c68fd79a9 100644 --- a/docs/api/sql/geography/Geography-Functions.md +++ b/docs/api/sql/geography/Geography-Functions.md @@ -43,7 +43,7 @@ These functions operate on geography type objects. | :--- | :--- | :--- | :--- | | [ST_AsEWKT](Geography-Functions/ST_AsEWKT.md) | String | Return the Extended Well-Known Text representation of a geography. | v1.8.0 | | [ST_Envelope](Geography-Functions/ST_Envelope.md) | Geography | Return the bounding box (envelope) of a geography. Supports anti-meridian splitting. | v1.8.0 | -| [ST_NPoints](Geography-Functions/ST_NPoints.md) | Integer | Return the number of points (vertices) in a geography. | v1.9.0 | -| [ST_Distance](Geography-Functions/ST_Distance.md) | Double | Return the minimum geodesic distance between two geographies in meters. | v1.9.0 | -| [ST_Contains](Geography-Functions/ST_Contains.md) | Boolean | Test whether geography A fully contains geography B. | v1.9.0 | +| [ST_NPoints](Geography-Functions/ST_NPoints.md) | Integer | Return the number of points (vertices) in a geography. | v1.9.1 | +| [ST_Distance](Geography-Functions/ST_Distance.md) | Double | Return the minimum geodesic distance between two geographies in meters. | v1.9.1 | +| [ST_Contains](Geography-Functions/ST_Contains.md) | Boolean | Test whether geography A fully contains geography B. | v1.9.1 | | [ST_Intersects](Geography-Functions/ST_Intersects.md) | Boolean | Test whether two geographies intersect. | v1.9.1 | From 6419bd97779a543244427a505870e557951f1f6b Mon Sep 17 00:00:00 2001 From: zhangfengcdt Date: Wed, 29 Apr 2026 15:13:15 -0700 Subject: [PATCH 4/4] add svg diagram --- .../Geography-Functions/ST_Intersects.md | 28 ++++++-- .../ST_Intersects_geography_false.svg | 54 +++++++++++++++ .../ST_Intersects_geography_true.svg | 66 +++++++++++++++++++ 3 files changed, 144 insertions(+), 4 deletions(-) create mode 100644 docs/image/ST_Intersects_geography/ST_Intersects_geography_false.svg create mode 100644 docs/image/ST_Intersects_geography/ST_Intersects_geography_true.svg diff --git a/docs/api/sql/geography/Geography-Functions/ST_Intersects.md b/docs/api/sql/geography/Geography-Functions/ST_Intersects.md index 2512b3971f8..6fb1cb7f573 100644 --- a/docs/api/sql/geography/Geography-Functions/ST_Intersects.md +++ b/docs/api/sql/geography/Geography-Functions/ST_Intersects.md @@ -19,7 +19,12 @@ # ST_Intersects -Introduction: Tests whether two geography objects intersect using S2 spherical boolean operations. Returns true if A and B share any portion of space. +Introduction: Tests whether two geography objects intersect on the sphere using S2 spherical boolean operations. Returns `true` if `A` and `B` share any portion of space (including a single boundary point), and `false` if they are fully disjoint. + +Edges are interpreted as great-circle arcs, so the test is correct even when geographies cross the antimeridian or wrap around the poles — situations where a planar `ST_Intersects` would be wrong. + +![ST_Intersects returning true](../../../../image/ST_Intersects_geography/ST_Intersects_geography_true.svg "ST_Intersects returning true") +![ST_Intersects returning false](../../../../image/ST_Intersects_geography/ST_Intersects_geography_false.svg "ST_Intersects returning false") Format: @@ -29,12 +34,12 @@ Return type: `Boolean` Since: `v1.9.1` -SQL Example +SQL Example — overlapping polygons: ```sql SELECT ST_Intersects( - ST_GeogFromWKT('POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))'), - ST_GeogFromWKT('POLYGON ((1 1, 3 1, 3 3, 1 3, 1 1))') + ST_GeogFromWKT('POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))', 4326), + ST_GeogFromWKT('POLYGON ((1 1, 3 1, 3 3, 1 3, 1 1))', 4326) ); ``` @@ -43,3 +48,18 @@ Output: ``` true ``` + +SQL Example — disjoint polygons: + +```sql +SELECT ST_Intersects( + ST_GeogFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))', 4326), + ST_GeogFromWKT('POLYGON ((10 10, 11 10, 11 11, 10 11, 10 10))', 4326) +); +``` + +Output: + +``` +false +``` diff --git a/docs/image/ST_Intersects_geography/ST_Intersects_geography_false.svg b/docs/image/ST_Intersects_geography/ST_Intersects_geography_false.svg new file mode 100644 index 00000000000..9488ffd8ec6 --- /dev/null +++ b/docs/image/ST_Intersects_geography/ST_Intersects_geography_false.svg @@ -0,0 +1,54 @@ + + + + + + + + + + + + + + + ST_Intersects(A, B) is FALSE + ST_Intersects(A, P) is FALSE + + + + + + + + + + + + + + + + + + + + + + + A + B + P + + + + Polygon A + + + Polygon B (Disjoint from A) + + + Point P (Outside A) + diff --git a/docs/image/ST_Intersects_geography/ST_Intersects_geography_true.svg b/docs/image/ST_Intersects_geography/ST_Intersects_geography_true.svg new file mode 100644 index 00000000000..3479db0ae0d --- /dev/null +++ b/docs/image/ST_Intersects_geography/ST_Intersects_geography_true.svg @@ -0,0 +1,66 @@ + + + + + + + + + + + + + + + + + + + ST_Intersects(A, B) is TRUE + ST_Intersects(A, P) is TRUE + + + + + + + + + + + + + + + + + + + + + + + + + + + + A + B + P + + + + Polygon A + + + Polygon B + + + Shared region + + + Point P (Inside A) +