Skip to content

Commit bd909f7

Browse files
authored
[GH-2809] Support distance joins for raster predicates (#2980)
1 parent 5f40925 commit bd909f7

15 files changed

Lines changed: 756 additions & 13 deletions

File tree

common/src/main/java/org/apache/sedona/common/raster/RasterPredicates.java

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.util.Set;
2222
import org.apache.commons.lang3.tuple.Pair;
2323
import org.apache.sedona.common.FunctionsGeoTools;
24+
import org.apache.sedona.common.S2Geography.WKBGeography;
2425
import org.apache.sedona.common.utils.CachedCRSTransformFinder;
2526
import org.apache.sedona.common.utils.GeomUtils;
2627
import org.geotools.api.referencing.FactoryException;
@@ -34,7 +35,11 @@
3435
import org.geotools.referencing.CRS;
3536
import org.geotools.referencing.crs.DefaultEngineeringCRS;
3637
import org.geotools.referencing.crs.DefaultGeographicCRS;
38+
import org.locationtech.jts.algorithm.Orientation;
3739
import org.locationtech.jts.geom.Geometry;
40+
import org.locationtech.jts.geom.GeometryFactory;
41+
import org.locationtech.jts.geom.MultiPolygon;
42+
import org.locationtech.jts.geom.Polygon;
3843

3944
public class RasterPredicates {
4045
/**
@@ -82,6 +87,158 @@ public static boolean rsContains(GridCoverage2D left, GridCoverage2D right) {
8287
return leftGeometry.contains(rightGeometry);
8388
}
8489

90+
/**
91+
* Test if a raster is within {@code distance} meters of a geometry. Both shapes are projected to
92+
* WGS84 unconditionally so the distance unit is always meters regardless of input CRS, then the
93+
* minimum spherical distance between the two shapes (NOT centroid-to-centroid — uses S2's {@code
94+
* ClosestEdgeQuery}) is compared against the threshold. Two raster footprints that overlap or
95+
* touch therefore satisfy {@code RS_DWithin(a, b, 0)} just like {@link #rsIntersects}, which
96+
* matches the natural reading of "are these shapes within d meters." The WGS84-only path is also
97+
* what the join planner's R-tree filter assumes — see {@code
98+
* TraitJoinQueryBase.toExpandedWGS84EnvelopeRDD} and the {@code isGeography = true} envelope
99+
* expansion — so the index bound and the per-row predicate share one unit.
100+
*/
101+
public static boolean rsDWithin(GridCoverage2D raster, Geometry geometry, double distance) {
102+
Pair<Geometry, Geometry> geometries = toWGS84Pair(raster, geometry);
103+
return geographyDWithin(geometries.getLeft(), geometries.getRight(), distance);
104+
}
105+
106+
/** Raster-raster variant of {@link #rsDWithin(GridCoverage2D, Geometry, double)}. */
107+
public static boolean rsDWithin(GridCoverage2D left, GridCoverage2D right, double distance) {
108+
Pair<Geometry, Geometry> geometries = toWGS84Pair(left, right);
109+
return geographyDWithin(geometries.getLeft(), geometries.getRight(), distance);
110+
}
111+
112+
/**
113+
* Run {@link org.apache.sedona.common.geography.Functions#dWithin} on two WGS84 JTS geometries by
114+
* wrapping them as {@link WKBGeography}. The Geography path uses S2's {@code ClosestEdgeQuery}
115+
* for the minimum geodesic distance — overlap/touch returns 0 — so the threshold is interpreted
116+
* strictly as "meters between any two points on the shapes."
117+
*
118+
* <p>This intentionally does not go through {@code Constructors.geomToGeography}: that helper
119+
* builds S2 loops via {@code S2Loop.normalize()}, which always picks the smaller hemisphere as
120+
* the polygon interior. Raster convex hulls (especially global mosaics, polar projections, and
121+
* antimeridian-crossing UTM zones) can be larger than a hemisphere after WGS84 reprojection, so
122+
* normalisation would collapse the intended footprint to a tiny region between the projected
123+
* corners. The WKB path preserves the orientation we set in {@link #ensureCcwForS2}.
124+
*/
125+
private static boolean geographyDWithin(Geometry left, Geometry right, double distance) {
126+
WKBGeography leftGeog = WKBGeography.fromJTS(toS2Ready(left));
127+
WKBGeography rightGeog = WKBGeography.fromJTS(toS2Ready(right));
128+
return org.apache.sedona.common.geography.Functions.dWithin(leftGeog, rightGeog, distance);
129+
}
130+
131+
/**
132+
* Produce a fresh JTS geometry ready for the S2-backed distance query: shell orientation forced
133+
* to CCW (S2's expected interior side) and SRID stamped to 4326 (the caller has already
134+
* reprojected to WGS84 via {@link #toWGS84Pair}). Returns a copy whenever the caller-owned
135+
* geometry would otherwise be mutated, so this helper is side-effect-free with respect to its
136+
* input — important because {@link #rsDWithin} is a public predicate that should not modify
137+
* geometries handed to it.
138+
*/
139+
private static Geometry toS2Ready(Geometry geom) {
140+
Geometry oriented = ensureCcwForS2(geom);
141+
if (oriented == geom) {
142+
// ensureCcwForS2 returned the input unchanged (already CCW or non-polygon); clone before
143+
// touching SRID so we don't write back into the caller's object.
144+
oriented = geom.copy();
145+
}
146+
// S2 treats coordinates as lat/lng regardless of SRID metadata, but we tag the geography as
147+
// EPSG:4326 since both inputs are guaranteed to be in WGS84 here. JTS.transform does not
148+
// propagate SRID, so we set it explicitly on the (now-owned) copy.
149+
oriented.setSRID(4326);
150+
return oriented;
151+
}
152+
153+
/**
154+
* Return {@code geom} with every polygon shell oriented CCW (S2's expected orientation).
155+
* Non-polygon geometries are returned unchanged. For MultiPolygons each component polygon is
156+
* checked and reversed independently — JTS/OGC does not require consistent ring orientation
157+
* across the components of a user-supplied MultiPolygon, so flipping the whole geometry based on
158+
* the first shell would mis-orient any later polygon with the opposite winding. Empty polygons
159+
* and {@code MULTIPOLYGON EMPTY} pass through untouched.
160+
*/
161+
private static Geometry ensureCcwForS2(Geometry geom) {
162+
if (geom instanceof Polygon) {
163+
Polygon p = (Polygon) geom;
164+
if (p.isEmpty() || Orientation.isCCW(p.getExteriorRing().getCoordinates())) {
165+
return geom;
166+
}
167+
return geom.reverse();
168+
}
169+
if (geom instanceof MultiPolygon) {
170+
int n = geom.getNumGeometries();
171+
if (n == 0) {
172+
return geom;
173+
}
174+
Polygon[] reoriented = new Polygon[n];
175+
boolean anyReversal = false;
176+
for (int i = 0; i < n; i++) {
177+
Polygon p = (Polygon) geom.getGeometryN(i);
178+
if (p.isEmpty() || Orientation.isCCW(p.getExteriorRing().getCoordinates())) {
179+
reoriented[i] = p;
180+
} else {
181+
reoriented[i] = (Polygon) p.reverse();
182+
anyReversal = true;
183+
}
184+
}
185+
if (!anyReversal) {
186+
return geom;
187+
}
188+
GeometryFactory factory = geom.getFactory();
189+
return factory.createMultiPolygon(reoriented);
190+
}
191+
return geom;
192+
}
193+
194+
private static Pair<Geometry, Geometry> toWGS84Pair(GridCoverage2D raster, Geometry queryWindow) {
195+
Geometry rasterGeometry;
196+
try {
197+
rasterGeometry = GeometryFunctions.convexHull(raster);
198+
} catch (FactoryException | TransformException e) {
199+
throw new RuntimeException("Failed to calculate the convex hull of the raster", e);
200+
}
201+
202+
CoordinateReferenceSystem rasterCRS = raster.getCoordinateReferenceSystem();
203+
if (rasterCRS == null || rasterCRS instanceof DefaultEngineeringCRS) {
204+
rasterCRS = DefaultGeographicCRS.WGS84;
205+
}
206+
207+
int queryWindowSRID = queryWindow.getSRID();
208+
if (queryWindowSRID <= 0) {
209+
queryWindowSRID = 4326;
210+
}
211+
CoordinateReferenceSystem queryWindowCRS = FunctionsGeoTools.sridToCRS(queryWindowSRID);
212+
213+
Geometry transformedRaster = transformGeometryToWGS84(rasterGeometry, rasterCRS);
214+
Geometry transformedQuery = transformGeometryToWGS84(queryWindow, queryWindowCRS);
215+
return Pair.of(transformedRaster, transformedQuery);
216+
}
217+
218+
private static Pair<Geometry, Geometry> toWGS84Pair(GridCoverage2D left, GridCoverage2D right) {
219+
Geometry leftGeometry;
220+
Geometry rightGeometry;
221+
try {
222+
leftGeometry = GeometryFunctions.convexHull(left);
223+
rightGeometry = GeometryFunctions.convexHull(right);
224+
} catch (FactoryException | TransformException e) {
225+
throw new RuntimeException("Failed to calculate the convex hull of the raster", e);
226+
}
227+
228+
CoordinateReferenceSystem leftCRS = left.getCoordinateReferenceSystem();
229+
if (leftCRS == null || leftCRS instanceof DefaultEngineeringCRS) {
230+
leftCRS = DefaultGeographicCRS.WGS84;
231+
}
232+
CoordinateReferenceSystem rightCRS = right.getCoordinateReferenceSystem();
233+
if (rightCRS == null || rightCRS instanceof DefaultEngineeringCRS) {
234+
rightCRS = DefaultGeographicCRS.WGS84;
235+
}
236+
237+
Geometry transformedLeft = transformGeometryToWGS84(leftGeometry, leftCRS);
238+
Geometry transformedRight = transformGeometryToWGS84(rightGeometry, rightCRS);
239+
return Pair.of(transformedLeft, transformedRight);
240+
}
241+
85242
private static Pair<Geometry, Geometry> convertCRSIfNeeded(
86243
GridCoverage2D raster, Geometry queryWindow) {
87244
Geometry rasterGeometry;

common/src/test/java/org/apache/sedona/common/raster/RasterPredicatesTest.java

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.locationtech.jts.geom.GeometryFactory;
3737
import org.locationtech.jts.geom.PrecisionModel;
3838
import org.locationtech.jts.io.ParseException;
39+
import org.locationtech.jts.io.WKTReader;
3940

4041
public class RasterPredicatesTest extends RasterTestBase {
4142
private static final GeometryFactory GEOMETRY_FACTORY = new GeometryFactory();
@@ -489,6 +490,164 @@ public void testRasterRasterPredicatesNoCrs() throws FactoryException {
489490
Assert.assertFalse(RasterPredicates.rsIntersects(raster3, raster2));
490491
}
491492

493+
@Test
494+
public void testDWithinWGS84RasterPointMeterSemantics() throws FactoryException {
495+
// 5x5 degree raster at WGS84 origin: hull (0,-5)–(5,0), centroid (2.5, -2.5).
496+
GridCoverage2D raster = RasterConstructors.makeEmptyRaster(1, 5, 5, 0, 0, 1, -1, 0, 0, 4326);
497+
498+
// Point coincident with the raster centroid — distance is 0, so any non-negative
499+
// threshold matches and the unit cannot silently fall back to degrees.
500+
Geometry coincident = GEOMETRY_FACTORY.createPoint(new Coordinate(2.5, -2.5));
501+
coincident.setSRID(4326);
502+
Assert.assertTrue(RasterPredicates.rsDWithin(raster, coincident, 0.0));
503+
Assert.assertTrue(RasterPredicates.rsDWithin(raster, coincident, 1.0));
504+
505+
// Point ~10° east at the same latitude — geodesic distance ≈ 1 112 km. Bracket the
506+
// threshold above and below to catch unit mistakes (1° was the old buggy semantics)
507+
// and projection regressions.
508+
Geometry tenDegreesEast = GEOMETRY_FACTORY.createPoint(new Coordinate(12.5, -2.5));
509+
tenDegreesEast.setSRID(4326);
510+
Assert.assertTrue(RasterPredicates.rsDWithin(raster, tenDegreesEast, 2_000_000.0));
511+
Assert.assertFalse(RasterPredicates.rsDWithin(raster, tenDegreesEast, 500_000.0));
512+
// A degree-sized threshold (the pre-fix buggy unit) must reject this pair under the
513+
// meters contract.
514+
Assert.assertFalse(RasterPredicates.rsDWithin(raster, tenDegreesEast, 10.0));
515+
}
516+
517+
@Test
518+
public void testDWithinSwappedOperands() throws FactoryException {
519+
GridCoverage2D raster = RasterConstructors.makeEmptyRaster(1, 5, 5, 0, 0, 1, -1, 0, 0, 4326);
520+
Geometry point = GEOMETRY_FACTORY.createPoint(new Coordinate(12.5, -2.5));
521+
point.setSRID(4326);
522+
// The (raster, geom) and (geom, raster) overloads share the same minimum geodesic distance,
523+
// so both must agree at and around the threshold.
524+
Assert.assertEquals(
525+
RasterPredicates.rsDWithin(raster, point, 2_000_000.0),
526+
RasterPredicates.rsDWithin(raster, point, 2_000_000.0));
527+
Assert.assertTrue(RasterPredicates.rsDWithin(raster, point, 2_000_000.0));
528+
Assert.assertFalse(RasterPredicates.rsDWithin(raster, point, 500_000.0));
529+
}
530+
531+
@Test
532+
public void testDWithinProjectedRasterReprojects() throws FactoryException {
533+
// UTM 32610 (meters) raster centred near San Francisco. The predicate must reproject
534+
// both sides to WGS84 before measuring, regardless of the input CRS.
535+
GridCoverage2D raster =
536+
RasterConstructors.makeEmptyRaster(
537+
1, "B", 751, 742, 332385, 4258815, 300, -300, 0, 0, 32610);
538+
539+
// Point inside the raster footprint (≈ same area as the centroid): close geodesic
540+
// distance, matches even with a small threshold.
541+
Geometry nearby = GEOMETRY_FACTORY.createPoint(new Coordinate(-122.40, 37.75));
542+
nearby.setSRID(4326);
543+
Assert.assertTrue(RasterPredicates.rsDWithin(raster, nearby, 200_000.0));
544+
545+
// Point in Kansas — ≈ 2 400 km from the UTM-10N raster's WGS84 centroid.
546+
Geometry farAway = GEOMETRY_FACTORY.createPoint(new Coordinate(-95.0, 39.0));
547+
farAway.setSRID(4326);
548+
Assert.assertTrue(RasterPredicates.rsDWithin(raster, farAway, 3_000_000.0));
549+
Assert.assertFalse(RasterPredicates.rsDWithin(raster, farAway, 1_000_000.0));
550+
551+
// Same Kansas point expressed in EPSG:3857 (Web Mercator). Even though neither side is
552+
// in WGS84, the predicate must still reproject and produce identical truth values.
553+
Geometry farAwayMercator = GEOMETRY_FACTORY.createPoint(new Coordinate(-10575352, 4721671));
554+
farAwayMercator.setSRID(3857);
555+
Assert.assertTrue(RasterPredicates.rsDWithin(raster, farAwayMercator, 3_000_000.0));
556+
Assert.assertFalse(RasterPredicates.rsDWithin(raster, farAwayMercator, 1_000_000.0));
557+
}
558+
559+
@Test
560+
public void testDWithinRasterRaster() throws FactoryException {
561+
// Two WGS84 rasters whose centroids are exactly 10° of longitude apart on the equator:
562+
// geodesic centroid distance ≈ 1 112 km.
563+
GridCoverage2D rasterA = RasterConstructors.makeEmptyRaster(1, 5, 5, 0, 0, 1, -1, 0, 0, 4326);
564+
GridCoverage2D rasterB = RasterConstructors.makeEmptyRaster(1, 5, 5, 10, 0, 1, -1, 0, 0, 4326);
565+
Assert.assertTrue(RasterPredicates.rsDWithin(rasterA, rasterB, 2_000_000.0));
566+
Assert.assertFalse(RasterPredicates.rsDWithin(rasterA, rasterB, 500_000.0));
567+
// Symmetry: swapping the operands does not change the truth value.
568+
Assert.assertEquals(
569+
RasterPredicates.rsDWithin(rasterA, rasterB, 2_000_000.0),
570+
RasterPredicates.rsDWithin(rasterB, rasterA, 2_000_000.0));
571+
572+
// Cross-CRS pair (UTM 10N + WGS84): the projected raster must be reprojected before
573+
// distance is computed.
574+
GridCoverage2D utmRaster =
575+
RasterConstructors.makeEmptyRaster(
576+
1, "B", 751, 742, 332385, 4258815, 300, -300, 0, 0, 32610);
577+
// WGS84 raster co-located with the UTM raster's footprint near SF.
578+
GridCoverage2D wgs84Nearby =
579+
RasterConstructors.makeEmptyRaster(1, 10, 10, -122.45, 37.80, 0.01, -0.01, 0, 0, 4326);
580+
Assert.assertTrue(RasterPredicates.rsDWithin(utmRaster, wgs84Nearby, 500_000.0));
581+
Assert.assertEquals(
582+
RasterPredicates.rsDWithin(utmRaster, wgs84Nearby, 500_000.0),
583+
RasterPredicates.rsDWithin(wgs84Nearby, utmRaster, 500_000.0));
584+
}
585+
586+
@Test
587+
public void testDWithinMixedOrientationMultiPolygon() throws FactoryException, ParseException {
588+
// Regression: the helper used to flip the whole multipolygon based on the first shell's
589+
// orientation, which left any later polygon with a different winding mis-oriented when
590+
// handed to S2. Two semantically-identical multipolygons (one all-CCW, one with the second
591+
// shell wound CW) must produce identical truth values.
592+
GridCoverage2D raster = RasterConstructors.makeEmptyRaster(1, 5, 5, 0, 0, 1, -1, 0, 0, 4326);
593+
WKTReader reader = new WKTReader();
594+
Geometry allCcw =
595+
reader.read(
596+
"MULTIPOLYGON (((20 20, 21 20, 21 21, 20 21, 20 20)), "
597+
+ "((30 30, 31 30, 31 31, 30 31, 30 30)))");
598+
allCcw.setSRID(4326);
599+
Geometry mixed =
600+
reader.read(
601+
// First polygon CCW (as in `allCcw`); second polygon wound CW.
602+
"MULTIPOLYGON (((20 20, 21 20, 21 21, 20 21, 20 20)), "
603+
+ "((30 30, 30 31, 31 31, 31 30, 30 30)))");
604+
mixed.setSRID(4326);
605+
// Both forms describe the same pair of squares, so the predicate must agree on every
606+
// threshold around the actual geodesic distance.
607+
Assert.assertEquals(
608+
RasterPredicates.rsDWithin(raster, allCcw, 5_000_000.0),
609+
RasterPredicates.rsDWithin(raster, mixed, 5_000_000.0));
610+
Assert.assertEquals(
611+
RasterPredicates.rsDWithin(raster, allCcw, 1_000_000.0),
612+
RasterPredicates.rsDWithin(raster, mixed, 1_000_000.0));
613+
}
614+
615+
@Test
616+
public void testDWithinEmptyMultiPolygon() throws FactoryException, ParseException {
617+
// Regression: `getGeometryN(0)` would throw IndexOutOfBoundsException on MULTIPOLYGON EMPTY.
618+
// The predicate must accept an empty multipolygon operand without crashing.
619+
GridCoverage2D raster = RasterConstructors.makeEmptyRaster(1, 5, 5, 0, 0, 1, -1, 0, 0, 4326);
620+
WKTReader reader = new WKTReader();
621+
Geometry empty = reader.read("MULTIPOLYGON EMPTY");
622+
empty.setSRID(4326);
623+
RasterPredicates.rsDWithin(raster, empty, 1_000_000.0);
624+
}
625+
626+
@Test
627+
public void testDWithinDoesNotMutateCallerGeometry() throws FactoryException {
628+
// Regression: the predicate used to call setSRID(4326) directly on the caller's geometry
629+
// when no orientation change was needed. Verify SRID and coordinate identity are untouched
630+
// after the predicate runs, for both same-CRS (no transform) and cross-CRS paths.
631+
GridCoverage2D raster = RasterConstructors.makeEmptyRaster(1, 5, 5, 0, 0, 1, -1, 0, 0, 4326);
632+
633+
// Same-CRS: no transform happens, so the predicate sees the caller's exact JTS object.
634+
Geometry sameCrsPoint = GEOMETRY_FACTORY.createPoint(new Coordinate(2.5, -2.5));
635+
sameCrsPoint.setSRID(0);
636+
Coordinate originalCoord = sameCrsPoint.getCoordinate().copy();
637+
RasterPredicates.rsDWithin(raster, sameCrsPoint, 1.0);
638+
Assert.assertEquals(0, sameCrsPoint.getSRID());
639+
Assert.assertEquals(originalCoord, sameCrsPoint.getCoordinate());
640+
641+
// Cross-CRS: JTS.transform produces a fresh geometry internally, but the caller's object
642+
// must still be untouched.
643+
GeometryFactory mercatorFactory = new GeometryFactory(new PrecisionModel(), 3857);
644+
Geometry mercatorPoint = mercatorFactory.createPoint(new Coordinate(278300.0, -278300.0));
645+
Coordinate originalMercatorCoord = mercatorPoint.getCoordinate().copy();
646+
RasterPredicates.rsDWithin(raster, mercatorPoint, 1.0);
647+
Assert.assertEquals(3857, mercatorPoint.getSRID());
648+
Assert.assertEquals(originalMercatorCoord, mercatorPoint.getCoordinate());
649+
}
650+
492651
@Test
493652
public void testIsCRSMatchesEPSGCode() throws FactoryException {
494653
CoordinateReferenceSystem epsg4326 = CRS.decode("EPSG:4326");

0 commit comments

Comments
 (0)