Skip to content

Commit 4fb0d17

Browse files
committed
Fix: Update BoundingBox for empty and antimeridian handling
1 parent 22a23d0 commit 4fb0d17

2 files changed

Lines changed: 166 additions & 10 deletions

File tree

parquet-column/src/main/java/org/apache/parquet/column/statistics/geospatial/BoundingBox.java

Lines changed: 67 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919
package org.apache.parquet.column.statistics.geospatial;
2020

21+
import org.apache.parquet.ShouldNeverHappenException;
2122
import org.locationtech.jts.geom.Coordinate;
2223
import org.locationtech.jts.geom.Envelope;
2324
import org.locationtech.jts.geom.Geometry;
@@ -169,7 +170,7 @@ public boolean isXYEmpty() {
169170
* @return true if the X dimension is empty, false otherwise.
170171
*/
171172
public boolean isXEmpty() {
172-
return Double.isInfinite(xMin) && Double.isInfinite(xMax);
173+
return Double.isInfinite(xMin - xMax);
173174
}
174175

175176
/**
@@ -178,7 +179,7 @@ public boolean isXEmpty() {
178179
* @return true if the Y dimension is empty, false otherwise.
179180
*/
180181
public boolean isYEmpty() {
181-
return Double.isInfinite(yMin) && Double.isInfinite(yMax);
182+
return Double.isInfinite(yMin - yMax);
182183
}
183184

184185
/**
@@ -187,7 +188,7 @@ public boolean isYEmpty() {
187188
* @return true if the Z dimension is empty, false otherwise.
188189
*/
189190
public boolean isZEmpty() {
190-
return Double.isInfinite(zMin) && Double.isInfinite(zMax);
191+
return Double.isInfinite(zMin - zMax);
191192
}
192193

193194
/**
@@ -196,14 +197,28 @@ public boolean isZEmpty() {
196197
* @return true if the M dimension is empty, false otherwise.
197198
*/
198199
public boolean isMEmpty() {
199-
return Double.isInfinite(mMin) && Double.isInfinite(mMax);
200+
return Double.isInfinite(mMin - mMax);
201+
}
202+
203+
/**
204+
* Checks if the X dimension of this bounding box wraps around the antimeridian.
205+
* This occurs when the minimum X value is greater than the maximum X value,
206+
* which is allowed by the Parquet specification for geometries that cross the antimeridian.
207+
*
208+
* @return true if the X dimension wraps around, false otherwise.
209+
*/
210+
public boolean isXWraparound() {
211+
return isWraparound(xMin, xMax);
200212
}
201213

202214
/**
203215
* Expands this bounding box to include the bounds of another box.
204216
* After merging, this bounding box will contain both its original extent
205217
* and the extent of the other bounding box.
206218
*
219+
* If either this bounding box or the other has wraparound X coordinates,
220+
* the X dimension will be marked as invalid (set to NaN) in the result.
221+
*
207222
* @param other the other BoundingBox whose bounds will be merged into this one
208223
*/
209224
public void merge(BoundingBox other) {
@@ -218,16 +233,27 @@ public void merge(BoundingBox other) {
218233
return;
219234
}
220235

221-
this.xMin = Math.min(this.xMin, other.xMin);
222-
this.xMax = Math.max(this.xMax, other.xMax);
236+
// We don't yet support merging wraparound bounds.
237+
// Rather than throw, we mark the X bounds as invalid.
238+
if (isXWraparound() || other.isXWraparound()) {
239+
// Mark X dimension as invalid by setting to NaN
240+
xMin = Double.NaN;
241+
xMax = Double.NaN;
242+
} else {
243+
// Normal case - merge X bounds
244+
this.xMin = Math.min(this.xMin, other.xMin);
245+
this.xMax = Math.max(this.xMax, other.xMax);
246+
}
247+
248+
// Always merge Y, Z, and M bounds
223249
this.yMin = Math.min(this.yMin, other.yMin);
224250
this.yMax = Math.max(this.yMax, other.yMax);
225251
this.zMin = Math.min(this.zMin, other.zMin);
226252
this.zMax = Math.max(this.zMax, other.zMax);
227253
this.mMin = Math.min(this.mMin, other.mMin);
228254
this.mMax = Math.max(this.mMax, other.mMax);
229255

230-
// Update the validity of this bounding box based on the other bounding box
256+
// Update the validity of this bounding box
231257
valid = isXYValid();
232258
}
233259

@@ -272,12 +298,33 @@ public void update(Geometry geometry) {
272298
* - X bounds are only updated if both minX and maxX are not NaN
273299
* - Y bounds are only updated if both minY and maxY are not NaN
274300
*
275-
* This allows partial updates while preserving valid dimensions.
301+
* Note: JTS (Java Topology Suite) does not natively support wraparound envelopes
302+
* or geometries that cross the antimeridian (±180° longitude). It operates strictly
303+
* in a 2D Cartesian coordinate space and doesn't account for the Earth's spherical
304+
* nature or longitudinal wrapping.
305+
*
306+
* When JTS encounters a geometry that crosses the antimeridian, it will represent
307+
* it with an envelope spanning from the westernmost to easternmost points, often
308+
* covering most of the Earth's longitude range (e.g., minX=-180, maxX=180).
309+
*
310+
* The wraparound check below is defensive but should never be triggered with standard
311+
* JTS geometry operations, as JTS will never produce an envelope with minX > maxX.
312+
*
313+
* @throws ShouldNeverHappenException if the update creates an X wraparound condition
276314
*/
277315
private void updateBounds(double minX, double maxX, double minY, double maxY) {
278316
if (!Double.isNaN(minX) && !Double.isNaN(maxX)) {
279-
xMin = Math.min(xMin, minX);
280-
xMax = Math.max(xMax, maxX);
317+
double newXMin = Math.min(xMin, minX);
318+
double newXMax = Math.max(xMax, maxX);
319+
320+
// Check if the update would create a wraparound condition
321+
// This should never happen with standard JTS geometry operations
322+
if (isWraparound(newXMin, newXMax)) {
323+
throw new ShouldNeverHappenException("Wraparound X is not supported by BoundingBox.update()");
324+
}
325+
326+
xMin = newXMin;
327+
xMax = newXMax;
281328
}
282329

283330
if (!Double.isNaN(minY) && !Double.isNaN(maxY)) {
@@ -302,6 +349,16 @@ public void reset() {
302349
valid = true;
303350
}
304351

352+
/**
353+
* The Parquet specification allows X bounds to be "wraparound" to allow for
354+
* more compact bounding boxes when a geometry happens to include components
355+
* on both sides of the antimeridian (e.g., the nation of Fiji). This function
356+
* checks for that case (see GeoStatistics::lower_bound/upper_bound for more details).
357+
*/
358+
public static boolean isWraparound(double xmin, double xmax) {
359+
return !Double.isInfinite(xmin - xmax) && xmin > xmax;
360+
}
361+
305362
/**
306363
* Creates a copy of the current bounding box.
307364
*

parquet-column/src/test/java/org/apache/parquet/column/statistics/geospatial/TestBoundingBox.java

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -681,4 +681,103 @@ public void testMergingRowGroupBoundingBoxes() {
681681
Assert.assertEquals(900.0, reverseMergeBox.getYMax(), 0.0);
682682
Assert.assertTrue(reverseMergeBox.isValid());
683683
}
684+
685+
@Test
686+
public void testIsXValidAndIsYValid() {
687+
// Test with valid X and Y
688+
BoundingBox validBox = new BoundingBox(1, 2, 3, 4, 5, 6, 7, 8);
689+
Assert.assertTrue(validBox.isXValid());
690+
Assert.assertTrue(validBox.isYValid());
691+
692+
// Test with invalid X (NaN)
693+
BoundingBox invalidXBox = new BoundingBox(Double.NaN, 2, 3, 4, 5, 6, 7, 8);
694+
Assert.assertFalse(invalidXBox.isXValid());
695+
Assert.assertTrue(invalidXBox.isYValid());
696+
697+
// Test with invalid Y (NaN)
698+
BoundingBox invalidYBox = new BoundingBox(1, 2, Double.NaN, 4, 5, 6, 7, 8);
699+
Assert.assertTrue(invalidYBox.isXValid());
700+
Assert.assertFalse(invalidYBox.isYValid());
701+
702+
// Test with both X and Y invalid
703+
BoundingBox invalidXYBox = new BoundingBox(Double.NaN, Double.NaN, Double.NaN, Double.NaN, 5, 6, 7, 8);
704+
Assert.assertFalse(invalidXYBox.isXValid());
705+
Assert.assertFalse(invalidXYBox.isYValid());
706+
Assert.assertFalse(invalidXYBox.isXYValid());
707+
}
708+
709+
@Test
710+
public void testIsXEmptyAndIsYEmpty() {
711+
// Empty bounding box (initial state)
712+
BoundingBox emptyBox = new BoundingBox();
713+
Assert.assertTrue(emptyBox.isXEmpty());
714+
Assert.assertTrue(emptyBox.isYEmpty());
715+
Assert.assertTrue(emptyBox.isXYEmpty());
716+
717+
// Non-empty box
718+
BoundingBox nonEmptyBox = new BoundingBox(1, 2, 3, 4, 5, 6, 7, 8);
719+
Assert.assertFalse(nonEmptyBox.isXEmpty());
720+
Assert.assertFalse(nonEmptyBox.isYEmpty());
721+
Assert.assertFalse(nonEmptyBox.isXYEmpty());
722+
723+
// Box with empty X dimension only
724+
GeometryFactory gf = new GeometryFactory();
725+
BoundingBox emptyXBox = new BoundingBox();
726+
// Only update Y dimension
727+
emptyXBox.update(gf.createPoint(new Coordinate(Double.NaN, 5)));
728+
Assert.assertTrue(emptyXBox.isXEmpty());
729+
Assert.assertFalse(emptyXBox.isYEmpty());
730+
Assert.assertTrue(emptyXBox.isXYEmpty());
731+
732+
// Box with empty Y dimension only
733+
BoundingBox emptyYBox = new BoundingBox();
734+
// Only update X dimension
735+
emptyYBox.update(gf.createPoint(new Coordinate(10, Double.NaN)));
736+
Assert.assertFalse(emptyYBox.isXEmpty());
737+
Assert.assertTrue(emptyYBox.isYEmpty());
738+
Assert.assertTrue(emptyYBox.isXYEmpty());
739+
}
740+
741+
@Test
742+
public void testIsXWraparound() {
743+
// Normal bounding box (no wraparound)
744+
BoundingBox normalBox = new BoundingBox(1, 2, 3, 4, 5, 6, 7, 8);
745+
Assert.assertFalse(normalBox.isXWraparound());
746+
747+
// Wraparound box (xMin > xMax)
748+
BoundingBox wraparoundBox = new BoundingBox(170, 20, 10, 20, 0, 0, 0, 0);
749+
Assert.assertTrue(wraparoundBox.isXWraparound());
750+
751+
// Edge case: equal bounds
752+
BoundingBox equalBoundsBox = new BoundingBox(10, 10, 20, 20, 0, 0, 0, 0);
753+
Assert.assertFalse(equalBoundsBox.isXWraparound());
754+
755+
// Test static method directly
756+
Assert.assertTrue(BoundingBox.isWraparound(180, -180));
757+
Assert.assertFalse(BoundingBox.isWraparound(-180, 180));
758+
}
759+
760+
@Test
761+
public void testWraparoundHandlingInMerge() {
762+
// Test with two normal boxes
763+
BoundingBox box1 = new BoundingBox(10, 20, 10, 20, 0, 0, 0, 0);
764+
BoundingBox box2 = new BoundingBox(15, 25, 15, 25, 0, 0, 0, 0);
765+
box1.merge(box2);
766+
767+
Assert.assertTrue(box1.isValid());
768+
Assert.assertEquals(10.0, box1.getXMin(), 0.0);
769+
Assert.assertEquals(25.0, box1.getXMax(), 0.0);
770+
771+
// Test with one wraparound box
772+
BoundingBox normalBox = new BoundingBox(0, 10, 0, 10, 0, 0, 0, 0);
773+
BoundingBox wraparoundBox = new BoundingBox(170, -170, 5, 15, 0, 0, 0, 0);
774+
775+
normalBox.merge(wraparoundBox);
776+
777+
Assert.assertFalse(normalBox.isValid());
778+
Assert.assertTrue(Double.isNaN(normalBox.getXMin()));
779+
Assert.assertTrue(Double.isNaN(normalBox.getXMax()));
780+
Assert.assertEquals(0.0, normalBox.getYMin(), 0.0);
781+
Assert.assertEquals(15.0, normalBox.getYMax(), 0.0);
782+
}
684783
}

0 commit comments

Comments
 (0)