Skip to content

Commit eb65987

Browse files
authored
MINOR: Update BoundingBox for Empty and Antimeridian Handling (#3222)
1 parent 1f1e07b commit eb65987

2 files changed

Lines changed: 212 additions & 16 deletions

File tree

parquet-column/src/main/java/org/apache/parquet/column/statistics/geospatial/BoundingBox.java

Lines changed: 62 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
*/
1919
package org.apache.parquet.column.statistics.geospatial;
2020

21+
import org.apache.parquet.ShouldNeverHappenException;
2122
import org.locationtech.jts.geom.Coordinate;
2223
import org.locationtech.jts.geom.Envelope;
2324
import org.locationtech.jts.geom.Geometry;
@@ -169,7 +170,7 @@ public boolean isXYEmpty() {
169170
* @return true if the X dimension is empty, false otherwise.
170171
*/
171172
public boolean isXEmpty() {
172-
return Double.isInfinite(xMin) && Double.isInfinite(xMax);
173+
return Double.isInfinite(xMin - xMax);
173174
}
174175

175176
/**
@@ -178,7 +179,7 @@ public boolean isXEmpty() {
178179
* @return true if the Y dimension is empty, false otherwise.
179180
*/
180181
public boolean isYEmpty() {
181-
return Double.isInfinite(yMin) && Double.isInfinite(yMax);
182+
return Double.isInfinite(yMin - yMax);
182183
}
183184

184185
/**
@@ -187,7 +188,7 @@ public boolean isYEmpty() {
187188
* @return true if the Z dimension is empty, false otherwise.
188189
*/
189190
public boolean isZEmpty() {
190-
return Double.isInfinite(zMin) && Double.isInfinite(zMax);
191+
return Double.isInfinite(zMin - zMax);
191192
}
192193

193194
/**
@@ -196,14 +197,28 @@ public boolean isZEmpty() {
196197
* @return true if the M dimension is empty, false otherwise.
197198
*/
198199
public boolean isMEmpty() {
199-
return Double.isInfinite(mMin) && Double.isInfinite(mMax);
200+
return Double.isInfinite(mMin - mMax);
201+
}
202+
203+
/**
204+
* Checks if the X dimension of this bounding box wraps around the antimeridian.
205+
* This occurs when the minimum X value is greater than the maximum X value,
206+
* which is allowed by the Parquet specification for geometries that cross the antimeridian.
207+
*
208+
* @return true if the X dimension wraps around, false otherwise.
209+
*/
210+
public boolean isXWraparound() {
211+
return isWraparound(xMin, xMax);
200212
}
201213

202214
/**
203215
* Expands this bounding box to include the bounds of another box.
204216
* After merging, this bounding box will contain both its original extent
205217
* and the extent of the other bounding box.
206218
*
219+
* If either this bounding box or the other has wraparound X coordinates,
220+
* the X dimension will be marked as invalid (set to NaN) in the result.
221+
*
207222
* @param other the other BoundingBox whose bounds will be merged into this one
208223
*/
209224
public void merge(BoundingBox other) {
@@ -218,16 +233,27 @@ public void merge(BoundingBox other) {
218233
return;
219234
}
220235

221-
this.xMin = Math.min(this.xMin, other.xMin);
222-
this.xMax = Math.max(this.xMax, other.xMax);
236+
// We don't yet support merging wraparound bounds.
237+
// Rather than throw, we mark the X bounds as invalid.
238+
if (isXWraparound() || other.isXWraparound()) {
239+
// Mark X dimension as invalid by setting to NaN
240+
xMin = Double.NaN;
241+
xMax = Double.NaN;
242+
} else {
243+
// Normal case - merge X bounds
244+
this.xMin = Math.min(this.xMin, other.xMin);
245+
this.xMax = Math.max(this.xMax, other.xMax);
246+
}
247+
248+
// Always merge Y, Z, and M bounds
223249
this.yMin = Math.min(this.yMin, other.yMin);
224250
this.yMax = Math.max(this.yMax, other.yMax);
225251
this.zMin = Math.min(this.zMin, other.zMin);
226252
this.zMax = Math.max(this.zMax, other.zMax);
227253
this.mMin = Math.min(this.mMin, other.mMin);
228254
this.mMax = Math.max(this.mMax, other.mMax);
229255

230-
// Update the validity of this bounding box based on the other bounding box
256+
// Update the validity of this bounding box
231257
valid = isXYValid();
232258
}
233259

@@ -272,10 +298,28 @@ public void update(Geometry geometry) {
272298
* - X bounds are only updated if both minX and maxX are not NaN
273299
* - Y bounds are only updated if both minY and maxY are not NaN
274300
*
275-
* This allows partial updates while preserving valid dimensions.
301+
* Note: JTS (Java Topology Suite) does not natively support wraparound envelopes
302+
* or geometries that cross the antimeridian (±180° longitude). It operates strictly
303+
* in a 2D Cartesian coordinate space and doesn't account for the Earth's spherical
304+
* nature or longitudinal wrapping.
305+
*
306+
* When JTS encounters a geometry that crosses the antimeridian, it will represent
307+
* it with an envelope spanning from the westernmost to easternmost points, often
308+
* covering most of the Earth's longitude range (e.g., minX=-180, maxX=180).
309+
*
310+
* The wraparound check below is defensive but should never be triggered with standard
311+
* JTS geometry operations, as JTS will never produce an envelope with minX > maxX.
312+
*
313+
* @throws ShouldNeverHappenException if the incoming X bounds are non-NaN and either they or the current X bounds are wraparound (min &gt; max)
276314
*/
277315
private void updateBounds(double minX, double maxX, double minY, double maxY) {
278316
if (!Double.isNaN(minX) && !Double.isNaN(maxX)) {
317+
// Reject wraparound in either the incoming X bounds or the current X bounds
318+
// This should never happen with standard JTS geometry operations
319+
if (isWraparound(minX, maxX) || isWraparound(xMin, xMax)) {
320+
throw new ShouldNeverHappenException("Wraparound bounding boxes are not yet supported");
321+
}
322+
279323
xMin = Math.min(xMin, minX);
280324
xMax = Math.max(xMax, maxX);
281325
}
@@ -302,6 +346,16 @@ public void reset() {
302346
valid = true;
303347
}
304348

349+
/**
350+
* The Parquet specification allows X bounds to be "wraparound" to allow for
351+
* more compact bounding boxes when a geometry happens to include components
352+
* on both sides of the antimeridian (e.g., the nation of Fiji). This function
353+
* checks for that case.
354+
*/
355+
public static boolean isWraparound(double xmin, double xmax) {
356+
return !Double.isInfinite(xmin - xmax) && xmin > xmax;
357+
}
358+
305359
/**
306360
* Creates a copy of the current bounding box.
307361
*

parquet-column/src/test/java/org/apache/parquet/column/statistics/geospatial/TestBoundingBox.java

Lines changed: 150 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -530,34 +530,34 @@ public void testLineStringWithPartialNaNCoordinates() {
530530

531531
/**
532532
* Tests the end-to-end case for updating and merging bounding boxes with mixed valid and NaN coordinates.
533-
*
533+
* <p>
534534
* Scenario - Parquet file with multiple row groups:
535535
* file-level bbox: [1, 9, 100, 900]
536-
*
536+
* <p>
537537
* Row group 1: [1, 2, 100, 100]
538538
* - POINT (1, 100)
539539
* - POINT (2, NaN)
540-
*
540+
* <p>
541541
* Row group 2: [3, 3, 300, 300]
542542
* - POINT (3, 300)
543543
* - POINT (NaN, NaN)
544-
*
544+
* <p>
545545
* Row group 3: no valid bbox
546546
* - POINT (5, NaN)
547547
* - POINT (6, NaN)
548-
*
548+
* <p>
549549
* Row group 4: [7, 8, 700, 800]
550550
* - POINT (7, 700)
551551
* - POINT (8, 800)
552-
*
552+
* <p>
553553
* Row group 5: no valid bbox
554554
* - POINT (NaN, NaN)
555555
* - POINT (NaN, NaN)
556-
*
556+
* <p>
557557
* Row group 6: [9, 9, 900, 900]
558558
* - POINT (9, 900)
559559
* - LINESTRING EMPTY
560-
*
560+
* <p>
561561
* The test verifies that:
562562
* 1. Individual row group bounding boxes correctly handle NaN coordinates
563563
* 2. The merge operation correctly combines valid bounding boxes and ignores invalid ones
@@ -681,4 +681,146 @@ public void testMergingRowGroupBoundingBoxes() {
681681
Assert.assertEquals(900.0, reverseMergeBox.getYMax(), 0.0);
682682
Assert.assertTrue(reverseMergeBox.isValid());
683683
}
684+
685+
@Test
686+
public void testIsXValidAndIsYValid() {
687+
// Test with valid X and Y
688+
BoundingBox validBox = new BoundingBox(1, 2, 3, 4, 5, 6, 7, 8);
689+
Assert.assertTrue(validBox.isXValid());
690+
Assert.assertTrue(validBox.isYValid());
691+
Assert.assertTrue(validBox.isXYValid());
692+
Assert.assertTrue(validBox.isZValid());
693+
Assert.assertTrue(validBox.isMValid());
694+
695+
// Test with invalid X (NaN)
696+
BoundingBox invalidXBox = new BoundingBox(Double.NaN, 2, 3, 4, 5, 6, 7, 8);
697+
Assert.assertFalse(invalidXBox.isXValid());
698+
Assert.assertTrue(invalidXBox.isYValid());
699+
Assert.assertFalse(invalidXBox.isXYValid());
700+
Assert.assertTrue(invalidXBox.isZValid());
701+
Assert.assertTrue(invalidXBox.isMValid());
702+
703+
// Test with invalid Y (NaN)
704+
BoundingBox invalidYBox = new BoundingBox(1, 2, Double.NaN, 4, 5, 6, 7, 8);
705+
Assert.assertTrue(invalidYBox.isXValid());
706+
Assert.assertFalse(invalidYBox.isYValid());
707+
Assert.assertFalse(invalidXBox.isXYValid());
708+
Assert.assertTrue(invalidXBox.isZValid());
709+
Assert.assertTrue(invalidXBox.isMValid());
710+
711+
// Test with both X and Y invalid
712+
BoundingBox invalidXYBox = new BoundingBox(Double.NaN, Double.NaN, Double.NaN, Double.NaN, 5, 6, 7, 8);
713+
Assert.assertFalse(invalidXYBox.isXValid());
714+
Assert.assertFalse(invalidXYBox.isYValid());
715+
Assert.assertFalse(invalidXYBox.isXYValid());
716+
Assert.assertTrue(invalidXBox.isZValid());
717+
Assert.assertTrue(invalidXBox.isMValid());
718+
}
719+
720+
@Test
721+
public void testIsXEmptyAndIsYEmpty() {
722+
// Empty bounding box (initial state)
723+
BoundingBox emptyBox = new BoundingBox();
724+
Assert.assertTrue(emptyBox.isXEmpty());
725+
Assert.assertTrue(emptyBox.isYEmpty());
726+
Assert.assertTrue(emptyBox.isXYEmpty());
727+
728+
// Non-empty box
729+
BoundingBox nonEmptyBox = new BoundingBox(1, 2, 3, 4, 5, 6, 7, 8);
730+
Assert.assertFalse(nonEmptyBox.isXEmpty());
731+
Assert.assertFalse(nonEmptyBox.isYEmpty());
732+
Assert.assertFalse(nonEmptyBox.isXYEmpty());
733+
734+
// Box with empty X dimension only
735+
GeometryFactory gf = new GeometryFactory();
736+
BoundingBox emptyXBox = new BoundingBox();
737+
// Only update Y dimension
738+
emptyXBox.update(gf.createPoint(new Coordinate(Double.NaN, 5)));
739+
Assert.assertTrue(emptyXBox.isXEmpty());
740+
Assert.assertFalse(emptyXBox.isYEmpty());
741+
Assert.assertTrue(emptyXBox.isXYEmpty());
742+
743+
// Box with empty Y dimension only
744+
BoundingBox emptyYBox = new BoundingBox();
745+
// Only update X dimension
746+
emptyYBox.update(gf.createPoint(new Coordinate(10, Double.NaN)));
747+
Assert.assertFalse(emptyYBox.isXEmpty());
748+
Assert.assertTrue(emptyYBox.isYEmpty());
749+
Assert.assertTrue(emptyYBox.isXYEmpty());
750+
}
751+
752+
@Test
753+
public void testIsXWraparound() {
754+
// Normal bounding box (no wraparound)
755+
BoundingBox normalBox = new BoundingBox(1, 2, 3, 4, 5, 6, 7, 8);
756+
Assert.assertFalse(normalBox.isXWraparound());
757+
758+
// Wraparound box (xMin > xMax)
759+
BoundingBox wraparoundBox = new BoundingBox(170, 20, 10, 20, 0, 0, 0, 0);
760+
Assert.assertTrue(wraparoundBox.isXWraparound());
761+
762+
// Edge case: equal bounds
763+
BoundingBox equalBoundsBox = new BoundingBox(10, 10, 20, 20, 0, 0, 0, 0);
764+
Assert.assertFalse(equalBoundsBox.isXWraparound());
765+
766+
// Test static method directly
767+
Assert.assertTrue(BoundingBox.isWraparound(180, -180));
768+
Assert.assertFalse(BoundingBox.isWraparound(-180, 180));
769+
770+
// Test with infinity values
771+
Assert.assertFalse(BoundingBox.isWraparound(Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY));
772+
Assert.assertFalse(BoundingBox.isWraparound(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY));
773+
Assert.assertFalse(BoundingBox.isWraparound(Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY));
774+
Assert.assertFalse(BoundingBox.isWraparound(Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY));
775+
776+
// Check edge cases
777+
Assert.assertFalse(BoundingBox.isWraparound(0.0, Double.POSITIVE_INFINITY));
778+
Assert.assertFalse(BoundingBox.isWraparound(Double.NEGATIVE_INFINITY, 0.0));
779+
}
780+
781+
@Test
782+
public void testWraparoundHandlingInMerge() {
783+
// Test with two normal boxes
784+
BoundingBox box1 = new BoundingBox(10, 20, 10, 20, 0, 0, 0, 0);
785+
BoundingBox box2 = new BoundingBox(15, 25, 15, 25, 0, 0, 0, 0);
786+
box1.merge(box2);
787+
788+
Assert.assertTrue(box1.isValid());
789+
Assert.assertEquals(10.0, box1.getXMin(), 0.0);
790+
Assert.assertEquals(25.0, box1.getXMax(), 0.0);
791+
792+
// Test with one wraparound box
793+
BoundingBox normalBox = new BoundingBox(0, 10, 0, 10, 0, 0, 0, 0);
794+
BoundingBox wraparoundBox = new BoundingBox(170, -170, 5, 15, 0, 0, 0, 0);
795+
796+
normalBox.merge(wraparoundBox);
797+
798+
Assert.assertFalse(normalBox.isValid());
799+
Assert.assertTrue(Double.isNaN(normalBox.getXMin()));
800+
Assert.assertTrue(Double.isNaN(normalBox.getXMax()));
801+
Assert.assertEquals(0.0, normalBox.getYMin(), 0.0);
802+
Assert.assertEquals(15.0, normalBox.getYMax(), 0.0);
803+
}
804+
805+
@Test
806+
public void testWraparoundBoxMergingNormalBox() {
807+
// Create a normal bounding box
808+
BoundingBox normalBox = new BoundingBox(0, 10, 0, 10, 0, 0, 0, 0);
809+
810+
// Create a wraparound bounding box (xMin > xMax)
811+
BoundingBox wraparoundBox = new BoundingBox(170, -170, 5, 15, 0, 0, 0, 0);
812+
813+
// Merge the normal box into the wraparound box
814+
wraparoundBox.merge(normalBox);
815+
816+
// After merging, X dimension should be marked as invalid (NaN)
817+
// because we don't support merging wraparound bounds
818+
Assert.assertFalse(wraparoundBox.isValid());
819+
Assert.assertTrue(Double.isNaN(wraparoundBox.getXMin()));
820+
Assert.assertTrue(Double.isNaN(wraparoundBox.getXMax()));
821+
822+
// Y dimension should be properly merged
823+
Assert.assertEquals(0.0, wraparoundBox.getYMin(), 0.0);
824+
Assert.assertEquals(15.0, wraparoundBox.getYMax(), 0.0);
825+
}
684826
}

0 commit comments

Comments
 (0)