Skip to content

Commit 1ecb5b6

Browse files
committed
improve handling null, empty, and nan cases in boundingbox
1 parent 92f45c2 commit 1ecb5b6

2 files changed

Lines changed: 148 additions & 14 deletions

File tree

parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/BoundingBox.java

Lines changed: 44 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,20 @@ public double getMMax() {
8989
// Method to update the bounding box with the coordinates of a Geometry object
9090
// geometry can be changed by this method
9191
void update(Geometry geometry, String crs) {
92+
if (geometry == null) {
93+
// If geometry is null, abort
94+
abort();
95+
return;
96+
}
97+
if (geometry.isEmpty()) {
98+
// For empty geometries, keep track of the geometry type but set bounds to empty interval
99+
return; // Keep the initial -Inf/Inf state for bounds
100+
}
101+
92102
if (shouldNormalizeLongitude(crs)) {
93103
GeospatialUtils.normalizeLongitude(geometry);
94104
}
105+
95106
Envelope envelope = geometry.getEnvelopeInternal();
96107
double minX = envelope.getMinX();
97108
double minY = envelope.getMinY();
@@ -106,24 +117,43 @@ void update(Geometry geometry, String crs) {
106117

107118
Coordinate[] coordinates = geometry.getCoordinates();
108119
for (Coordinate coord : coordinates) {
109-
if (!Double.isNaN(coord.getZ())) {
110-
minZ = Math.min(minZ, coord.getZ());
111-
maxZ = Math.max(maxZ, coord.getZ());
112-
}
113-
if (!Double.isNaN(coord.getM())) {
114-
minM = Math.min(minM, coord.getM());
115-
maxM = Math.max(maxM, coord.getM());
120+
// Skip NaN values
121+
if (!Double.isNaN(coord.x) && !Double.isNaN(coord.y)) {
122+
if (!Double.isNaN(coord.getZ())) {
123+
minZ = Math.min(minZ, coord.getZ());
124+
maxZ = Math.max(maxZ, coord.getZ());
125+
}
126+
if (!Double.isNaN(coord.getM())) {
127+
minM = Math.min(minM, coord.getM());
128+
maxM = Math.max(maxM, coord.getM());
129+
}
116130
}
117131
}
132+
// Only update bounds if we have valid coordinates
133+
if (!Double.isNaN(minX) && !Double.isNaN(maxX)) {
134+
updateXBounds(minX, maxX, allowWraparound);
135+
}
118136

119-
updateXBounds(minX, maxX, allowWraparound);
137+
if (!Double.isNaN(minY)) {
138+
yMin = Math.min(yMin, minY);
139+
}
140+
if (!Double.isNaN(maxY)) {
141+
yMax = Math.max(yMax, maxY);
142+
}
143+
144+
if (minZ != Double.POSITIVE_INFINITY) {
145+
zMin = Math.min(zMin, minZ);
146+
}
147+
if (maxZ != Double.NEGATIVE_INFINITY) {
148+
zMax = Math.max(zMax, maxZ);
149+
}
120150

121-
yMin = Math.min(yMin, minY);
122-
yMax = Math.max(yMax, maxY);
123-
zMin = Math.min(zMin, minZ);
124-
zMax = Math.max(zMax, maxZ);
125-
mMin = Math.min(mMin, minM);
126-
mMax = Math.max(mMax, maxM);
151+
if (minM != Double.POSITIVE_INFINITY) {
152+
mMin = Math.min(mMin, minM);
153+
}
154+
if (maxM != Double.NEGATIVE_INFINITY) {
155+
mMax = Math.max(mMax, maxM);
156+
}
127157
}
128158

129159
void merge(BoundingBox other) {

parquet-column/src/test/java/org/apache/parquet/column/statistics/geometry/TestBoundingBox.java

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,108 @@ public void testWraparound() {
8585
Assert.assertEquals(-10.0, boundingBox.getYMin(), 0.0);
8686
Assert.assertEquals(25.0, boundingBox.getYMax(), 0.0);
8787
}
88+
89+
@Test
90+
public void testEmptyGeometry() {
91+
GeometryFactory geometryFactory = new GeometryFactory();
92+
BoundingBox boundingBox = new BoundingBox();
93+
94+
// Create an empty point
95+
Point emptyPoint = geometryFactory.createPoint();
96+
boundingBox.update(emptyPoint, "EPSG:4326");
97+
98+
// Empty geometry should retain the initial -Inf/Inf state
99+
Assert.assertEquals(Double.POSITIVE_INFINITY, boundingBox.getXMin(), 0.0);
100+
Assert.assertEquals(Double.NEGATIVE_INFINITY, boundingBox.getXMax(), 0.0);
101+
Assert.assertEquals(Double.POSITIVE_INFINITY, boundingBox.getYMin(), 0.0);
102+
Assert.assertEquals(Double.NEGATIVE_INFINITY, boundingBox.getYMax(), 0.0);
103+
104+
// Test that after adding a non-empty geometry, values are updated correctly
105+
Point point = geometryFactory.createPoint(new Coordinate(10, 20));
106+
boundingBox.update(point, "EPSG:4326");
107+
Assert.assertEquals(10.0, boundingBox.getXMin(), 0.0);
108+
Assert.assertEquals(10.0, boundingBox.getXMax(), 0.0);
109+
Assert.assertEquals(20.0, boundingBox.getYMin(), 0.0);
110+
Assert.assertEquals(20.0, boundingBox.getYMax(), 0.0);
111+
112+
// Update with another empty geometry, should not change the bounds
113+
boundingBox.update(emptyPoint, "EPSG:4326");
114+
Assert.assertEquals(10.0, boundingBox.getXMin(), 0.0);
115+
Assert.assertEquals(10.0, boundingBox.getXMax(), 0.0);
116+
Assert.assertEquals(20.0, boundingBox.getYMin(), 0.0);
117+
Assert.assertEquals(20.0, boundingBox.getYMax(), 0.0);
118+
}
119+
120+
@Test
121+
public void testNaNCoordinates() {
122+
GeometryFactory geometryFactory = new GeometryFactory();
123+
BoundingBox boundingBox = new BoundingBox();
124+
125+
// Create a point with NaN coordinates
126+
Point nanPoint = geometryFactory.createPoint(new Coordinate(Double.NaN, Double.NaN));
127+
boundingBox.update(nanPoint, "EPSG:4326");
128+
129+
// NaN values should be ignored, maintaining the initial -Inf/Inf state
130+
Assert.assertEquals(Double.POSITIVE_INFINITY, boundingBox.getXMin(), 0.0);
131+
Assert.assertEquals(Double.NEGATIVE_INFINITY, boundingBox.getXMax(), 0.0);
132+
Assert.assertEquals(Double.POSITIVE_INFINITY, boundingBox.getYMin(), 0.0);
133+
Assert.assertEquals(Double.NEGATIVE_INFINITY, boundingBox.getYMax(), 0.0);
134+
135+
// Create a mixed point with a valid coordinate and a NaN coordinate
136+
Point mixedPoint = geometryFactory.createPoint(new Coordinate(15.0, Double.NaN));
137+
boundingBox.update(mixedPoint, "EPSG:4326");
138+
139+
// The valid X coordinate should be used, but Y should still be ignored
140+
Assert.assertEquals(15.0, boundingBox.getXMin(), 0.0);
141+
Assert.assertEquals(15.0, boundingBox.getXMax(), 0.0);
142+
Assert.assertEquals(Double.POSITIVE_INFINITY, boundingBox.getYMin(), 0.0);
143+
Assert.assertEquals(Double.NEGATIVE_INFINITY, boundingBox.getYMax(), 0.0);
144+
145+
// Add a fully valid point
146+
Point validPoint = geometryFactory.createPoint(new Coordinate(10, 20));
147+
boundingBox.update(validPoint, "EPSG:4326");
148+
149+
// Both X and Y should now be updated
150+
Assert.assertEquals(10.0, boundingBox.getXMin(), 0.0);
151+
Assert.assertEquals(15.0, boundingBox.getXMax(), 0.0);
152+
Assert.assertEquals(20.0, boundingBox.getYMin(), 0.0);
153+
Assert.assertEquals(20.0, boundingBox.getYMax(), 0.0);
154+
}
155+
156+
@Test
157+
public void testNaNZAndMValues() {
158+
GeometryFactory geometryFactory = new GeometryFactory();
159+
BoundingBox boundingBox = new BoundingBox();
160+
161+
// Create a point with NaN Z value only
162+
Coordinate coord = new Coordinate(10, 20);
163+
coord.setZ(Double.NaN); // Only set Z, not M
164+
Point nanZPoint = geometryFactory.createPoint(coord);
165+
boundingBox.update(nanZPoint, "EPSG:4326");
166+
167+
// X and Y should be updated, but Z should remain -Inf/Inf
168+
Assert.assertEquals(10.0, boundingBox.getXMin(), 0.0);
169+
Assert.assertEquals(10.0, boundingBox.getXMax(), 0.0);
170+
Assert.assertEquals(20.0, boundingBox.getYMin(), 0.0);
171+
Assert.assertEquals(20.0, boundingBox.getYMax(), 0.0);
172+
Assert.assertEquals(Double.POSITIVE_INFINITY, boundingBox.getZMin(), 0.0);
173+
Assert.assertEquals(Double.NEGATIVE_INFINITY, boundingBox.getZMax(), 0.0);
174+
Assert.assertEquals(Double.POSITIVE_INFINITY, boundingBox.getMMin(), 0.0);
175+
Assert.assertEquals(Double.NEGATIVE_INFINITY, boundingBox.getMMax(), 0.0);
176+
177+
// Add a point with valid Z value
178+
Coordinate coord2 = new Coordinate(15, 25, 30); // Using constructor with Z
179+
Point validZPoint = geometryFactory.createPoint(coord2);
180+
boundingBox.update(validZPoint, "EPSG:4326");
181+
182+
// X, Y, and Z values should now be updated
183+
Assert.assertEquals(10.0, boundingBox.getXMin(), 0.0);
184+
Assert.assertEquals(15.0, boundingBox.getXMax(), 0.0);
185+
Assert.assertEquals(20.0, boundingBox.getYMin(), 0.0);
186+
Assert.assertEquals(25.0, boundingBox.getYMax(), 0.0);
187+
Assert.assertEquals(30.0, boundingBox.getZMin(), 0.0);
188+
Assert.assertEquals(30.0, boundingBox.getZMax(), 0.0);
189+
Assert.assertEquals(Double.POSITIVE_INFINITY, boundingBox.getMMin(), 0.0);
190+
Assert.assertEquals(Double.NEGATIVE_INFINITY, boundingBox.getMMax(), 0.0);
191+
}
88192
}

0 commit comments

Comments
 (0)