Skip to content

Commit d8ffe0e

Browse files
committed
address some pr comments
- to string format - z and m valid checking - warn with abort
1 parent 819b3e3 commit d8ffe0e

4 files changed

Lines changed: 43 additions & 9 deletions

File tree

parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/BoundingBox.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,30 @@ public boolean isValid() {
9090
return !(Double.isNaN(xMin) || Double.isNaN(xMax) || Double.isNaN(yMin) || Double.isNaN(yMax));
9191
}
9292

93+
/**
94+
* Checks if the Z dimension of the bounding box is valid.
95+
* The Z dimension is considered valid if zMin and zMax are not infinite
96+
* and don't contain NaN values.
97+
*
98+
* @return true if the Z dimension is valid, false otherwise.
99+
*/
100+
public boolean isZValid() {
101+
return !(Double.isInfinite(zMin) || Double.isInfinite(zMax) ||
102+
Double.isNaN(zMin) || Double.isNaN(zMax));
103+
}
104+
105+
/**
106+
* Checks if the M dimension of the bounding box is valid.
107+
* The M dimension is considered valid if mMin and mMax are not infinite
108+
* and don't contain NaN values.
109+
*
110+
* @return true if the M dimension is valid, false otherwise.
111+
*/
112+
public boolean isMValid() {
113+
return !(Double.isInfinite(mMin) || Double.isInfinite(mMax) ||
114+
Double.isNaN(mMin) || Double.isNaN(mMax));
115+
}
116+
93117
/**
94118
* Checks if the bounding box is empty.
95119
* A bounding box is considered empty if any bounds are in their initial state

parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeospatialStatistics.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,22 @@
1818
*/
1919
package org.apache.parquet.column.statistics.geometry;
2020

21-
import java.util.Objects;
2221
import org.apache.parquet.Preconditions;
2322
import org.apache.parquet.io.api.Binary;
2423
import org.apache.parquet.schema.LogicalTypeAnnotation;
2524
import org.apache.parquet.schema.PrimitiveType;
2625
import org.locationtech.jts.geom.Geometry;
2726
import org.locationtech.jts.io.ParseException;
2827
import org.locationtech.jts.io.WKBReader;
28+
import org.slf4j.Logger;
29+
import org.slf4j.LoggerFactory;
2930

3031
/**
3132
* A structure for capturing metadata for estimating the unencoded,
3233
* uncompressed size of geospatial data written.
3334
*/
3435
public class GeospatialStatistics {
36+
private static final Logger LOG = LoggerFactory.getLogger(GeospatialStatistics.class);
3537

3638
// Metadata that may impact the statistics calculation
3739
private final BoundingBox boundingBox;
@@ -92,6 +94,7 @@ public void update(Binary value) {
9294
Geometry geom = reader.read(value.getBytes());
9395
update(geom);
9496
} catch (ParseException e) {
97+
LOG.warn("Failed to parse WKB geometry, aborting statistics update", e);
9598
abort();
9699
}
97100
}
@@ -183,11 +186,10 @@ public GeospatialTypes getGeospatialTypes() {
183186
* @return whether the statistics has valid value.
184187
*/
185188
public boolean isValid() {
186-
if (boundingBox == null && geospatialTypes == null) {
189+
if (boundingBox == null || geospatialTypes == null) {
187190
return false;
188191
}
189-
return Objects.requireNonNull(this.boundingBox).isValid()
190-
&& Objects.requireNonNull(this.geospatialTypes).isValid();
192+
return boundingBox.isValid() && geospatialTypes.isValid();
191193
}
192194

193195
public void merge(GeospatialStatistics other) {

parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1301,12 +1301,20 @@ LogicalTypeToken getType() {
13011301
@Override
13021302
protected String typeParametersAsString() {
13031303
StringBuilder sb = new StringBuilder();
1304+
1305+
boolean hasCrs = crs != null && !crs.isEmpty();
1306+
boolean hasEdgeAlgorithm = edgeAlgorithm != null;
1307+
1308+
if (!hasCrs && !hasEdgeAlgorithm) {
1309+
return ""; // Return empty string when both are empty
1310+
}
1311+
13041312
sb.append("(");
1305-
if (crs != null && !crs.isEmpty()) {
1313+
if (hasCrs) {
13061314
sb.append(crs);
13071315
}
1308-
if (edgeAlgorithm != null) {
1309-
if (crs != null && !crs.isEmpty()) sb.append(",");
1316+
if (hasEdgeAlgorithm) {
1317+
if (hasCrs) sb.append(",");
13101318
sb.append(edgeAlgorithm);
13111319
}
13121320
sb.append(")");

parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -824,12 +824,12 @@ private static BoundingBox toParquetBoundingBox(org.apache.parquet.column.statis
824824
formatBbox.setYmin(bbox.getYMin());
825825
formatBbox.setYmax(bbox.getYMax());
826826

827-
if (Double.isInfinite(bbox.getZMin()) && Double.isInfinite(bbox.getZMax())) {
827+
if (bbox.isZValid()) {
828828
formatBbox.setZmin(bbox.getZMin());
829829
formatBbox.setZmax(bbox.getZMax());
830830
}
831831

832-
if (Double.isInfinite(bbox.getMMin()) && Double.isInfinite(bbox.getMMax())) {
832+
if (bbox.isMValid()) {
833833
formatBbox.setMmin(bbox.getMMin());
834834
formatBbox.setMmax(bbox.getMMax());
835835
}

0 commit comments

Comments
 (0)