Skip to content

Commit 375de55

Browse files
committed
add allowWraparound in bbox to ParquetProperties
1 parent d0081be commit 375de55

4 files changed

Lines changed: 37 additions & 5 deletions

File tree

parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,9 @@ public class ParquetProperties {
6565
public static final int DEFAULT_BLOOM_FILTER_CANDIDATES_NUMBER = 5;
6666
public static final boolean DEFAULT_STATISTICS_ENABLED = true;
6767
public static final boolean DEFAULT_SIZE_STATISTICS_ENABLED = true;
68+
public static final boolean DEFAULT_GEO_STATISTICS_ENABLED = true;
6869
public static final boolean DEFAULT_GEOSPATIAL_STATISTICS_ENABLED = true;
70+
public static final boolean DEFAULT_GEO_STATISTICS_WRAPAROUNDENABLED = true;
6971

7072
public static final boolean DEFAULT_PAGE_WRITE_CHECKSUM_ENABLED = true;
7173

@@ -116,6 +118,7 @@ public static WriterVersion fromString(String name) {
116118
private final boolean statisticsEnabled;
117119
private final boolean sizeStatisticsEnabled;
118120
private final boolean geospatialStatisticsEnabled;
121+
private final boolean geospatialStatisticsBBoxWraparoundEnabled;
119122

120123
// The expected NDV (number of distinct values) for each columns
121124
private final ColumnProperty<Long> bloomFilterNDVs;
@@ -131,6 +134,7 @@ public static WriterVersion fromString(String name) {
131134
private final ColumnProperty<Boolean> statistics;
132135
private final ColumnProperty<Boolean> sizeStatistics;
133136
private final ColumnProperty<Boolean> geospatialStatistics;
137+
private final ColumnProperty<Boolean> geospatialStatisticsBBoxWraparound;
134138

135139
private ParquetProperties(Builder builder) {
136140
this.pageSizeThreshold = builder.pageSize;
@@ -151,6 +155,7 @@ private ParquetProperties(Builder builder) {
151155
this.statisticsEnabled = builder.statisticsEnabled;
152156
this.sizeStatisticsEnabled = builder.sizeStatisticsEnabled;
153157
this.geospatialStatisticsEnabled = builder.geospatialStatisticsEnabled;
158+
this.geospatialStatisticsBBoxWraparoundEnabled = builder.geospatialStatisticsBBoxWraparoundEnable;
154159
this.bloomFilterNDVs = builder.bloomFilterNDVs.build();
155160
this.bloomFilterFPPs = builder.bloomFilterFPPs.build();
156161
this.bloomFilterEnabled = builder.bloomFilterEnabled.build();
@@ -164,6 +169,7 @@ private ParquetProperties(Builder builder) {
164169
this.statistics = builder.statistics.build();
165170
this.sizeStatistics = builder.sizeStatistics.build();
166171
this.geospatialStatistics = builder.geospatialStatistics.build();
172+
this.geospatialStatisticsBBoxWraparound = builder.geospatialStatisticsBBoxWraparound.build();
167173
}
168174

169175
public static Builder builder() {
@@ -371,6 +377,10 @@ public boolean getGeoSpatialStatisticsEnabled(ColumnDescriptor column) {
371377
return geospatialStatisticsEnabled;
372378
}
373379

380+
public boolean getGeoSpatialStatisticsBBoxWraparoundEnabled(ColumnDescriptor column) {
381+
return geospatialStatisticsBBoxWraparoundEnabled;
382+
}
383+
374384
@Override
375385
public String toString() {
376386
return "Parquet page size to " + getPageSizeThreshold() + '\n'
@@ -389,7 +399,8 @@ public String toString() {
389399
+ "Page row count limit to " + getPageRowCountLimit() + '\n'
390400
+ "Writing page checksums is: " + (getPageWriteChecksumEnabled() ? "on" : "off") + '\n'
391401
+ "Statistics enabled: " + statisticsEnabled + '\n'
392-
+ "Size statistics enabled: " + sizeStatisticsEnabled;
402+
+ "Size statistics enabled: " + sizeStatisticsEnabled + '\n'
403+
+ "Geospatial statistics enabled: " + geospatialStatisticsEnabled;
393404
}
394405

395406
public static class Builder {
@@ -407,7 +418,8 @@ public static class Builder {
407418
private int statisticsTruncateLength = DEFAULT_STATISTICS_TRUNCATE_LENGTH;
408419
private boolean statisticsEnabled = DEFAULT_STATISTICS_ENABLED;
409420
private boolean sizeStatisticsEnabled = DEFAULT_SIZE_STATISTICS_ENABLED;
410-
private boolean geospatialStatisticsEnabled = DEFAULT_SIZE_STATISTICS_ENABLED;
421+
private boolean geospatialStatisticsEnabled = DEFAULT_GEO_STATISTICS_ENABLED;
422+
private boolean geospatialStatisticsBBoxWraparoundEnable = DEFAULT_GEO_STATISTICS_WRAPAROUNDENABLED;
411423
private final ColumnProperty.Builder<Long> bloomFilterNDVs;
412424
private final ColumnProperty.Builder<Double> bloomFilterFPPs;
413425
private int maxBloomFilterBytes = DEFAULT_MAX_BLOOM_FILTER_BYTES;
@@ -421,6 +433,7 @@ public static class Builder {
421433
private final ColumnProperty.Builder<Boolean> statistics;
422434
private final ColumnProperty.Builder<Boolean> sizeStatistics;
423435
private final ColumnProperty.Builder<Boolean> geospatialStatistics;
436+
private final ColumnProperty.Builder<Boolean> geospatialStatisticsBBoxWraparound;
424437

425438
private Builder() {
426439
enableDict = ColumnProperty.<Boolean>builder().withDefaultValue(DEFAULT_IS_DICTIONARY_ENABLED);
@@ -440,6 +453,8 @@ private Builder() {
440453
sizeStatistics = ColumnProperty.<Boolean>builder().withDefaultValue(DEFAULT_SIZE_STATISTICS_ENABLED);
441454
geospatialStatistics =
442455
ColumnProperty.<Boolean>builder().withDefaultValue(DEFAULT_GEOSPATIAL_STATISTICS_ENABLED);
456+
geospatialStatisticsBBoxWraparound =
457+
ColumnProperty.<Boolean>builder().withDefaultValue(DEFAULT_GEO_STATISTICS_WRAPAROUNDENABLED);
443458
}
444459

445460
private Builder(ParquetProperties toCopy) {
@@ -465,6 +480,7 @@ private Builder(ParquetProperties toCopy) {
465480
this.statistics = ColumnProperty.builder(toCopy.statistics);
466481
this.sizeStatistics = ColumnProperty.builder(toCopy.sizeStatistics);
467482
this.geospatialStatistics = ColumnProperty.builder(toCopy.geospatialStatistics);
483+
this.geospatialStatisticsBBoxWraparound = ColumnProperty.builder(toCopy.geospatialStatisticsBBoxWraparound);
468484
}
469485

470486
/**

parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnValueCollector.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ class ColumnValueCollector {
4040
private final boolean statisticsEnabled;
4141
private final boolean sizeStatisticsEnabled;
4242
private final boolean geospatialStatisticsEnabled;
43+
private final boolean geospatialStatisticsBBoxWraparoundEnabled;
4344
private BloomFilterWriter bloomFilterWriter;
4445
private BloomFilter bloomFilter;
4546
private Statistics<?> statistics;
@@ -51,6 +52,7 @@ class ColumnValueCollector {
5152
this.statisticsEnabled = props.getStatisticsEnabled(path);
5253
this.sizeStatisticsEnabled = props.getSizeStatisticsEnabled(path);
5354
this.geospatialStatisticsEnabled = props.getGeoSpatialStatisticsEnabled(path);
55+
this.geospatialStatisticsBBoxWraparoundEnabled = props.getGeoSpatialStatisticsBBoxWraparoundEnabled(path);
5456
resetPageStatistics();
5557
initBloomFilter(bloomFilterWriter, props);
5658
}
@@ -65,7 +67,7 @@ void resetPageStatistics() {
6567
: SizeStatistics.noopBuilder(
6668
path.getPrimitiveType(), path.getMaxRepetitionLevel(), path.getMaxDefinitionLevel());
6769
this.geospatialStatisticsBuilder = geospatialStatisticsEnabled
68-
? GeospatialStatistics.newBuilder(path.getPrimitiveType())
70+
? GeospatialStatistics.newBuilder(path.getPrimitiveType(), geospatialStatisticsBBoxWraparoundEnabled)
6971
: GeospatialStatistics.noopBuilder();
7072
}
7173

parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/BoundingBox.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
public class BoundingBox {
2727

28-
boolean allowWraparound = Boolean.parseBoolean(System.getenv().getOrDefault("ALLOW_BBOX_WRAPAROUND", "true"));
28+
private boolean allowWraparound = true;
2929

3030
private double xMin = Double.POSITIVE_INFINITY;
3131
private double xMax = Double.NEGATIVE_INFINITY;
@@ -50,6 +50,10 @@ public BoundingBox(
5050

5151
/**
 * Creates a bounding box whose fields keep their declared initial values
 * (for example {@code xMin = +Infinity} and {@code xMax = -Infinity}).
 */
public BoundingBox() {
  // Nothing to do: all state comes from the field initializers.
}
5252

53+
void enableWraparound(boolean enable) {
54+
allowWraparound = enable;
55+
}
56+
5357
public double getXMin() {
5458
return xMin;
5559
}
@@ -216,7 +220,8 @@ public BoundingBox copy() {
216220

217221
@Override
218222
public String toString() {
219-
return "BoundingBox{" + "xMin="
223+
return "BoundingBox{" + "allowWraparound="
224+
+ allowWraparound + ", xMin="
220225
+ xMin + ", xMax="
221226
+ xMax + ", yMin="
222227
+ yMin + ", yMax="

parquet-column/src/main/java/org/apache/parquet/column/statistics/geometry/GeospatialStatistics.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,15 @@ public static GeospatialStatistics.Builder newBuilder(PrimitiveType type) {
155155
}
156156
}
157157

158+
/** Create a new GeospatialStatistics builder with the additional geospatialStatisticsBBoxWraparoundEnabled property */
159+
public static Builder newBuilder(PrimitiveType primitiveType, boolean geospatialStatisticsBBoxWraparoundEnabled) {
160+
Builder builder = newBuilder(primitiveType);
161+
if (geospatialStatisticsBBoxWraparoundEnabled) {
162+
builder.boundingBox.enableWraparound(true);
163+
}
164+
return builder;
165+
}
166+
158167
/**
159168
* Constructs a GeospatialStatistics object with the specified CRS, bounding box, and geospatial types.
160169
*

0 commit comments

Comments
 (0)