Skip to content

Commit 71629c1

Browse files
authored
API, Core: Avoid boxing of integer in evaluators / simplify ManifestReader (#13589)
1 parent c9154bd commit 71629c1

3 files changed

Lines changed: 30 additions & 56 deletions

File tree

api/src/main/java/org/apache/iceberg/expressions/InclusiveMetricsEvaluator.java

Lines changed: 16 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -139,7 +139,7 @@ public <T> Boolean isNull(Bound<T> term) {
139139
// if the column has no null values, the expression cannot match
140140
if (isNonNullPreserving(term)) {
141141
// number of non-nulls is the same as for the ref
142-
Integer id = term.ref().fieldId();
142+
int id = term.ref().fieldId();
143143
if (!mayContainNull(id)) {
144144
return ROWS_CANNOT_MATCH;
145145
}
@@ -154,7 +154,7 @@ public <T> Boolean notNull(Bound<T> term) {
154154
// if the column has no non-null values, the expression cannot match
155155

156156
// all terms are null preserving. see #isNullPreserving(Bound)
157-
Integer id = term.ref().fieldId();
157+
int id = term.ref().fieldId();
158158
if (containsNullsOnly(id)) {
159159
return ROWS_CANNOT_MATCH;
160160
}
@@ -166,7 +166,7 @@ public <T> Boolean notNull(Bound<T> term) {
166166
public <T> Boolean isNaN(Bound<T> term) {
167167
// when there's no nanCounts information, but we already know the column only contains null,
168168
// it's guaranteed that there's no NaN value
169-
Integer id = term.ref().fieldId();
169+
int id = term.ref().fieldId();
170170
if (containsNullsOnly(id)) {
171171
return ROWS_CANNOT_MATCH;
172172
}
@@ -189,7 +189,7 @@ public <T> Boolean notNaN(Bound<T> term) {
189189
return ROWS_MIGHT_MATCH;
190190
}
191191

192-
Integer id = term.ref().fieldId();
192+
int id = term.ref().fieldId();
193193

194194
if (containsNaNsOnly(id)) {
195195
return ROWS_CANNOT_MATCH;
@@ -201,7 +201,7 @@ public <T> Boolean notNaN(Bound<T> term) {
201201
@Override
202202
public <T> Boolean lt(Bound<T> term, Literal<T> lit) {
203203
// all terms are null preserving. see #isNullPreserving(Bound)
204-
Integer id = term.ref().fieldId();
204+
int id = term.ref().fieldId();
205205
if (containsNullsOnly(id) || containsNaNsOnly(id)) {
206206
return ROWS_CANNOT_MATCH;
207207
}
@@ -228,7 +228,7 @@ public <T> Boolean lt(Bound<T> term, Literal<T> lit) {
228228
@Override
229229
public <T> Boolean ltEq(Bound<T> term, Literal<T> lit) {
230230
// all terms are null preserving. see #isNullPreserving(Bound)
231-
Integer id = term.ref().fieldId();
231+
int id = term.ref().fieldId();
232232
if (containsNullsOnly(id) || containsNaNsOnly(id)) {
233233
return ROWS_CANNOT_MATCH;
234234
}
@@ -255,7 +255,7 @@ public <T> Boolean ltEq(Bound<T> term, Literal<T> lit) {
255255
@Override
256256
public <T> Boolean gt(Bound<T> term, Literal<T> lit) {
257257
// all terms are null preserving. see #isNullPreserving(Bound)
258-
Integer id = term.ref().fieldId();
258+
int id = term.ref().fieldId();
259259
if (containsNullsOnly(id) || containsNaNsOnly(id)) {
260260
return ROWS_CANNOT_MATCH;
261261
}
@@ -276,7 +276,7 @@ public <T> Boolean gt(Bound<T> term, Literal<T> lit) {
276276
@Override
277277
public <T> Boolean gtEq(Bound<T> term, Literal<T> lit) {
278278
// all terms are null preserving. see #isNullPreserving(Bound)
279-
Integer id = term.ref().fieldId();
279+
int id = term.ref().fieldId();
280280
if (containsNullsOnly(id) || containsNaNsOnly(id)) {
281281
return ROWS_CANNOT_MATCH;
282282
}
@@ -297,7 +297,7 @@ public <T> Boolean gtEq(Bound<T> term, Literal<T> lit) {
297297
@Override
298298
public <T> Boolean eq(Bound<T> term, Literal<T> lit) {
299299
// all terms are null preserving. see #isNullPreserving(Bound)
300-
Integer id = term.ref().fieldId();
300+
int id = term.ref().fieldId();
301301
if (containsNullsOnly(id) || containsNaNsOnly(id)) {
302302
return ROWS_CANNOT_MATCH;
303303
}
@@ -333,7 +333,7 @@ public <T> Boolean notEq(Bound<T> term, Literal<T> lit) {
333333
@Override
334334
public <T> Boolean in(Bound<T> term, Set<T> literalSet) {
335335
// all terms are null preserving. see #isNullPreserving(Bound)
336-
Integer id = term.ref().fieldId();
336+
int id = term.ref().fieldId();
337337
if (containsNullsOnly(id) || containsNaNsOnly(id)) {
338338
return ROWS_CANNOT_MATCH;
339339
}
@@ -392,7 +392,7 @@ public <T> Boolean startsWith(Bound<T> term, Literal<T> lit) {
392392
return ROWS_MIGHT_MATCH;
393393
}
394394

395-
Integer id = term.ref().fieldId();
395+
int id = term.ref().fieldId();
396396
if (containsNullsOnly(id)) {
397397
return ROWS_CANNOT_MATCH;
398398
}
@@ -431,7 +431,7 @@ public <T> Boolean startsWith(Bound<T> term, Literal<T> lit) {
431431
@Override
432432
public <T> Boolean notStartsWith(Bound<T> term, Literal<T> lit) {
433433
// the only transforms that produce strings are truncate and identity, which work with this
434-
Integer id = term.ref().fieldId();
434+
int id = term.ref().fieldId();
435435
if (mayContainNull(id)) {
436436
return ROWS_MIGHT_MATCH;
437437
}
@@ -515,7 +515,7 @@ private <T> T upperBound(Bound<T> term) {
515515
}
516516

517517
private <T> T parseLowerBound(BoundReference<T> ref) {
518-
Integer id = ref.fieldId();
518+
int id = ref.fieldId();
519519
if (lowerBounds != null && lowerBounds.containsKey(id)) {
520520
return Conversions.fromByteBuffer(ref.ref().type(), lowerBounds.get(id));
521521
}
@@ -524,7 +524,7 @@ private <T> T parseLowerBound(BoundReference<T> ref) {
524524
}
525525

526526
private <T> T parseUpperBound(BoundReference<T> ref) {
527-
Integer id = ref.fieldId();
527+
int id = ref.fieldId();
528528
if (upperBounds != null && upperBounds.containsKey(id)) {
529529
return Conversions.fromByteBuffer(ref.ref().type(), upperBounds.get(id));
530530
}
@@ -553,7 +553,7 @@ private <S, T> T transformUpperBound(BoundTransform<S, T> boundTransform) {
553553
}
554554

555555
private <T> T extractLowerBound(BoundExtract<T> bound) {
556-
Integer id = bound.ref().fieldId();
556+
int id = bound.ref().fieldId();
557557
if (lowerBounds != null && lowerBounds.containsKey(id)) {
558558
VariantObject fieldLowerBounds = parseBounds(lowerBounds.get(id));
559559
return VariantExpressionUtil.castTo(fieldLowerBounds.get(bound.path()), bound.type());
@@ -563,7 +563,7 @@ private <T> T extractLowerBound(BoundExtract<T> bound) {
563563
}
564564

565565
private <T> T extractUpperBound(BoundExtract<T> bound) {
566-
Integer id = bound.ref().fieldId();
566+
int id = bound.ref().fieldId();
567567
if (upperBounds != null && upperBounds.containsKey(id)) {
568568
VariantObject fieldUpperBounds = parseBounds(upperBounds.get(id));
569569
return VariantExpressionUtil.castTo(fieldUpperBounds.get(bound.path()), bound.type());
@@ -572,22 +572,6 @@ private <T> T extractUpperBound(BoundExtract<T> bound) {
572572
return null;
573573
}
574574

575-
/** Returns true if the expression term produces a null value for a null input. */
576-
// private boolean isNullPreserving(Bound<?> term) {
577-
// if (term instanceof BoundReference) {
578-
// return true;
579-
// } else if (term instanceof BoundTransform<?, ?>) {
580-
// // transforms must map null to null
581-
// return true;
582-
// } else if (term instanceof BoundExtract) {
583-
// // a null variant contains no non-null values
584-
// return true;
585-
// }
586-
//
587-
// // unknown cases are not null preserving
588-
// return false;
589-
// }
590-
591575
/** Returns true if the expression term produces a non-null value for non-null input. */
592576
private boolean isNonNullPreserving(Bound<?> term) {
593577
if (term instanceof BoundReference) {

api/src/main/java/org/apache/iceberg/expressions/StrictMetricsEvaluator.java

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -196,7 +196,7 @@ public <T> Boolean notNaN(BoundReference<T> ref) {
196196
@Override
197197
public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
198198
// Rows must match when: <----------Min----Max---X------->
199-
Integer id = ref.fieldId();
199+
int id = ref.fieldId();
200200
if (isNestedColumn(id)) {
201201
return ROWS_MIGHT_NOT_MATCH;
202202
}
@@ -220,7 +220,7 @@ public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
220220
@Override
221221
public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) {
222222
// Rows must match when: <----------Min----Max---X------->
223-
Integer id = ref.fieldId();
223+
int id = ref.fieldId();
224224
if (isNestedColumn(id)) {
225225
return ROWS_MIGHT_NOT_MATCH;
226226
}
@@ -244,7 +244,7 @@ public <T> Boolean ltEq(BoundReference<T> ref, Literal<T> lit) {
244244
@Override
245245
public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) {
246246
// Rows must match when: <-------X---Min----Max---------->
247-
Integer id = ref.fieldId();
247+
int id = ref.fieldId();
248248
if (isNestedColumn(id)) {
249249
return ROWS_MIGHT_NOT_MATCH;
250250
}
@@ -273,7 +273,7 @@ public <T> Boolean gt(BoundReference<T> ref, Literal<T> lit) {
273273
@Override
274274
public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) {
275275
// Rows must match when: <-------X---Min----Max---------->
276-
Integer id = ref.fieldId();
276+
int id = ref.fieldId();
277277
if (isNestedColumn(id)) {
278278
return ROWS_MIGHT_NOT_MATCH;
279279
}
@@ -302,7 +302,7 @@ public <T> Boolean gtEq(BoundReference<T> ref, Literal<T> lit) {
302302
@Override
303303
public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
304304
// Rows must match when Min == X == Max
305-
Integer id = ref.fieldId();
305+
int id = ref.fieldId();
306306
if (isNestedColumn(id)) {
307307
return ROWS_MIGHT_NOT_MATCH;
308308
}
@@ -338,7 +338,7 @@ public <T> Boolean eq(BoundReference<T> ref, Literal<T> lit) {
338338
@Override
339339
public <T> Boolean notEq(BoundReference<T> ref, Literal<T> lit) {
340340
// Rows must match when X < Min or Max < X because it is not in the range
341-
Integer id = ref.fieldId();
341+
int id = ref.fieldId();
342342
if (isNestedColumn(id)) {
343343
return ROWS_MIGHT_NOT_MATCH;
344344
}
@@ -375,7 +375,7 @@ public <T> Boolean notEq(BoundReference<T> ref, Literal<T> lit) {
375375

376376
@Override
377377
public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
378-
Integer id = ref.fieldId();
378+
int id = ref.fieldId();
379379
if (isNestedColumn(id)) {
380380
return ROWS_MIGHT_NOT_MATCH;
381381
}
@@ -415,7 +415,7 @@ public <T> Boolean in(BoundReference<T> ref, Set<T> literalSet) {
415415

416416
@Override
417417
public <T> Boolean notIn(BoundReference<T> ref, Set<T> literalSet) {
418-
Integer id = ref.fieldId();
418+
int id = ref.fieldId();
419419
if (isNestedColumn(id)) {
420420
return ROWS_MIGHT_NOT_MATCH;
421421
}

core/src/main/java/org/apache/iceberg/ManifestReader.java

Lines changed: 6 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -256,11 +256,11 @@ private CloseableIterable<ManifestEntry<F>> entries(boolean onlyLive) {
256256
}
257257

258258
private boolean hasRowFilter() {
259-
return rowFilter != null && rowFilter != Expressions.alwaysTrue();
259+
return rowFilter != alwaysTrue();
260260
}
261261

262262
private boolean hasPartitionFilter() {
263-
return partFilter != null && partFilter != Expressions.alwaysTrue();
263+
return partFilter != alwaysTrue();
264264
}
265265

266266
private boolean inPartitionSet(F fileToCheck) {
@@ -340,32 +340,22 @@ private Evaluator evaluator() {
340340
if (lazyEvaluator == null) {
341341
Expression projected = Projections.inclusive(spec, caseSensitive).project(rowFilter);
342342
Expression finalPartFilter = Expressions.and(projected, partFilter);
343-
if (finalPartFilter != null) {
344-
this.lazyEvaluator = new Evaluator(spec.partitionType(), finalPartFilter, caseSensitive);
345-
} else {
346-
this.lazyEvaluator =
347-
new Evaluator(spec.partitionType(), Expressions.alwaysTrue(), caseSensitive);
348-
}
343+
this.lazyEvaluator = new Evaluator(spec.partitionType(), finalPartFilter, caseSensitive);
349344
}
350345
return lazyEvaluator;
351346
}
352347

353348
private InclusiveMetricsEvaluator metricsEvaluator() {
354349
if (lazyMetricsEvaluator == null) {
355-
if (rowFilter != null) {
356-
this.lazyMetricsEvaluator =
357-
new InclusiveMetricsEvaluator(spec.schema(), rowFilter, caseSensitive);
358-
} else {
359-
this.lazyMetricsEvaluator =
360-
new InclusiveMetricsEvaluator(spec.schema(), Expressions.alwaysTrue(), caseSensitive);
361-
}
350+
this.lazyMetricsEvaluator =
351+
new InclusiveMetricsEvaluator(spec.schema(), rowFilter, caseSensitive);
362352
}
363353
return lazyMetricsEvaluator;
364354
}
365355

366356
private static boolean requireStatsProjection(Expression rowFilter, Collection<String> columns) {
367357
// Make sure we have all stats columns for metrics evaluator
368-
return rowFilter != Expressions.alwaysTrue()
358+
return rowFilter != alwaysTrue()
369359
&& columns != null
370360
&& !columns.containsAll(ManifestReader.ALL_COLUMNS)
371361
&& !columns.containsAll(STATS_COLUMNS);

0 commit comments

Comments
 (0)