Skip to content

Commit fceabb3

Browse files
committed
Fix ResidualVisitor null handling for comparisons and not-NaN
ResidualVisitor diverged from row-level expression evaluation on null values: - visit_less_than / visit_less_than_or_equal / visit_greater_than / visit_greater_than_or_equal compared the partition value to the literal directly. A nullable identity-partitioned column with a None partition value raised a TypeError (None < literal), while _ExpressionEvaluator guards with "value is not None" and treats the row as non-matching. Add the same guard so a null partition value yields AlwaysFalse instead of crashing during scan planning (ResidualEvaluator.residual_for). - visit_not_nan returned AlwaysFalse for a None value because None is not a SupportsFloat, whereas _ExpressionEvaluator.visit_not_nan (val == val) treats null as satisfying not-NaN. Invert the check so only NaN fails not-NaN and null (and any non-float value) passes, matching row evaluation. Update the test that encoded the old NotNaN(None) -> AlwaysFalse result and add a regression test covering None partition values for all four ordering comparisons. Fixes #3498 (partially)
1 parent d101879 commit fceabb3

2 files changed

Lines changed: 30 additions & 9 deletions

File tree

pyiceberg/expressions/visitors.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1844,31 +1844,37 @@ def visit_is_nan(self, term: BoundTerm) -> BooleanExpression:
18441844

18451845
def visit_not_nan(self, term: BoundTerm) -> BooleanExpression:
18461846
val = term.eval(self.struct)
1847-
if isinstance(val, SupportsFloat) and not math.isnan(val):
1848-
return self.visit_true()
1849-
else:
1847+
# Mirror _ExpressionEvaluator.visit_not_nan (val == val): only NaN fails not-NaN.
1848+
# A null (and any non-float value) is not NaN, so the predicate holds.
1849+
if isinstance(val, SupportsFloat) and math.isnan(val):
18501850
return self.visit_false()
1851+
else:
1852+
return self.visit_true()
18511853

18521854
def visit_less_than(self, term: BoundTerm, literal: LiteralValue) -> BooleanExpression:
1853-
if term.eval(self.struct) < literal.value:
1855+
value = term.eval(self.struct)
1856+
if value is not None and value < literal.value:
18541857
return self.visit_true()
18551858
else:
18561859
return self.visit_false()
18571860

18581861
def visit_less_than_or_equal(self, term: BoundTerm, literal: LiteralValue) -> BooleanExpression:
1859-
if term.eval(self.struct) <= literal.value:
1862+
value = term.eval(self.struct)
1863+
if value is not None and value <= literal.value:
18601864
return self.visit_true()
18611865
else:
18621866
return self.visit_false()
18631867

18641868
def visit_greater_than(self, term: BoundTerm, literal: LiteralValue) -> BooleanExpression:
1865-
if term.eval(self.struct) > literal.value:
1869+
value = term.eval(self.struct)
1870+
if value is not None and value > literal.value:
18661871
return self.visit_true()
18671872
else:
18681873
return self.visit_false()
18691874

18701875
def visit_greater_than_or_equal(self, term: BoundTerm, literal: LiteralValue) -> BooleanExpression:
1871-
if term.eval(self.struct) >= literal.value:
1876+
value = term.eval(self.struct)
1877+
if value is not None and value >= literal.value:
18721878
return self.visit_true()
18731879
else:
18741880
return self.visit_false()

tests/expressions/test_residual_evaluator.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
IsNaN,
2929
IsNull,
3030
LessThan,
31+
LessThanOrEqual,
3132
NotIn,
3233
NotNaN,
3334
NotNull,
@@ -211,7 +212,7 @@ def test_is_not_nan() -> None:
211212
res_eval = residual_evaluator_of(spec=spec, expr=predicate, case_sensitive=True, schema=schema)
212213

213214
residual = res_eval.residual_for(Record(None))
214-
assert residual == AlwaysFalse()
215+
assert residual == AlwaysTrue()
215216

216217
residual = res_eval.residual_for(Record(2))
217218
assert residual == AlwaysTrue()
@@ -223,12 +224,26 @@ def test_is_not_nan() -> None:
223224
res_eval = residual_evaluator_of(spec=spec, expr=predicate, case_sensitive=True, schema=schema)
224225

225226
residual = res_eval.residual_for(Record(None))
226-
assert residual == AlwaysFalse()
227+
assert residual == AlwaysTrue()
227228

228229
residual = res_eval.residual_for(Record(2))
229230
assert residual == AlwaysTrue()
230231

231232

233+
def test_comparison_residual_with_null_partition_value() -> None:
234+
# Regression test for https://github.com/apache/iceberg-python/issues/3498
235+
# A nullable identity-partitioned column whose partition value is None must not raise a
236+
# TypeError when compared against a literal; it should behave like row evaluation, where a
237+
# null value never satisfies an ordering predicate.
238+
schema = Schema(NestedField(50, "x", IntegerType(), required=False), NestedField(51, "hour", IntegerType()))
239+
spec = PartitionSpec(PartitionField(50, 1050, IdentityTransform(), "x_part"))
240+
241+
for predicate in (LessThan("x", 1), LessThanOrEqual("x", 1), GreaterThan("x", 1), GreaterThanOrEqual("x", 1)):
242+
res_eval = residual_evaluator_of(spec=spec, expr=predicate, case_sensitive=True, schema=schema)
243+
residual = res_eval.residual_for(Record(None))
244+
assert residual == AlwaysFalse(), f"null partition value should not match {predicate}"
245+
246+
232247
def test_not_in_timestamp() -> None:
233248
schema = Schema(NestedField(50, "ts", TimestampType()), NestedField(51, "dateint", IntegerType()))
234249

0 commit comments

Comments
 (0)