Fix ResidualVisitor null handling for comparisons and not-NaN

tanmayrauth · tanmayrauth · commit fceabb32141a · 2026-06-14T18:00:19.000+05:30
ResidualVisitor diverged from row-level expression evaluation on null values: - visit_less_than / visit_less_than_or_equal / visit_greater_than / visit_greater_than_or_equal compared the partition value to the literal directly. A nullable identity-partitioned column with a None partition value raised a TypeError (None < literal), while _ExpressionEvaluator guards with "value is not None" and treats the row as non-matching. Add the same guard so a null partition value yields AlwaysFalse instead of crashing during scan planning (ResidualEvaluator.residual_for). - visit_not_nan returned AlwaysFalse for a None value because None is not a SupportsFloat, whereas _ExpressionEvaluator.visit_not_nan (val == val) treats null as satisfying not-NaN. Invert the check so only NaN fails not-NaN and null (and any non-float value) passes, matching row evaluation. Update the test that encoded the old NotNaN(None) -> AlwaysFalse result and add a regression test covering None partition values for all four ordering comparisons. Fixes #3498 (partially)
diff --git a/pyiceberg/expressions/visitors.py b/pyiceberg/expressions/visitors.py
@@ -1844,31 +1844,37 @@ def visit_is_nan(self, term: BoundTerm) -> BooleanExpression:
 
     def visit_not_nan(self, term: BoundTerm) -> BooleanExpression:
         val = term.eval(self.struct)
-        if isinstance(val, SupportsFloat) and not math.isnan(val):
-            return self.visit_true()
-        else:
+        # Mirror _ExpressionEvaluator.visit_not_nan (val == val): only NaN fails not-NaN.
+        # A null (and any non-float value) is not NaN, so the predicate holds.
+        if isinstance(val, SupportsFloat) and math.isnan(val):
             return self.visit_false()
+        else:
+            return self.visit_true()
 
     def visit_less_than(self, term: BoundTerm, literal: LiteralValue) -> BooleanExpression:
-        if term.eval(self.struct) < literal.value:
+        value = term.eval(self.struct)
+        if value is not None and value < literal.value:
             return self.visit_true()
         else:
             return self.visit_false()
 
     def visit_less_than_or_equal(self, term: BoundTerm, literal: LiteralValue) -> BooleanExpression:
-        if term.eval(self.struct) <= literal.value:
+        value = term.eval(self.struct)
+        if value is not None and value <= literal.value:
             return self.visit_true()
         else:
             return self.visit_false()
 
     def visit_greater_than(self, term: BoundTerm, literal: LiteralValue) -> BooleanExpression:
-        if term.eval(self.struct) > literal.value:
+        value = term.eval(self.struct)
+        if value is not None and value > literal.value:
             return self.visit_true()
         else:
             return self.visit_false()
 
     def visit_greater_than_or_equal(self, term: BoundTerm, literal: LiteralValue) -> BooleanExpression:
-        if term.eval(self.struct) >= literal.value:
+        value = term.eval(self.struct)
+        if value is not None and value >= literal.value:
             return self.visit_true()
         else:
             return self.visit_false()
diff --git a/tests/expressions/test_residual_evaluator.py b/tests/expressions/test_residual_evaluator.py
@@ -28,6 +28,7 @@
     IsNaN,
     IsNull,
     LessThan,
+    LessThanOrEqual,
     NotIn,
     NotNaN,
     NotNull,
@@ -211,7 +212,7 @@ def test_is_not_nan() -> None:
     res_eval = residual_evaluator_of(spec=spec, expr=predicate, case_sensitive=True, schema=schema)
 
     residual = res_eval.residual_for(Record(None))
-    assert residual == AlwaysFalse()
+    assert residual == AlwaysTrue()
 
     residual = res_eval.residual_for(Record(2))
     assert residual == AlwaysTrue()
@@ -223,12 +224,26 @@ def test_is_not_nan() -> None:
     res_eval = residual_evaluator_of(spec=spec, expr=predicate, case_sensitive=True, schema=schema)
 
     residual = res_eval.residual_for(Record(None))
-    assert residual == AlwaysFalse()
+    assert residual == AlwaysTrue()
 
     residual = res_eval.residual_for(Record(2))
     assert residual == AlwaysTrue()
 
 
+def test_comparison_residual_with_null_partition_value() -> None:
+    # Regression test for https://github.com/apache/iceberg-python/issues/3498
+    # A nullable identity-partitioned column whose partition value is None must not raise a
+    # TypeError when compared against a literal; it should behave like row evaluation, where a
+    # null value never satisfies an ordering predicate.
+    schema = Schema(NestedField(50, "x", IntegerType(), required=False), NestedField(51, "hour", IntegerType()))
+    spec = PartitionSpec(PartitionField(50, 1050, IdentityTransform(), "x_part"))
+
+    for predicate in (LessThan("x", 1), LessThanOrEqual("x", 1), GreaterThan("x", 1), GreaterThanOrEqual("x", 1)):
+        res_eval = residual_evaluator_of(spec=spec, expr=predicate, case_sensitive=True, schema=schema)
+        residual = res_eval.residual_for(Record(None))
+        assert residual == AlwaysFalse(), f"null partition value should not match {predicate}"
+
+
 def test_not_in_timestamp() -> None:
     schema = Schema(NestedField(50, "ts", TimestampType()), NestedField(51, "dateint", IntegerType()))