Skip to content

Commit b9185ec

Browse files
ebyhrkevinjqliu
andauthored
Fix NotStartsWith residual evaluation to return correct result (#3503)
# Rationale for this change - Fixes #3496 ## Are these changes tested? Yes - the added test fails if we revert this PR's change. ## Are there any user-facing changes? Yes - this PR fixes a correctness issue of `NotStartsWith`. --------- Co-authored-by: Kevin Liu <kevinjqliu@users.noreply.github.com> Co-authored-by: Kevin Liu <kevin.jq.liu@gmail.com>
1 parent fe7c749 commit b9185ec

3 files changed

Lines changed: 28 additions & 6 deletions

File tree

pyiceberg/expressions/visitors.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1905,10 +1905,7 @@ def visit_starts_with(self, term: BoundTerm, literal: LiteralValue) -> BooleanEx
19051905
return AlwaysFalse()
19061906

19071907
def visit_not_starts_with(self, term: BoundTerm, literal: LiteralValue) -> BooleanExpression:
1908-
if not self.visit_starts_with(term, literal):
1909-
return AlwaysTrue()
1910-
else:
1911-
return AlwaysFalse()
1908+
return ~self.visit_starts_with(term, literal)
19121909

19131910
def visit_bound_predicate(self, predicate: BoundPredicate) -> BooleanExpression:
19141911
"""

tests/expressions/test_residual_evaluator.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
from pyiceberg.schema import Schema
4242
from pyiceberg.transforms import DayTransform, IdentityTransform
4343
from pyiceberg.typedef import Record
44-
from pyiceberg.types import DoubleType, FloatType, IntegerType, NestedField, TimestampType
44+
from pyiceberg.types import DoubleType, FloatType, IntegerType, NestedField, StringType, TimestampType
4545

4646

4747
def test_identity_transform_residual() -> None:
@@ -249,3 +249,29 @@ def test_not_in_timestamp() -> None:
249249
ts_day += 3 # type: ignore
250250
residual = res_eval.residual_for(Record(ts_day))
251251
assert residual == AlwaysTrue()
252+
253+
254+
def test_starts_with() -> None:
255+
schema = Schema(NestedField(1, "x", StringType()))
256+
spec = PartitionSpec(PartitionField(1, 1001, IdentityTransform(), "x_part"))
257+
258+
predicate = StartsWith("x", "a")
259+
res_eval = residual_evaluator_of(spec=spec, expr=predicate, case_sensitive=True, schema=schema)
260+
261+
assert res_eval.residual_for(Record("bb")) == AlwaysFalse()
262+
assert res_eval.residual_for(Record("abc")) == AlwaysTrue()
263+
assert res_eval.residual_for(Record("a")) == AlwaysTrue()
264+
assert res_eval.residual_for(Record("zoo")) == AlwaysFalse()
265+
266+
267+
def test_not_starts_with() -> None:
268+
schema = Schema(NestedField(1, "x", StringType()))
269+
spec = PartitionSpec(PartitionField(1, 1001, IdentityTransform(), "x_part"))
270+
271+
predicate = NotStartsWith("x", "a")
272+
res_eval = residual_evaluator_of(spec=spec, expr=predicate, case_sensitive=True, schema=schema)
273+
274+
assert res_eval.residual_for(Record("bb")) == AlwaysTrue()
275+
assert res_eval.residual_for(Record("abc")) == AlwaysFalse()
276+
assert res_eval.residual_for(Record("a")) == AlwaysFalse()
277+
assert res_eval.residual_for(Record("zoo")) == AlwaysTrue()

tests/test_transforms.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1030,7 +1030,6 @@ def test_projection_truncate_string_not_starts_with_longer_literal(bound_referen
10301030
assert TruncateTransform(2).project("name", BoundNotStartsWith(term=bound_reference_str, literal=literal("hello"))) is None
10311031

10321032

1033-
def test_projection_truncate_string_not_starts_with_shorter_literal(bound_reference_str: BoundReference) -> None:
10341033
def test_projection_truncate_string_not_starts_with_equal_width_literal(bound_reference_str: BoundReference) -> None:
10351034
# Valid projection: improve NOT STARTS WITH "he" to partition != "he".
10361035
assert TruncateTransform(2).project(

0 commit comments

Comments
 (0)