Skip to content

Commit 7fb5867

Browse files
Gayathri Srividya Rajavarapu
authored and committed
fix: correct NOT STARTS WITH projection for truncated partitions
1 parent 6da06ad commit 7fb5867

2 files changed

Lines changed: 25 additions & 3 deletions

File tree

pyiceberg/transforms.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -813,7 +813,16 @@ def project(self, name: str, pred: BoundPredicate) -> UnboundPredicate | None:
813813
return _truncate_number(name, pred, self.transform(field_type))
814814
elif isinstance(field_type, (BinaryType, StringType)):
815815
if isinstance(pred, BoundLiteralPredicate):
816-
return _truncate_array(name, pred, self.transform(field_type))
816+
if isinstance(pred, BoundNotStartsWith):
817+
literal_width = len(pred.literal.value)
818+
if literal_width < self.width:
819+
return pred.as_unbound(name, pred.literal.value)
820+
elif literal_width == self.width:
821+
return NotEqualTo(name, pred.literal.value)
822+
else:
823+
return None
824+
else:
825+
return _truncate_array(name, pred, self.transform(field_type))
817826

818827
def strict_project(self, name: str, pred: BoundPredicate) -> UnboundPredicate | None:
819828
field_type = pred.term.ref().field.field_type

tests/test_transforms.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,9 +1026,22 @@ def test_projection_truncate_string_starts_with(bound_reference_str: BoundRefere
10261026

10271027

10281028
def test_projection_truncate_string_not_starts_with(bound_reference_str: BoundReference) -> None:
1029+
# literal_width (5) > truncate width (2): no inclusive projection possible (unsafe)
1030+
assert TruncateTransform(2).project("name", BoundNotStartsWith(term=bound_reference_str, literal=literal("hello"))) is None
1031+
1032+
1033+
def test_projection_truncate_string_not_starts_with_shorter_literal(bound_reference_str: BoundReference) -> None:
1034+
# literal_width (2) == truncate width (2): project to !=
1035+
assert TruncateTransform(2).project(
1036+
"name", BoundNotStartsWith(term=bound_reference_str, literal=literal("he"))
1037+
) == NotEqualTo(term="name", literal=literal("he"))
1038+
1039+
1040+
def test_projection_truncate_string_not_starts_with_original_literal(bound_reference_str: BoundReference) -> None:
1041+
# literal_width (1) < truncate width (2): keep original literal
10291042
assert TruncateTransform(2).project(
1030-
"name", BoundNotStartsWith(term=bound_reference_str, literal=literal("hello"))
1031-
) == NotStartsWith(term="name", literal=literal("he"))
1043+
"name", BoundNotStartsWith(term=bound_reference_str, literal=literal("h"))
1044+
) == NotStartsWith(term="name", literal=literal("h"))
10321045

10331046

10341047
def _test_projection(lhs: UnboundPredicate | None, rhs: UnboundPredicate | None) -> None:

0 commit comments

Comments
 (0)