Skip to content

Commit a0be7e0

Browse files
committed
Truncate - length > width
1 parent a1e12ad commit a0be7e0

2 files changed

Lines changed: 16 additions & 1 deletion

File tree

pyiceberg/transforms.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,10 @@ def project(self, name: str, pred: BoundPredicate) -> UnboundPredicate | None:
812812
if isinstance(pred, BoundLiteralPredicate):
813813
return _truncate_number(name, pred, self.transform(field_type))
814814
elif isinstance(field_type, (BinaryType, StringType)):
815+
if isinstance(pred, BoundNotStartsWith) and len(pred.literal.value) > self.width:
816+
# A prefix longer than the width can't be projected: the truncated partition
817+
# may hold both rows that start with the prefix and rows that do not, so it cannot be pruned.
818+
return None
815819
if isinstance(pred, BoundLiteralPredicate):
816820
return _truncate_array(name, pred, self.transform(field_type))
817821

tests/test_transforms.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1026,10 +1026,21 @@ def test_projection_truncate_string_starts_with(bound_reference_str: BoundRefere
10261026

10271027

10281028
def test_projection_truncate_string_not_starts_with(bound_reference_str: BoundReference) -> None:
1029+
# shorter than width: projects to not-starts-with on the untruncated prefix
10291030
assert TruncateTransform(2).project(
1030-
"name", BoundNotStartsWith(term=bound_reference_str, literal=literal("hello"))
1031+
"name", BoundNotStartsWith(term=bound_reference_str, literal=literal("h"))
1032+
) == NotStartsWith(term="name", literal=literal("h"))
1033+
1034+
# equal to width: projects to not-starts-with on the full prefix
1035+
assert TruncateTransform(2).project(
1036+
"name", BoundNotStartsWith(term=bound_reference_str, literal=literal("he"))
10311037
) == NotStartsWith(term="name", literal=literal("he"))
10321038

1039+
# longer than width: can't be projected, so the partition is always read
1040+
assert (
1041+
TruncateTransform(2).project("name", BoundNotStartsWith(term=bound_reference_str, literal=literal("hello"))) is None
1042+
)
1043+
10331044

10341045
def _test_projection(lhs: UnboundPredicate | None, rhs: UnboundPredicate | None) -> None:
10351046
assert type(lhs) is type(lhs), f"Different classes: {type(lhs)} != {type(rhs)}"

0 commit comments

Comments
 (0)