Skip to content

Commit 1ba39b5

Browse files
committed
fix(decimal): use minimal byte length for negative powers of two
1 parent a1e12ad commit 1ba39b5

2 files changed

Lines changed: 31 additions & 5 deletions

File tree

pyiceberg/utils/decimal.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,16 @@ def bytes_required(value: int | Decimal) -> int:
5858
int: the minimum number of bytes needed to serialize the value.
5959
"""
6060
if isinstance(value, int):
61-
return (value.bit_length() + 8) // 8
61+
unscaled = value
6262
elif isinstance(value, Decimal):
63-
return (decimal_to_unscaled(value).bit_length() + 8) // 8
64-
65-
raise ValueError(f"Unsupported value: {value}")
63+
unscaled = decimal_to_unscaled(value)
64+
else:
65+
raise ValueError(f"Unsupported value: {value}")
66+
67+
# bit_length() overcounts negatives equal to -2**(8k-1) (e.g. -128, -32768) by one byte;
68+
# using (unscaled + 1) for negatives yields the true minimum, matching the Iceberg spec.
69+
n_bits = unscaled.bit_length() if unscaled >= 0 else (unscaled + 1).bit_length()
70+
return (n_bits + 8) // 8
6671

6772

6873
def decimal_to_bytes(value: Decimal, byte_length: int | None = None) -> bytes:

tests/utils/test_decimal.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
import pytest
2020

21-
from pyiceberg.utils.decimal import decimal_required_bytes, decimal_to_bytes
21+
from pyiceberg.utils.decimal import bytes_required, decimal_required_bytes, decimal_to_bytes
2222

2323

2424
def test_decimal_required_bytes() -> None:
@@ -42,8 +42,29 @@ def test_decimal_required_bytes() -> None:
4242
assert "(0, 40]" in str(exc_info.value)
4343

4444

45+
def test_bytes_required() -> None:
46+
# Positive values and the negative values just past a byte boundary are unaffected.
47+
assert bytes_required(0) == 1
48+
assert bytes_required(127) == 1
49+
assert bytes_required(128) == 2
50+
assert bytes_required(-127) == 1
51+
assert bytes_required(-129) == 2
52+
# The most-negative value that fits in N bytes (-2**(8N-1)) must require exactly N bytes,
53+
# not N + 1. These are the cases the previous (value.bit_length() + 8) // 8 formula overcounted.
54+
assert bytes_required(-128) == 1
55+
assert bytes_required(-32768) == 2
56+
assert bytes_required(-8388608) == 3
57+
# The same applies when the unscaled value comes from a Decimal.
58+
assert bytes_required(Decimal("-1.28")) == 1
59+
assert bytes_required(Decimal("-327.68")) == 2
60+
61+
4562
def test_decimal_to_bytes() -> None:
4663
# Check the boundary between 2 and 3 bytes.
4764
# 2 bytes has a minimum of -32,768 and a maximum value of 32,767 (inclusive).
4865
assert decimal_to_bytes(Decimal("32767.")) == b"\x7f\xff"
4966
assert decimal_to_bytes(Decimal("32768.")) == b"\x00\x80\x00"
67+
# The most-negative value for a given width must serialize to the minimum number of bytes
68+
# (matching the Iceberg spec / Java BigInteger.toByteArray), not one byte longer.
69+
assert decimal_to_bytes(Decimal("-1.28")) == b"\x80"
70+
assert decimal_to_bytes(Decimal("-327.68")) == b"\x80\x00"

0 commit comments

Comments
 (0)