Skip to content

Commit f9f2de0

Browse files
committed
fix: preserve precision for large integral string conversions
1 parent 3bd5f27 commit f9f2de0

4 files changed

Lines changed: 17 additions & 6 deletions

File tree

pyiceberg/conversions.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -143,7 +143,7 @@ def _(primitive_type: PrimitiveType, value_str: str) -> int:
143143
_, _, exponent = Decimal(value_str).as_tuple()
144144
if exponent != 0: # Raise if there are digits to the right of the decimal
145145
raise ValueError(f"Cannot convert partition value, value cannot have fractional digits for {primitive_type} partition")
146-
return int(float(value_str))
146+
return int(value_str)
147147

148148

149149
@partition_to_py.register(FloatType)

pyiceberg/expressions/literals.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
import struct
2525
from abc import ABC, abstractmethod
2626
from datetime import date, datetime, time
27-
from decimal import ROUND_HALF_UP, Decimal
27+
from decimal import ROUND_DOWN, ROUND_HALF_UP, Decimal
2828
from functools import singledispatchmethod
2929
from math import isnan
3030
from typing import Any, Generic
@@ -555,27 +555,27 @@ def _(self, _: StringType) -> Literal[str]:
555555
@to.register(IntegerType)
556556
def _(self, type_var: IntegerType) -> Literal[int]:
557557
try:
558-
number = int(float(self.value))
558+
number = int(Decimal(self.value).to_integral_value(rounding=ROUND_DOWN))
559559

560560
if IntegerType.max < number:
561561
return IntAboveMax()
562562
elif IntegerType.min > number:
563563
return IntBelowMin()
564564
return LongLiteral(number)
565-
except ValueError as e:
565+
except (ArithmeticError, OverflowError, ValueError) as e:
566566
raise ValueError(f"Could not convert {self.value} into a {type_var}") from e
567567

568568
@to.register(LongType)
569569
def _(self, type_var: LongType) -> Literal[int]:
570570
try:
571-
long_value = int(float(self.value))
571+
long_value = int(Decimal(self.value).to_integral_value(rounding=ROUND_DOWN))
572572
if LongType.max < long_value:
573573
return LongAboveMax()
574574
elif LongType.min > long_value:
575575
return LongBelowMin()
576576
else:
577577
return LongLiteral(long_value)
578-
except (TypeError, ValueError) as e:
578+
except (ArithmeticError, OverflowError, TypeError, ValueError) as e:
579579
raise ValueError(f"Could not convert {self.value} into a {type_var}") from e
580580

581581
@to.register(DateType)

tests/expressions/test_literals.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -845,6 +845,14 @@ def test_string_to_int_min_value() -> None:
845845
assert isinstance(literal(str(IntegerType.min - 1)).to(IntegerType()), IntBelowMin)
846846

847847

848+
def test_string_to_long_max_value_without_precision_loss() -> None:
849+
assert literal(str(LongType.max)).to(LongType()) == literal(LongType.max)
850+
851+
852+
def test_string_to_long_large_integer_without_precision_loss() -> None:
853+
assert literal("9007199254740993").to(LongType()) == literal(9007199254740993)
854+
855+
848856
def test_string_to_integer_type_invalid_value() -> None:
849857
with pytest.raises(ValueError) as e:
850858
_ = literal("abc").to(IntegerType())

tests/test_conversions.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -160,8 +160,11 @@ def test_unscaled_to_decimal(unscaled: int, scale: int, expected_result: Decimal
160160
(IntegerType(), "1", 1),
161161
(IntegerType(), "9999", 9999),
162162
(LongType(), "123456789", 123456789),
163+
(LongType(), "9007199254740993", 9007199254740993),
164+
(LongType(), str(LongType.max), LongType.max),
163165
(FloatType(), "1.1", 1.1),
164166
(DoubleType(), "99999.9", 99999.9),
167+
(TimestampNanoType(), "9007199254740993", 9007199254740993),
165168
(DecimalType(5, 2), "123.45", Decimal("123.45")),
166169
(StringType(), "foo", "foo"),
167170
(UUIDType(), "f79c3e09-677c-4bbd-a479-3f349cb785e7", uuid.UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7")),

0 commit comments

Comments
 (0)