Skip to content

Commit 9de3793

Browse files
committed
Fix floating-point precision loss for timestamps far from epoch
Replace float arithmetic (`* 1e3`, `/ 1000.0`) with integer arithmetic (`* 1000`, `// 1000`) in DateType serialization/deserialization, date-string interpretation, and the CQL datetime encoder, to prevent millisecond precision loss for dates far from the Unix epoch (e.g. year 2300 and later). Add datetime_from_ms_timestamp(), which operates directly on integer milliseconds and so avoids the lossy float-seconds intermediate representation. The Cython variant uses the fast timedelta_new() C API to avoid a performance regression on the hot deserialization path. Fixes GH-532.
1 parent 29ac4e1 commit 9de3793

10 files changed

Lines changed: 86 additions & 11 deletions

File tree

cassandra/cqltypes.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -636,24 +636,24 @@ def interpret_datestring(val):
636636
except ValueError:
637637
continue
638638
# scale seconds to millis for the raw value
639-
return (calendar.timegm(tval) + offset) * 1e3
639+
return (calendar.timegm(tval) + offset) * 1000
640640
else:
641641
raise ValueError("can't interpret %r as a date" % (val,))
642642

643643
@staticmethod
644644
def deserialize(byts, protocol_version):
645-
timestamp = int64_unpack(byts) / 1000.0
646-
return util.datetime_from_timestamp(timestamp)
645+
timestamp_ms = int64_unpack(byts)
646+
return util.datetime_from_ms_timestamp(timestamp_ms)
647647

648648
@staticmethod
649649
def serialize(v, protocol_version):
650650
try:
651651
# v is datetime
652652
timestamp_seconds = calendar.timegm(v.utctimetuple())
653-
timestamp = timestamp_seconds * 1e3 + getattr(v, 'microsecond', 0) / 1e3
653+
timestamp = timestamp_seconds * 1000 + getattr(v, 'microsecond', 0) // 1000
654654
except AttributeError:
655655
try:
656-
timestamp = calendar.timegm(v.timetuple()) * 1e3
656+
timestamp = calendar.timegm(v.timetuple()) * 1000
657657
except AttributeError:
658658
# Ints and floats are valid timestamps too
659659
if type(v) not in _number_types:

cassandra/cython_utils.pxd

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
from libc.stdint cimport int64_t
22
cdef datetime_from_timestamp(double timestamp)
3+
cdef datetime_from_ms_timestamp(int64_t timestamp_ms)

cassandra/cython_utils.pyx

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,22 @@ cdef datetime_from_timestamp(double timestamp):
6060
microseconds += <int>tmp
6161

6262
return DATETIME_EPOC + timedelta_new(days, seconds, microseconds)
63+
64+
65+
cdef datetime_from_ms_timestamp(int64_t timestamp_ms):
66+
"""
67+
Creates a datetime from a timestamp in milliseconds using integer
68+
arithmetic to preserve precision for large values.
69+
"""
70+
cdef int64_t total_seconds = timestamp_ms // 1000
71+
cdef int microseconds = <int>((timestamp_ms % 1000) * 1000)
72+
# For negative timestamps, ensure microseconds is non-negative
73+
if microseconds < 0:
74+
total_seconds -= 1
75+
microseconds += 1000000
76+
cdef int days = <int>(total_seconds // 86400)
77+
cdef int seconds = <int>(total_seconds % 86400)
78+
if seconds < 0:
79+
days -= 1
80+
seconds += 86400
81+
return DATETIME_EPOC + timedelta_new(days, seconds, microseconds)

cassandra/deserializers.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ from libc.stdint cimport int32_t, uint16_t
1717

1818
include 'cython_marshal.pyx'
1919
from cassandra.buffer cimport Buffer, to_bytes, slice_buffer
20-
from cassandra.cython_utils cimport datetime_from_timestamp
20+
from cassandra.cython_utils cimport datetime_from_timestamp, datetime_from_ms_timestamp
2121

2222
from cython.view cimport array as cython_array
2323
from cassandra.tuple cimport tuple_new, tuple_set
@@ -135,8 +135,8 @@ cdef class DesCounterColumnType(DesLongType):
135135

136136
cdef class DesDateType(Deserializer):
137137
cdef deserialize(self, Buffer *buf, int protocol_version):
138-
cdef double timestamp = unpack_num[int64_t](buf) / 1000.0
139-
return datetime_from_timestamp(timestamp)
138+
cdef int64_t timestamp_ms = unpack_num[int64_t](buf)
139+
return datetime_from_ms_timestamp(timestamp_ms)
140140

141141

142142
cdef class TimestampType(DesDateType):

cassandra/encoder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def cql_encode_datetime(self, val):
142142
with millisecond precision.
143143
"""
144144
timestamp = calendar.timegm(val.utctimetuple())
145-
return str(int(timestamp * 1e3 + getattr(val, 'microsecond', 0) / 1e3))
145+
return str(timestamp * 1000 + getattr(val, 'microsecond', 0) // 1000)
146146

147147
def cql_encode_date(self, val):
148148
"""

cassandra/util.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,16 @@ def datetime_from_timestamp(timestamp):
6262
return dt
6363

6464

65+
def datetime_from_ms_timestamp(timestamp_ms):
66+
"""
67+
Creates a timezone-agnostic datetime from a timestamp in milliseconds,
68+
using integer arithmetic to preserve precision for large values.
69+
70+
:param timestamp_ms: a unix timestamp, in milliseconds (integer)
71+
"""
72+
return DATETIME_EPOC + datetime.timedelta(milliseconds=timestamp_ms)
73+
74+
6575
def utc_datetime_from_ms_timestamp(timestamp):
6676
"""
6777
Creates a UTC datetime from a timestamp in milliseconds. See

tests/unit/cython/test_utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,7 @@ class UtilsTest(unittest.TestCase):
2424
@cythontest
2525
def test_datetime_from_timestamp(self):
2626
utils_testhelper.test_datetime_from_timestamp()
27+
28+
@cythontest
29+
def test_datetime_from_ms_timestamp(self):
30+
utils_testhelper.test_datetime_from_ms_timestamp()

tests/unit/cython/utils_testhelper.pyx

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,23 @@
1414

1515
import datetime
1616

17-
from cassandra.cython_utils cimport datetime_from_timestamp
17+
from cassandra.cython_utils cimport datetime_from_timestamp, datetime_from_ms_timestamp
1818

1919

2020
def test_datetime_from_timestamp():
2121
assert datetime_from_timestamp(1454781157.123456) == datetime.datetime(2016, 2, 6, 17, 52, 37, 123456)
2222
# PYTHON-452
2323
assert datetime_from_timestamp(2177403010.123456) == datetime.datetime(2038, 12, 31, 10, 10, 10, 123456)
24+
25+
26+
def test_datetime_from_ms_timestamp():
27+
# epoch
28+
assert datetime_from_ms_timestamp(0) == datetime.datetime(1970, 1, 1)
29+
# positive with millisecond precision
30+
assert datetime_from_ms_timestamp(1454781157123) == datetime.datetime(2016, 2, 6, 17, 52, 37, 123000)
31+
# large positive far from epoch (GH-532)
32+
assert datetime_from_ms_timestamp(10413792000001) == datetime.datetime(2300, 1, 1, 0, 0, 0, 1000)
33+
# negative timestamp
34+
assert datetime_from_ms_timestamp(-770172256000) == datetime.datetime(1945, 8, 5, 23, 15, 44)
35+
# large negative with millisecond precision
36+
assert datetime_from_ms_timestamp(-11676095999999) == datetime.datetime(1600, 1, 1, 0, 0, 0, 1000)

tests/unit/test_time_util.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,19 @@ def test_datetime_from_timestamp(self):
3636

3737
assert util.datetime_from_timestamp(2177403010.123456) == datetime.datetime(2038, 12, 31, 10, 10, 10, 123456)
3838

39+
def test_datetime_from_ms_timestamp(self):
40+
# epoch
41+
assert util.datetime_from_ms_timestamp(0) == datetime.datetime(1970, 1, 1)
42+
# positive with millisecond precision
43+
assert util.datetime_from_ms_timestamp(1000) == datetime.datetime(1970, 1, 1, 0, 0, 1)
44+
assert util.datetime_from_ms_timestamp(1454781157123) == datetime.datetime(2016, 2, 6, 17, 52, 37, 123000)
45+
# large positive far from epoch (GH-532) - must not lose precision
46+
assert util.datetime_from_ms_timestamp(10413792000001) == datetime.datetime(2300, 1, 1, 0, 0, 0, 1000)
47+
# negative timestamp
48+
assert util.datetime_from_ms_timestamp(-770172256000) == datetime.datetime(1945, 8, 5, 23, 15, 44)
49+
# large negative with millisecond precision
50+
assert util.datetime_from_ms_timestamp(-11676095999999) == datetime.datetime(1600, 1, 1, 0, 0, 0, 1000)
51+
3952
def test_times_from_uuid1(self):
4053
node = uuid.getnode()
4154
now = time.time()

tests/unit/test_types.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from cassandra.cqltypes import (
2525
CassandraType, DateRangeType, DateType, DecimalType,
2626
EmptyValue, LongType, SetType, UTF8Type,
27-
cql_typename, int8_pack, int64_pack, lookup_casstype,
27+
cql_typename, int8_pack, int64_pack, int64_unpack, lookup_casstype,
2828
lookup_casstype_simple, parse_casstype_args,
2929
int32_pack, Int32Type, ListType, MapType, VectorType,
3030
FloatType
@@ -241,6 +241,21 @@ def test_datetype(self):
241241
expected = 2177403010.123
242242
assert DateType.deserialize(int64_pack(int(1000 * expected)), 0) == datetime.datetime(2038, 12, 31, 10, 10, 10, 123000, tzinfo=datetime.timezone.utc).replace(tzinfo=None)
243243

244+
# Large timestamp precision (GH-532) - timestamps far from epoch must
245+
# not lose precision due to floating-point conversions.
246+
# 2300-01-01 00:00:00.001 UTC
247+
ts_ms = 10413792000001
248+
deserialized = DateType.deserialize(int64_pack(ts_ms), 0)
249+
assert deserialized == datetime.datetime(2300, 1, 1, 0, 0, 0, 1000)
250+
# Round-trip: serialize the deserialized datetime back to milliseconds
251+
assert int64_unpack(DateType.serialize(deserialized, 0)) == ts_ms
252+
253+
# Negative large timestamp: 1600-01-01 00:00:00.001 UTC
254+
ts_ms_neg = -11676096000000 + 1 # -11676095999999
255+
deserialized_neg = DateType.deserialize(int64_pack(ts_ms_neg), 0)
256+
assert deserialized_neg == datetime.datetime(1600, 1, 1, 0, 0, 0, 1000)
257+
assert int64_unpack(DateType.serialize(deserialized_neg, 0)) == ts_ms_neg
258+
244259
def test_collection_null_support(self):
245260
"""
246261
Test that null values in collection are decoded properly.

0 commit comments

Comments
 (0)