Skip to content

Commit 4456cd4

Browse files
committed
fix: address remaining review feedback for Cython serializers
- Fix float range check: use cast-to-float + isinf check instead of FLT_MAX comparison, matching struct.pack('>f') rounding semantics (e.g. 3.4028235e38 now correctly accepted) - Normalize vector input with tuple() so non-subscriptable iterables with __len__ + __iter__ work (matches Python VectorType.serialize) - Change vector_size from int to Py_ssize_t to prevent overflow - Fix int32 vector double-indexing: store values[i] in local variable - Update docstrings to reflect actual behavior - Add regression tests for float boundary rounding and iterable input
1 parent d382d5f commit 4456cd4

2 files changed

Lines changed: 92 additions & 17 deletions

File tree

cassandra/serializers.pyx

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,19 @@ from cassandra.util import is_little_endian
4444
# ---------------------------------------------------------------------------
4545

4646
cdef inline void _check_float_range(double value) except *:
47-
"""Raise OverflowError for finite values outside float32 range.
47+
"""Raise OverflowError for finite values that overflow float32.
48+
49+
Uses the same semantics as ``struct.pack('>f', value)``: cast the
50+
double to float and reject only if the result is ±inf while the
51+
input was finite. This correctly accepts values slightly above
52+
``FLT_MAX`` that round down (e.g. 3.4028235e38).
4853
4954
Intentionally raises OverflowError (not struct.error) so callers
50-
can catch a single exception type. The accepted range matches
51-
struct.pack('>f', ...). inf, -inf, and nan pass through unchanged.
55+
can catch a single exception type. inf, -inf, and nan pass
56+
through unchanged.
5257
"""
5358
if not isinf(value) and not isnan(value):
54-
if value > <double>FLT_MAX or value < -<double>FLT_MAX:
59+
if isinf(<float>value):
5560
raise OverflowError(
5661
"Value %r too large for float32 (max %r)" % (value, FLT_MAX))
5762

@@ -179,7 +184,7 @@ cdef class SerVectorType(Serializer):
179184
per-element Python serialization.
180185
"""
181186

182-
cdef int vector_size
187+
cdef Py_ssize_t vector_size
183188
cdef object subtype
184189
# 0 = generic, 1 = float, 2 = double, 3 = int32
185190
cdef int type_code
@@ -199,7 +204,11 @@ cdef class SerVectorType(Serializer):
199204
self.type_code = 0
200205

201206
cpdef bytes serialize(self, object value, int protocol_version):
202-
cdef int v_length = len(value)
207+
# Normalize to tuple so indexing works for any iterable with __len__.
208+
# The Python VectorType.serialize() only requires len() + iteration,
209+
# so we must accept the same inputs.
210+
value = tuple(value)
211+
cdef Py_ssize_t v_length = len(value)
203212
if v_length != self.vector_size:
204213
raise ValueError(
205214
"Expected sequence of size %d for vector of type %s and "
@@ -219,9 +228,8 @@ cdef class SerVectorType(Serializer):
219228
cdef inline bytes _serialize_float(self, object values):
220229
"""Serialize a list of floats into a contiguous big-endian buffer.
221230
222-
Uses ``values[i]`` (``__getitem__``) intentionally rather than
223-
iteration so Cython can emit a tight indexed loop with minimal
224-
Python overhead. Callers must pass a sequence (list, tuple, etc.).
231+
``values`` is already a tuple (normalized in ``serialize()``), so
232+
indexing is always safe and fast.
225233
"""
226234
cdef Py_ssize_t i
227235
cdef Py_ssize_t buf_size = self.vector_size * 4
@@ -255,9 +263,8 @@ cdef class SerVectorType(Serializer):
255263
cdef inline bytes _serialize_double(self, object values):
256264
"""Serialize a list of doubles into a contiguous big-endian buffer.
257265
258-
Uses ``values[i]`` (``__getitem__``) intentionally rather than
259-
iteration so Cython can emit a tight indexed loop with minimal
260-
Python overhead. Callers must pass a sequence (list, tuple, etc.).
266+
``values`` is already a tuple (normalized in ``serialize()``), so
267+
indexing is always safe and fast.
261268
"""
262269
cdef Py_ssize_t i
263270
cdef Py_ssize_t buf_size = self.vector_size * 8
@@ -292,9 +299,8 @@ cdef class SerVectorType(Serializer):
292299
cdef inline bytes _serialize_int32(self, object values):
293300
"""Serialize a list of int32 values into a contiguous big-endian buffer.
294301
295-
Uses ``values[i]`` (``__getitem__``) intentionally rather than
296-
iteration so Cython can emit a tight indexed loop with minimal
297-
Python overhead. Callers must pass a sequence (list, tuple, etc.).
302+
``values`` is already a tuple (normalized in ``serialize()``), so
303+
indexing is always safe and fast.
298304
"""
299305
cdef Py_ssize_t i
300306
cdef Py_ssize_t buf_size = self.vector_size * 4
@@ -304,12 +310,14 @@ cdef class SerVectorType(Serializer):
304310
cdef object result = PyBytes_FromStringAndSize(NULL, buf_size)
305311
cdef char *buf = PyBytes_AS_STRING(result)
306312
cdef int32_t val
313+
cdef object item
307314
cdef char *src
308315
cdef char *dst
309316

310317
for i in range(self.vector_size):
311-
_check_int32_range(values[i])
312-
val = <int32_t>values[i]
318+
item = values[i]
319+
_check_int32_range(item)
320+
val = <int32_t>item
313321
src = <char *>&val
314322
dst = buf + i * 4
315323

tests/unit/test_serializers.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,16 @@ def test_flt_max(self):
107107
self._assert_equiv(flt_max)
108108
self._assert_equiv(-flt_max)
109109

110+
def test_flt_max_rounding_boundary(self):
111+
"""Values slightly above FLT_MAX that round down should be accepted.
112+
113+
3.4028235e38 is above FLT_MAX (3.4028234663852886e38) but
114+
struct.pack('>f', 3.4028235e38) rounds it down to FLT_MAX.
115+
The Cython serializer must accept the same inputs.
116+
"""
117+
self._assert_equiv(3.4028235e38)
118+
self._assert_equiv(-3.4028235e38)
119+
110120
def test_subnormal_values(self):
111121
"""Subnormal (denormalized) floats should serialize correctly."""
112122
self._assert_equiv(1e-45)
@@ -451,6 +461,63 @@ def test_int32_vector_round_trip(self):
451461
self.assertEqual(list(deserialized), values)
452462

453463

464+
@cythontest
465+
class TestSerVectorTypeIterableInput(unittest.TestCase):
466+
"""Verify vector serializer accepts non-subscriptable iterables.
467+
468+
The Python VectorType.serialize() only requires len() + iteration,
469+
so the Cython path must accept the same inputs after normalization.
470+
"""
471+
472+
def test_iterable_with_len_no_getitem(self):
473+
"""A custom iterable with __len__ + __iter__ but no __getitem__."""
474+
475+
class IterableOnly:
476+
def __init__(self, data):
477+
self._data = list(data)
478+
479+
def __len__(self):
480+
return len(self._data)
481+
482+
def __iter__(self):
483+
return iter(self._data)
484+
485+
vec_type = _make_vector_type(FloatType, 3)
486+
ser = SerVectorType(vec_type)
487+
values = IterableOnly([1.0, 2.0, 3.0])
488+
cython_bytes = ser.serialize(values, PROTO)
489+
python_bytes = vec_type.serialize([1.0, 2.0, 3.0], PROTO)
490+
self.assertEqual(cython_bytes, python_bytes)
491+
492+
def test_iterable_int32_vector(self):
493+
"""Non-subscriptable iterable with int32 fast-path."""
494+
495+
class IterableOnly:
496+
def __init__(self, data):
497+
self._data = list(data)
498+
499+
def __len__(self):
500+
return len(self._data)
501+
502+
def __iter__(self):
503+
return iter(self._data)
504+
505+
vec_type = _make_vector_type(Int32Type, 3)
506+
ser = SerVectorType(vec_type)
507+
values = IterableOnly([1, 2, 3])
508+
cython_bytes = ser.serialize(values, PROTO)
509+
python_bytes = vec_type.serialize([1, 2, 3], PROTO)
510+
self.assertEqual(cython_bytes, python_bytes)
511+
512+
def test_tuple_input(self):
513+
"""Tuple input should work (already subscriptable, but verify)."""
514+
vec_type = _make_vector_type(Int32Type, 3)
515+
ser = SerVectorType(vec_type)
516+
cython_bytes = ser.serialize((1, 2, 3), PROTO)
517+
python_bytes = vec_type.serialize([1, 2, 3], PROTO)
518+
self.assertEqual(cython_bytes, python_bytes)
519+
520+
454521
# ---------------------------------------------------------------------------
455522
# Factory function tests
456523
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)