Skip to content

Commit 910a55a

Browse files
committed
fix: add bounds checks, remove malloc, and add tests for Cython serializers
Address all 8 Copilot review comments on PR scylladb#748: - Add _check_float_range() for float overflow detection matching struct.pack - Add _check_int32_range() for int32 bounds checking before C cast - Wire bounds checks into SerFloatType, SerInt32Type, and VectorType fast-paths - Replace malloc/free with PyBytes_FromStringAndSize(NULL,n)+PyBytes_AS_STRING - Add empty vector early return (b'') before allocation - Remove unused uint32_t cimport and libc.stdlib import - Add comprehensive test suite (67 tests) covering equivalence, overflow, special values, vectors, round-trips, and factory functions
1 parent 70a0193 commit 910a55a

2 files changed

Lines changed: 604 additions & 65 deletions

File tree

cassandra/serializers.pyx

Lines changed: 95 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,49 @@ For all other types, GenericSerializer delegates to the Python-level
2626
cqltype.serialize() classmethod.
2727
"""
2828

29-
from libc.stdint cimport int32_t, uint32_t
29+
from libc.stdint cimport int32_t
3030
from libc.string cimport memcpy
31-
from libc.stdlib cimport malloc, free
32-
from cpython.bytes cimport PyBytes_FromStringAndSize
31+
from libc.math cimport isinf, isnan
32+
from libc.float cimport FLT_MAX
33+
from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING
3334

3435
from cassandra import cqltypes
3536

3637
cdef bint is_little_endian
3738
from cassandra.util import is_little_endian
3839

3940

41+
# ---------------------------------------------------------------------------
42+
# Range-check helpers (match struct.pack error semantics)
43+
# ---------------------------------------------------------------------------
44+
45+
cdef inline void _check_float_range(double value) except *:
46+
"""Raise OverflowError for finite values outside float32 range.
47+
48+
Matches the behavior of struct.pack('>f', value), which raises
49+
struct.error for values that cannot be represented as a 32-bit
50+
IEEE 754 float. inf, -inf, and nan pass through unchanged.
51+
"""
52+
if not isinf(value) and not isnan(value):
53+
if value > <double>FLT_MAX or value < -<double>FLT_MAX:
54+
raise OverflowError(
55+
"Value %r too large for float32 (max %r)" % (value, FLT_MAX))
56+
57+
58+
cdef inline void _check_int32_range(object value) except *:
59+
"""Raise OverflowError for values outside the signed int32 range.
60+
61+
Matches the behavior of struct.pack('>i', value), which raises
62+
struct.error for values outside [-2147483648, 2147483647]. The check
63+
must be done on the Python int *before* the C-level <int32_t> cast,
64+
which would silently truncate.
65+
"""
66+
if value > 2147483647 or value < -2147483648:
67+
raise OverflowError(
68+
"Value %r out of range for int32 "
69+
"(must be between -2147483648 and 2147483647)" % (value,))
70+
71+
4072
# ---------------------------------------------------------------------------
4173
# Base class
4274
# ---------------------------------------------------------------------------
@@ -59,6 +91,7 @@ cdef class SerFloatType(Serializer):
5991
"""Serialize a Python float to 4-byte big-endian IEEE 754."""
6092

6193
cpdef bytes serialize(self, object value, int protocol_version):
94+
_check_float_range(<double>value)
6295
cdef float val = <float>value
6396
cdef char out[4]
6497
cdef char *src = <char *>&val
@@ -101,6 +134,7 @@ cdef class SerInt32Type(Serializer):
101134
"""Serialize a Python int to 4-byte big-endian signed int32."""
102135

103136
cpdef bytes serialize(self, object value, int protocol_version):
137+
_check_int32_range(value)
104138
cdef int32_t val = <int32_t>value
105139
cdef char out[4]
106140
cdef char *src = <char *>&val
@@ -184,95 +218,91 @@ cdef class SerVectorType(Serializer):
184218
"""Serialize a list of floats into a contiguous big-endian buffer."""
185219
cdef Py_ssize_t i
186220
cdef Py_ssize_t buf_size = self.vector_size * 4
187-
cdef char *buf = <char *>malloc(buf_size)
188-
if buf == NULL:
189-
raise MemoryError("Failed to allocate %d bytes for vector serialization" % buf_size)
221+
if buf_size == 0:
222+
return b""
190223

224+
cdef object result = PyBytes_FromStringAndSize(NULL, buf_size)
225+
cdef char *buf = PyBytes_AS_STRING(result)
191226
cdef float val
192227
cdef char *src
193228
cdef char *dst
194229

195-
try:
196-
for i in range(self.vector_size):
197-
val = <float>values[i]
198-
src = <char *>&val
199-
dst = buf + i * 4
230+
for i in range(self.vector_size):
231+
_check_float_range(<double>values[i])
232+
val = <float>values[i]
233+
src = <char *>&val
234+
dst = buf + i * 4
200235

201-
if is_little_endian:
202-
dst[0] = src[3]
203-
dst[1] = src[2]
204-
dst[2] = src[1]
205-
dst[3] = src[0]
206-
else:
207-
memcpy(dst, src, 4)
236+
if is_little_endian:
237+
dst[0] = src[3]
238+
dst[1] = src[2]
239+
dst[2] = src[1]
240+
dst[3] = src[0]
241+
else:
242+
memcpy(dst, src, 4)
208243

209-
return PyBytes_FromStringAndSize(buf, buf_size)
210-
finally:
211-
free(buf)
244+
return result
212245

213246
cdef inline bytes _serialize_double(self, object values):
214247
"""Serialize a list of doubles into a contiguous big-endian buffer."""
215248
cdef Py_ssize_t i
216249
cdef Py_ssize_t buf_size = self.vector_size * 8
217-
cdef char *buf = <char *>malloc(buf_size)
218-
if buf == NULL:
219-
raise MemoryError("Failed to allocate %d bytes for vector serialization" % buf_size)
250+
if buf_size == 0:
251+
return b""
220252

253+
cdef object result = PyBytes_FromStringAndSize(NULL, buf_size)
254+
cdef char *buf = PyBytes_AS_STRING(result)
221255
cdef double val
222256
cdef char *src
223257
cdef char *dst
224258

225-
try:
226-
for i in range(self.vector_size):
227-
val = <double>values[i]
228-
src = <char *>&val
229-
dst = buf + i * 8
230-
231-
if is_little_endian:
232-
dst[0] = src[7]
233-
dst[1] = src[6]
234-
dst[2] = src[5]
235-
dst[3] = src[4]
236-
dst[4] = src[3]
237-
dst[5] = src[2]
238-
dst[6] = src[1]
239-
dst[7] = src[0]
240-
else:
241-
memcpy(dst, src, 8)
242-
243-
return PyBytes_FromStringAndSize(buf, buf_size)
244-
finally:
245-
free(buf)
259+
for i in range(self.vector_size):
260+
val = <double>values[i]
261+
src = <char *>&val
262+
dst = buf + i * 8
263+
264+
if is_little_endian:
265+
dst[0] = src[7]
266+
dst[1] = src[6]
267+
dst[2] = src[5]
268+
dst[3] = src[4]
269+
dst[4] = src[3]
270+
dst[5] = src[2]
271+
dst[6] = src[1]
272+
dst[7] = src[0]
273+
else:
274+
memcpy(dst, src, 8)
275+
276+
return result
246277

247278
cdef inline bytes _serialize_int32(self, object values):
248279
"""Serialize a list of int32 values into a contiguous big-endian buffer."""
249280
cdef Py_ssize_t i
250281
cdef Py_ssize_t buf_size = self.vector_size * 4
251-
cdef char *buf = <char *>malloc(buf_size)
252-
if buf == NULL:
253-
raise MemoryError("Failed to allocate %d bytes for vector serialization" % buf_size)
282+
if buf_size == 0:
283+
return b""
254284

285+
cdef object result = PyBytes_FromStringAndSize(NULL, buf_size)
286+
cdef char *buf = PyBytes_AS_STRING(result)
255287
cdef int32_t val
256288
cdef char *src
257289
cdef char *dst
258290

259-
try:
260-
for i in range(self.vector_size):
261-
val = <int32_t>values[i]
262-
src = <char *>&val
263-
dst = buf + i * 4
264-
265-
if is_little_endian:
266-
dst[0] = src[3]
267-
dst[1] = src[2]
268-
dst[2] = src[1]
269-
dst[3] = src[0]
270-
else:
271-
memcpy(dst, src, 4)
272-
273-
return PyBytes_FromStringAndSize(buf, buf_size)
274-
finally:
275-
free(buf)
291+
for i in range(self.vector_size):
292+
_check_int32_range(values[i])
293+
val = <int32_t>values[i]
294+
src = <char *>&val
295+
dst = buf + i * 4
296+
297+
if is_little_endian:
298+
dst[0] = src[3]
299+
dst[1] = src[2]
300+
dst[2] = src[1]
301+
dst[3] = src[0]
302+
else:
303+
memcpy(dst, src, 4)
304+
305+
return result
276306

277307
cdef inline bytes _serialize_generic(self, object values, int protocol_version):
278308
"""Fallback: element-by-element Python serialization for non-optimized types."""

0 commit comments

Comments
 (0)