1313# limitations under the License.
1414
1515
16- from libc.stdint cimport int32_t, uint16_t, uint32_t
16+ from libc.stdint cimport int32_t, int64_t, int16_t, uint16_t, uint32_t
17+ from libc.string cimport memcpy
1718
1819include ' cython_marshal.pyx'
19- from cassandra.buffer cimport Buffer, to_bytes, slice_buffer
20+ from cassandra.buffer cimport Buffer, to_bytes, from_ptr_and_size
2021from cassandra.cython_utils cimport datetime_from_timestamp, datetime_from_ms_timestamp
2122
2223from cython.view cimport array as cython_array
@@ -29,6 +30,11 @@ from uuid import UUID
2930from cassandra import cqltypes
3031from cassandra import util
3132
33+ # Import numpy availability flag and conditionally import numpy
34+ from cassandra.cython_deps import HAVE_NUMPY
35+ if HAVE_NUMPY:
36+ import numpy as np
37+
3238cdef class Deserializer:
3339 """ Cython-based deserializer class for a cqltype"""
3440
@@ -182,9 +188,240 @@ cdef class DesVarcharType(DesUTF8Type):
182188 pass
183189
184190
191+ # --------------------------------------------------------------------------
192+ # Vector deserialization
193+
194+ cdef inline bint _is_float_type(object subtype):
195+ return subtype is cqltypes.FloatType or issubclass (subtype, cqltypes.FloatType)
196+
197+ cdef inline bint _is_double_type(object subtype):
198+ return subtype is cqltypes.DoubleType or issubclass (subtype, cqltypes.DoubleType)
199+
200+ cdef inline bint _is_int32_type(object subtype):
201+ return subtype is cqltypes.Int32Type or issubclass (subtype, cqltypes.Int32Type)
202+
203+ cdef inline bint _is_int64_type(object subtype):
204+ return subtype is cqltypes.LongType or issubclass (subtype, cqltypes.LongType)
205+
206+ cdef inline bint _is_int16_type(object subtype):
207+ return subtype is cqltypes.ShortType or issubclass (subtype, cqltypes.ShortType)
208+
209+ cdef inline list _deserialize_numpy_vector(Buffer * buf, int vector_size, str dtype):
210+ """ Unified numpy deserialization for large vectors"""
211+ return np.frombuffer(buf.ptr[:buf.size], dtype = dtype, count = vector_size).tolist()
212+
213+ cdef class DesVectorType(Deserializer):
214+ """
215+ Optimized Cython deserializer for VectorType.
216+
217+ For float and double vectors, uses direct memory access with C-level casting
218+ for significantly better performance than Python-level deserialization.
219+ """
220+
221+ cdef int vector_size
222+ cdef object subtype
223+
224+ def __init__ (self , cqltype ):
225+ super ().__init__(cqltype)
226+ self .vector_size = cqltype.vector_size
227+ self .subtype = cqltype.subtype
228+
229+ def deserialize_bytes (self , bytes data , int protocol_version ):
230+ """ Python-callable wrapper for deserialize that takes bytes."""
231+ cdef Buffer buf
232+ buf.ptr = < char * > data
233+ buf.size = len (data)
234+ return self .deserialize(& buf, protocol_version)
235+
236+ cdef deserialize(self , Buffer * buf, int protocol_version):
237+ cdef int expected_size
238+ cdef int elem_size
239+ cdef bint use_numpy = HAVE_NUMPY and self .vector_size >= 32
240+
241+ # Determine element type, size, and dispatch appropriately
242+ if _is_float_type(self .subtype):
243+ elem_size = 4
244+ expected_size = self .vector_size * elem_size
245+ if buf.size == expected_size:
246+ if use_numpy:
247+ return _deserialize_numpy_vector(buf, self .vector_size, ' >f4' )
248+ return self ._deserialize_float(buf)
249+ raise ValueError (
250+ f" Expected vector of type {self.subtype.typename} and dimension {self.vector_size} "
251+ f" to have serialized size {expected_size}; observed serialized size of {buf.size} instead" )
252+ elif _is_double_type(self .subtype):
253+ elem_size = 8
254+ expected_size = self .vector_size * elem_size
255+ if buf.size == expected_size:
256+ if use_numpy:
257+ return _deserialize_numpy_vector(buf, self .vector_size, ' >f8' )
258+ return self ._deserialize_double(buf)
259+ raise ValueError (
260+ f" Expected vector of type {self.subtype.typename} and dimension {self.vector_size} "
261+ f" to have serialized size {expected_size}; observed serialized size of {buf.size} instead" )
262+ elif _is_int32_type(self .subtype):
263+ elem_size = 4
264+ expected_size = self .vector_size * elem_size
265+ if buf.size == expected_size:
266+ if use_numpy:
267+ return _deserialize_numpy_vector(buf, self .vector_size, ' >i4' )
268+ return self ._deserialize_int32(buf)
269+ raise ValueError (
270+ f" Expected vector of type {self.subtype.typename} and dimension {self.vector_size} "
271+ f" to have serialized size {expected_size}; observed serialized size of {buf.size} instead" )
272+ elif _is_int64_type(self .subtype):
273+ elem_size = 8
274+ expected_size = self .vector_size * elem_size
275+ if buf.size == expected_size:
276+ if use_numpy:
277+ return _deserialize_numpy_vector(buf, self .vector_size, ' >i8' )
278+ return self ._deserialize_int64(buf)
279+ raise ValueError (
280+ f" Expected vector of type {self.subtype.typename} and dimension {self.vector_size} "
281+ f" to have serialized size {expected_size}; observed serialized size of {buf.size} instead" )
282+ elif _is_int16_type(self .subtype):
283+ elem_size = 2
284+ expected_size = self .vector_size * elem_size
285+ if buf.size == expected_size:
286+ if use_numpy:
287+ return _deserialize_numpy_vector(buf, self .vector_size, ' >i2' )
288+ return self ._deserialize_int16(buf)
289+ raise ValueError (
290+ f" Expected vector of type {self.subtype.typename} and dimension {self.vector_size} "
291+ f" to have serialized size {expected_size}; observed serialized size of {buf.size} instead" )
292+ else :
293+ # Unsupported type, use generic deserialization
294+ return self ._deserialize_generic(buf, protocol_version)
295+
296+ cdef inline list _deserialize_float(self , Buffer * buf):
297+ """ Deserialize float vector using direct C-level access with byte swapping"""
298+ cdef Py_ssize_t i
299+ cdef list result
300+ cdef float temp
301+ cdef uint32_t temp32
302+
303+ result = [None ] * self .vector_size
304+ for i in range (self .vector_size):
305+ # Copy to aligned local, then convert from big-endian
306+ memcpy(& temp32, buf.ptr + i * 4 , 4 )
307+ temp32 = ntohl(temp32)
308+ temp = (< float * > & temp32)[0 ]
309+ result[i] = temp
310+
311+ return result
312+
313+ cdef inline list _deserialize_double(self , Buffer * buf):
314+ """ Deserialize double vector using direct C-level access with byte swapping"""
315+ cdef Py_ssize_t i
316+ cdef list result
317+ cdef double temp
318+ cdef char * src_bytes
319+ cdef char * out_bytes
320+ cdef int j
321+
322+ result = [None ] * self .vector_size
323+ for i in range (self .vector_size):
324+ src_bytes = buf.ptr + i * 8
325+ out_bytes = < char * > & temp
326+
327+ # Swap bytes for big-endian to native conversion
328+ if is_little_endian:
329+ for j in range (8 ):
330+ out_bytes[7 - j] = src_bytes[j]
331+ else :
332+ memcpy(& temp, src_bytes, 8 )
333+
334+ result[i] = temp
335+
336+ return result
337+
338+ cdef inline list _deserialize_int32(self , Buffer * buf):
339+ """ Deserialize int32 vector using direct C-level access with ntohl"""
340+ cdef Py_ssize_t i
341+ cdef list result
342+ cdef int32_t temp
343+ cdef uint32_t temp32
344+
345+ result = [None ] * self .vector_size
346+ for i in range (self .vector_size):
347+ # Copy to aligned local, then convert from big-endian
348+ memcpy(& temp32, buf.ptr + i * 4 , 4 )
349+ temp = < int32_t> ntohl(temp32)
350+ result[i] = temp
351+
352+ return result
353+
354+ cdef inline list _deserialize_int64(self , Buffer * buf):
355+ """ Deserialize int64/long vector using direct C-level access with byte swapping"""
356+ cdef Py_ssize_t i
357+ cdef list result
358+ cdef int64_t temp
359+ cdef char * src_bytes
360+ cdef char * out_bytes
361+ cdef int j
362+
363+ result = [None ] * self .vector_size
364+ for i in range (self .vector_size):
365+ src_bytes = buf.ptr + i * 8
366+ out_bytes = < char * > & temp
367+
368+ # Swap bytes for big-endian to native conversion
369+ if is_little_endian:
370+ for j in range (8 ):
371+ out_bytes[7 - j] = src_bytes[j]
372+ else :
373+ memcpy(& temp, src_bytes, 8 )
374+
375+ result[i] = temp
376+
377+ return result
378+
379+ cdef inline list _deserialize_int16(self , Buffer * buf):
380+ """ Deserialize int16/short vector using direct C-level access with ntohs"""
381+ cdef Py_ssize_t i
382+ cdef list result
383+ cdef int16_t temp
384+
385+ result = [None ] * self .vector_size
386+ for i in range (self .vector_size):
387+ temp = < int16_t> ntohs((< uint16_t* > (buf.ptr + i * 2 ))[0 ])
388+ result[i] = temp
389+
390+ return result
391+
392+ cdef inline list _deserialize_generic(self , Buffer * buf, int protocol_version):
393+ """ Fallback: element-by-element deserialization for non-optimized types"""
394+ cdef Py_ssize_t i
395+ cdef Buffer elem_buf
396+ cdef int offset = 0
397+ cdef list result = [None ] * self .vector_size
398+
399+ _serialized_size = self .subtype.serial_size()
400+ if _serialized_size is None :
401+ raise ValueError (
402+ f" VectorType with variable-size subtype {self.subtype.typename} "
403+ " is not supported in Cython deserializer" )
404+ cdef int serialized_size = < int > _serialized_size
405+
406+ # Validate total size before processing
407+ cdef int expected_size = self .vector_size * serialized_size
408+ if buf.size != expected_size:
409+ raise ValueError (
410+ f" Expected vector of type {self.subtype.typename} and dimension {self.vector_size} "
411+ f" to have serialized size {expected_size}; observed serialized size of {buf.size} instead" )
412+
413+ for i in range (self .vector_size):
414+ from_ptr_and_size(buf.ptr + offset, serialized_size, & elem_buf)
415+ result[i] = self .subtype.deserialize(to_bytes(& elem_buf), protocol_version)
416+ offset += serialized_size
417+
418+ return result
419+
420+
185421
186422cdef class _DesParameterizedType(Deserializer):
187423
424+
188425 cdef object subtypes
189426 cdef Deserializer[::1 ] deserializers
190427 cdef Py_ssize_t subtypes_len
@@ -511,6 +748,8 @@ cpdef Deserializer find_deserializer(cqltype):
511748 cls = DesReversedType
512749 elif issubclass (cqltype, cqltypes.FrozenType):
513750 cls = DesFrozenType
751+ elif issubclass (cqltype, cqltypes.VectorType):
752+ cls = DesVectorType
514753 else :
515754 cls = GenericDeserializer
516755
0 commit comments