5050 varint_pack , varint_unpack , point_be , point_le ,
5151 vints_pack , vints_unpack , uvint_unpack , uvint_pack )
5252from cassandra import util
53+ from cassandra .cython_deps import HAVE_NUMPY
54+
55+ if HAVE_NUMPY :
56+ import numpy as np
5357
5458_little_endian_flag = 1 # we always serialize LE
5559import ipaddress
@@ -1432,6 +1436,7 @@ class VectorType(_CassandraType):
14321436 subtype = None
14331437 _vector_struct = None # Cached struct.Struct for bulk deserialization
14341438 _struct_format_map = {} # Populated after FloatType etc. are defined
1439+ _numpy_dtype = None # Cached numpy dtype string for large vector deserialization
14351440
14361441 @classmethod
14371442 def serial_size (cls ):
@@ -1445,12 +1450,14 @@ def apply_parameters(cls, params, names):
14451450 vsize = params [1 ]
14461451 # Cache a struct.Struct for bulk deserialization of known numeric types
14471452 vector_struct = None
1453+ numpy_dtype = None
14481454 for base_type , fmt_char in cls ._struct_format_map .items ():
14491455 if subtype is base_type or (isinstance (subtype , type ) and issubclass (subtype , base_type )):
14501456 vector_struct = struct .Struct (f'>{ vsize } { fmt_char } ' )
1457+ numpy_dtype = cls ._numpy_dtype_map .get (fmt_char )
14511458 break
14521459 return type ('%s(%s)' % (cls .cass_parameterized_type_with ([]), vsize ), (cls ,),
1453- {'vector_size' : vsize , 'subtype' : subtype , '_vector_struct' : vector_struct })
1460+ {'vector_size' : vsize , 'subtype' : subtype , '_vector_struct' : vector_struct , '_numpy_dtype' : numpy_dtype })
14541461
14551462 @classmethod
14561463 def deserialize (cls , byts , protocol_version ):
@@ -1467,13 +1474,8 @@ def deserialize(cls, byts, protocol_version):
14671474 # For large vectors with numpy: use numpy.frombuffer (1.3-1.5x faster for 128+ elements)
14681475 # Threshold at 32 elements balances simplicity with performance
14691476 if cls ._vector_struct is not None :
1470- use_numpy = HAVE_NUMPY and cls .vector_size >= 32
1471- if use_numpy :
1472- _dtype_map = {'f' : '>f4' , 'd' : '>f8' , 'i' : '>i4' , 'q' : '>i8' }
1473- fmt_char = cls ._vector_struct .format [- 1 :]
1474- numpy_dtype = _dtype_map .get (fmt_char )
1475- if numpy_dtype is not None :
1476- return np .frombuffer (byts , dtype = numpy_dtype , count = cls .vector_size ).tolist ()
1477+ if HAVE_NUMPY and cls .vector_size >= 32 and cls ._numpy_dtype is not None :
1478+ return np .frombuffer (byts , dtype = cls ._numpy_dtype , count = cls .vector_size ).tolist ()
14771479 return list (cls ._vector_struct .unpack (byts ))
14781480 # Fallback: element-by-element deserialization for other fixed-size types
14791481 result = [None ] * cls .vector_size
@@ -1549,3 +1551,6 @@ def cql_parameterized_type(cls):
15491551 LongType : 'q' ,
15501552 ShortType : 'h' ,
15511553}
1554+
1555+ # Map struct format chars to numpy dtype strings for large vector deserialization
1556+ VectorType ._numpy_dtype_map = {'f' : '>f4' , 'd' : '>f8' , 'i' : '>i4' , 'q' : '>i8' , 'h' : '>i2' }
0 commit comments