5050 varint_pack , varint_unpack , point_be , point_le ,
5151 vints_pack , vints_unpack , uvint_unpack , uvint_pack )
5252from cassandra import util
53+ from cassandra .cython_deps import HAVE_NUMPY
54+
55+ if HAVE_NUMPY :
56+ import numpy as np
5357
5458_little_endian_flag = 1 # we always serialize LE
5559import ipaddress
@@ -1434,6 +1438,7 @@ class VectorType(_CassandraType):
14341438 subtype = None
14351439 _vector_struct = None # Cached struct.Struct for bulk deserialization
14361440 _struct_format_map = {} # Populated after FloatType etc. are defined
1441+ _numpy_dtype = None # Cached numpy dtype string for large vector deserialization
14371442
14381443 @classmethod
14391444 def serial_size (cls ):
@@ -1447,12 +1452,14 @@ def apply_parameters(cls, params, names):
14471452 vsize = params [1 ]
14481453 # Cache a struct.Struct for bulk deserialization of known numeric types
14491454 vector_struct = None
1455+ numpy_dtype = None
14501456 for base_type , fmt_char in cls ._struct_format_map .items ():
14511457 if subtype is base_type or (isinstance (subtype , type ) and issubclass (subtype , base_type )):
14521458 vector_struct = struct .Struct (f'>{ vsize } { fmt_char } ' )
1459+ numpy_dtype = cls ._numpy_dtype_map .get (fmt_char )
14531460 break
14541461 return type ('%s(%s)' % (cls .cass_parameterized_type_with ([]), vsize ), (cls ,),
1455- {'vector_size' : vsize , 'subtype' : subtype , '_vector_struct' : vector_struct })
1462+ {'vector_size' : vsize , 'subtype' : subtype , '_vector_struct' : vector_struct , '_numpy_dtype' : numpy_dtype })
14561463
14571464 @classmethod
14581465 def deserialize (cls , byts , protocol_version ):
@@ -1469,13 +1476,8 @@ def deserialize(cls, byts, protocol_version):
14691476 # For large vectors with numpy: use numpy.frombuffer (1.3-1.5x faster for 128+ elements)
14701477 # Threshold at 32 elements balances simplicity with performance
14711478 if cls ._vector_struct is not None :
1472- use_numpy = HAVE_NUMPY and cls .vector_size >= 32
1473- if use_numpy :
1474- _dtype_map = {'f' : '>f4' , 'd' : '>f8' , 'i' : '>i4' , 'q' : '>i8' }
1475- fmt_char = cls ._vector_struct .format [- 1 :]
1476- numpy_dtype = _dtype_map .get (fmt_char )
1477- if numpy_dtype is not None :
1478- return np .frombuffer (byts , dtype = numpy_dtype , count = cls .vector_size ).tolist ()
1479+ if HAVE_NUMPY and cls .vector_size >= 32 and cls ._numpy_dtype is not None :
1480+ return np .frombuffer (byts , dtype = cls ._numpy_dtype , count = cls .vector_size ).tolist ()
14791481 return list (cls ._vector_struct .unpack (byts ))
14801482 # Fallback: element-by-element deserialization for other fixed-size types
14811483 result = [None ] * cls .vector_size
@@ -1555,3 +1557,6 @@ def cql_parameterized_type(cls):
15551557 LongType : 'q' ,
15561558 ShortType : 'h' ,
15571559}
1560+
1561+ # Map struct format chars to big-endian ('>') numpy dtype strings; read by VectorType.apply_parameters to cache the dtype used for large vector deserialization
1562+ VectorType ._numpy_dtype_map = {'f' : '>f4' , 'd' : '>f8' , 'i' : '>i4' , 'q' : '>i8' , 'h' : '>i2' }
0 commit comments