@@ -1430,6 +1430,8 @@ class VectorType(_CassandraType):
14301430 typename = 'org.apache.cassandra.db.marshal.VectorType'
14311431 vector_size = 0
14321432 subtype = None
1433+ _vector_struct = None # Cached struct.Struct for bulk deserialization
1434+ _struct_format_map = {} # Populated after FloatType etc. are defined
14331435
14341436 @classmethod
14351437 def serial_size (cls ):
@@ -1441,7 +1443,14 @@ def apply_parameters(cls, params, names):
14411443 assert len (params ) == 2
14421444 subtype = lookup_casstype (params [0 ])
14431445 vsize = params [1 ]
1444- return type ('%s(%s)' % (cls .cass_parameterized_type_with ([]), vsize ), (cls ,), {'vector_size' : vsize , 'subtype' : subtype })
1446+ # Cache a struct.Struct for bulk deserialization of known numeric types
1447+ vector_struct = None
1448+ for base_type , fmt_char in cls ._struct_format_map .items ():
1449+ if subtype is base_type or (isinstance (subtype , type ) and issubclass (subtype , base_type )):
1450+ vector_struct = struct .Struct (f'>{ vsize } { fmt_char } ' )
1451+ break
1452+ return type ('%s(%s)' % (cls .cass_parameterized_type_with ([]), vsize ), (cls ,),
1453+ {'vector_size' : vsize , 'subtype' : subtype , '_vector_struct' : vector_struct })
14451454
14461455 @classmethod
14471456 def deserialize (cls , byts , protocol_version ):
@@ -1452,25 +1461,60 @@ def deserialize(cls, byts, protocol_version):
14521461 raise ValueError (
14531462 "Expected vector of type {0} and dimension {1} to have serialized size {2}; observed serialized size of {3} instead" \
14541463 .format (cls .subtype .typename , cls .vector_size , expected_byte_size , len (byts )))
1455- indexes = (serialized_size * x for x in range (0 , cls .vector_size ))
1456- return [cls .subtype .deserialize (byts [idx :idx + serialized_size ], protocol_version ) for idx in indexes ]
14571464
1465+ # Optimization: bulk deserialization for common numeric types
1466+ # For small vectors: use cached struct.Struct (avoids per-call format string allocation)
1467+ # For large vectors with numpy: use numpy.frombuffer (1.3-1.5x faster for 128+ elements)
1468+ # Threshold at 32 elements balances simplicity with performance
1469+ if cls ._vector_struct is not None :
1470+ use_numpy = HAVE_NUMPY and cls .vector_size >= 32
1471+ if use_numpy :
1472+ _dtype_map = {'f' : '>f4' , 'd' : '>f8' , 'i' : '>i4' , 'q' : '>i8' }
1473+ fmt_char = cls ._vector_struct .format [- 1 :]
1474+ numpy_dtype = _dtype_map .get (fmt_char )
1475+ if numpy_dtype is not None :
1476+ return np .frombuffer (byts , dtype = numpy_dtype , count = cls .vector_size ).tolist ()
1477+ return list (cls ._vector_struct .unpack (byts ))
1478+ # Fallback: element-by-element deserialization for other fixed-size types
1479+ result = [None ] * cls .vector_size
1480+ subtype_deserialize = cls .subtype .deserialize
1481+ offset = 0
1482+ for i in range (cls .vector_size ):
1483+ result [i ] = subtype_deserialize (byts [offset :offset + serialized_size ], protocol_version )
1484+ offset += serialized_size
1485+ return result
1486+
1487+ # Variable-size subtype path
1488+ result = [None ] * cls .vector_size
14581489 idx = 0
1459- rv = []
1460- while (len (rv ) < cls .vector_size ):
1490+ byts_len = len (byts )
1491+ subtype_deserialize = cls .subtype .deserialize
1492+
1493+ for i in range (cls .vector_size ):
1494+ if idx >= byts_len :
1495+ raise ValueError ("Error reading additional data during vector deserialization after successfully adding {} elements" \
1496+ .format (i ))
1497+
14611498 try :
14621499 size , bytes_read = uvint_unpack (byts [idx :])
1463- idx += bytes_read
1464- rv .append (cls .subtype .deserialize (byts [idx :idx + size ], protocol_version ))
1465- idx += size
1466- except :
1500+ except (IndexError , KeyError ):
14671501 raise ValueError ("Error reading additional data during vector deserialization after successfully adding {} elements" \
1468- .format (len (rv )))
1502+ .format (i ))
1503+
1504+ idx += bytes_read
14691505
1470- # If we have any additional data in the serialized vector treat that as an error as well
1471- if idx < len (byts ):
1506+ if idx + size > byts_len :
1507+ raise ValueError ("Error reading additional data during vector deserialization after successfully adding {} elements" \
1508+ .format (i ))
1509+
1510+ result [i ] = subtype_deserialize (byts [idx :idx + size ], protocol_version )
1511+ idx += size
1512+
1513+ # Check for additional data
1514+ if idx < byts_len :
14721515 raise ValueError ("Additional bytes remaining after vector deserialization completed" )
1473- return rv
1516+
1517+ return result
14741518
14751519 @classmethod
14761520 def serialize (cls , v , protocol_version ):
@@ -1481,6 +1525,9 @@ def serialize(cls, v, protocol_version):
14811525 .format (cls .vector_size , cls .subtype .typename , v_length ))
14821526
14831527 serialized_size = cls .subtype .serial_size ()
1528+ # Bulk serialization for known numeric types (symmetric with struct.unpack in deserialize)
1529+ if cls ._vector_struct is not None and serialized_size is not None :
1530+ return cls ._vector_struct .pack (* v )
14841531 buf = io .BytesIO ()
14851532 for item in v :
14861533 item_bytes = cls .subtype .serialize (item , protocol_version )
@@ -1492,3 +1539,13 @@ def serialize(cls, v, protocol_version):
14921539 @classmethod
14931540 def cql_parameterized_type (cls ):
14941541 return "%s<%s, %s>" % (cls .typename , cls .subtype .cql_parameterized_type (), cls .vector_size )
1542+
1543+
1544+ # Populate VectorType._struct_format_map now that all types are defined
1545+ VectorType ._struct_format_map = {
1546+ FloatType : 'f' ,
1547+ DoubleType : 'd' ,
1548+ Int32Type : 'i' ,
1549+ LongType : 'q' ,
1550+ ShortType : 'h' ,
1551+ }
0 commit comments