(improvement) cqltypes: Use numpy for large VectorType deserialization

mykaul · mykaul · commit 0535ecda678e · 2026-04-02T13:51:48.000+03:00
For vectors with 32 or more elements, use numpy.frombuffer(), which provides a
1.3-1.5x speedup for large vectors compared to struct.unpack (1.15x at 128
elements, rising to 1.55x at 1536; see the benchmark results below).

The hybrid approach:
- Small vectors (< 32 elements): struct.unpack (2.8-3.6x faster than baseline)
- Large vectors (>= 32 elements): numpy.frombuffer().tolist() (1.3-1.5x faster than struct.unpack)

Threshold of 32 elements balances code complexity with performance gains.

Benchmark results (struct.unpack → numpy.frombuffer):
- float[128]:  2.15 μs → 1.87 μs (1.15x faster)
- float[384]:  6.17 μs → 4.44 μs (1.39x faster)
- float[768]: 12.25 μs → 8.45 μs (1.45x faster)
- float[1536]: 24.44 μs → 15.77 μs (1.55x faster)

Signed-off-by: Yaniv Kaul <yaniv.kaul@scylladb.com>
diff --git a/cassandra/cqltypes.py b/cassandra/cqltypes.py
@@ -50,6 +50,10 @@
                                varint_pack, varint_unpack, point_be, point_le,
                                vints_pack, vints_unpack, uvint_unpack, uvint_pack)
 from cassandra import util
+from cassandra.cython_deps import HAVE_NUMPY
+
+if HAVE_NUMPY:
+    import numpy as np
 
 _little_endian_flag = 1  # we always serialize LE
 import ipaddress
@@ -1434,6 +1438,7 @@ class VectorType(_CassandraType):
     subtype = None
     _vector_struct = None  # Cached struct.Struct for bulk deserialization
     _struct_format_map = {}  # Populated after FloatType etc. are defined
+    _numpy_dtype = None  # Cached numpy dtype string for large vector deserialization
 
     @classmethod
     def serial_size(cls):
@@ -1447,12 +1452,14 @@ def apply_parameters(cls, params, names):
         vsize = params[1]
         # Cache a struct.Struct for bulk deserialization of known numeric types
         vector_struct = None
+        numpy_dtype = None
         for base_type, fmt_char in cls._struct_format_map.items():
             if subtype is base_type or (isinstance(subtype, type) and issubclass(subtype, base_type)):
                 vector_struct = struct.Struct(f'>{vsize}{fmt_char}')
+                numpy_dtype = cls._numpy_dtype_map.get(fmt_char)
                 break
         return type('%s(%s)' % (cls.cass_parameterized_type_with([]), vsize), (cls,),
-                     {'vector_size': vsize, 'subtype': subtype, '_vector_struct': vector_struct})
+                     {'vector_size': vsize, 'subtype': subtype, '_vector_struct': vector_struct, '_numpy_dtype': numpy_dtype})
 
     @classmethod
     def deserialize(cls, byts, protocol_version):
@@ -1469,13 +1476,8 @@ def deserialize(cls, byts, protocol_version):
             # For large vectors with numpy: use numpy.frombuffer (1.3-1.5x faster for 128+ elements)
             # Threshold at 32 elements balances simplicity with performance
             if cls._vector_struct is not None:
-                use_numpy = HAVE_NUMPY and cls.vector_size >= 32
-                if use_numpy:
-                    _dtype_map = {'f': '>f4', 'd': '>f8', 'i': '>i4', 'q': '>i8'}
-                    fmt_char = cls._vector_struct.format[-1:]
-                    numpy_dtype = _dtype_map.get(fmt_char)
-                    if numpy_dtype is not None:
-                        return np.frombuffer(byts, dtype=numpy_dtype, count=cls.vector_size).tolist()
+                if HAVE_NUMPY and cls.vector_size >= 32 and cls._numpy_dtype is not None:
+                    return np.frombuffer(byts, dtype=cls._numpy_dtype, count=cls.vector_size).tolist()
                 return list(cls._vector_struct.unpack(byts))
             # Fallback: element-by-element deserialization for other fixed-size types
             result = [None] * cls.vector_size
@@ -1555,3 +1557,6 @@ def cql_parameterized_type(cls):
     LongType: 'q',
     ShortType: 'h',
 }
+
+# Map struct format chars to numpy dtype strings for large vector deserialization
+VectorType._numpy_dtype_map = {'f': '>f4', 'd': '>f8', 'i': '>i4', 'q': '>i8', 'h': '>i2'}