perf: modernize varint_pack/varint_unpack with int.to_bytes/int.from_bytes

mykaul · mykaul · commit 8071a816f843 · 2026-04-05T22:44:12.000+03:00
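Replace the manual hex-string conversion in varint_unpack() and the byte-by-byte bytearray loop in varint_pack() with the Python 3 built-in int methods int.from_bytes() and int.to_bytes().

- varint_unpack formatted each byte with '%02x', joined the pieces with str.join, then parsed the result back with int(..., 16) — O(n) string allocations. int.from_bytes is a single C-level call.
- varint_pack used a while loop that appended individual bytes to a bytearray and then reversed it. int.to_bytes computes the result in one C call.

Also fixes the Cython path in cython_marshal.pyx, which had the same slow pattern along with a TODO comment to optimize it.

Adapted from PR #689 (varint_unpack), with a new varint_pack implementation.

Note: the diff to marshal.py also contains formatting-only changes (double-quoted string literals, uppercase hex literals such as 0xFF, two blank lines between functions, a wrapped raise statement); these do not change behavior.

Behavior note: for empty input the old varint_unpack raised (int('', 16) is a ValueError), whereas int.from_bytes(b'', byteorder='big', signed=True) returns 0.

Benchmarks:
  varint_pack medium: 643 -> 90 ns/call (7.1x faster)
  varint_pack large: 1109 -> 96 ns/call (11.6x faster)
  varint_unpack medium: 1086 -> 115 ns/call (9.4x faster)
  varint_unpack large: 1940 -> 146 ns/call (13.3x faster)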
diff --git a/cassandra/cython_marshal.pyx b/cassandra/cython_marshal.pyx
@@ -55,16 +55,5 @@ cdef varint_unpack(Buffer *term):
     """Unpack a variable-sized integer"""
     return varint_unpack_py3(to_bytes(term))
 
-# TODO: Optimize these two functions
 cdef varint_unpack_py3(bytes term):
-    val = int(''.join(["%02x" % i for i in term]), 16)
-    if (term[0] & 128) != 0:
-        shift = len(term) * 8  # * Note below
-        val -= 1 << shift
-    return val
-
-# * Note *
-# '1 << (len(term) * 8)' Cython tries to do native
-# integer shifts, which overflows. We need this to
-# emulate Python shifting, which will expand the long
-# to accommodate
+    return int.from_bytes(term, byteorder='big', signed=True)
diff --git a/cassandra/marshal.py b/cassandra/marshal.py
@@ -21,59 +21,48 @@ def _make_packer(format_string):
     unpack = lambda s: packer.unpack(s)[0]
     return pack, unpack
 
-int64_pack, int64_unpack = _make_packer('>q')
-int32_pack, int32_unpack = _make_packer('>i')
-int16_pack, int16_unpack = _make_packer('>h')
-int8_pack, int8_unpack = _make_packer('>b')
-uint64_pack, uint64_unpack = _make_packer('>Q')
-uint32_pack, uint32_unpack = _make_packer('>I')
-uint32_le_pack, uint32_le_unpack = _make_packer('<I')
-uint16_pack, uint16_unpack = _make_packer('>H')
-uint8_pack, uint8_unpack = _make_packer('>B')
-float_pack, float_unpack = _make_packer('>f')
-double_pack, double_unpack = _make_packer('>d')
+
+int64_pack, int64_unpack = _make_packer(">q")
+int32_pack, int32_unpack = _make_packer(">i")
+int16_pack, int16_unpack = _make_packer(">h")
+int8_pack, int8_unpack = _make_packer(">b")
+uint64_pack, uint64_unpack = _make_packer(">Q")
+uint32_pack, uint32_unpack = _make_packer(">I")
+uint32_le_pack, uint32_le_unpack = _make_packer("<I")
+uint16_pack, uint16_unpack = _make_packer(">H")
+uint8_pack, uint8_unpack = _make_packer(">B")
+float_pack, float_unpack = _make_packer(">f")
+double_pack, double_unpack = _make_packer(">d")
 
 # in protocol version 3 and higher, the stream ID is two bytes
-v3_header_struct = struct.Struct('>BBhB')
+v3_header_struct = struct.Struct(">BBhB")
 v3_header_pack = v3_header_struct.pack
 v3_header_unpack = v3_header_struct.unpack
 
 
 def varint_unpack(term):
-    val = int(''.join("%02x" % i for i in term), 16)
-    if (term[0] & 128) != 0:
-        len_term = len(term)  # pulling this out of the expression to avoid overflow in cython optimized code
-        val -= 1 << (len_term * 8)
-    return val
+    return int.from_bytes(term, byteorder="big", signed=True)
 
 
 def bit_length(n):
     return int.bit_length(n)
 
 
 def varint_pack(big):
-    pos = True
     if big == 0:
-        return b'\x00'
+        return b"\x00"
     if big < 0:
-        bytelength = bit_length(abs(big) - 1) // 8 + 1
-        big = (1 << bytelength * 8) + big
-        pos = False
-    revbytes = bytearray()
-    while big > 0:
-        revbytes.append(big & 0xff)
-        big >>= 8
-    if pos and revbytes[-1] & 0x80:
-        revbytes.append(0)
-    revbytes.reverse()
-    return bytes(revbytes)
+        byte_length = (-big - 1).bit_length() // 8 + 1
+    else:
+        byte_length = (big.bit_length() + 8) // 8
+    return big.to_bytes(byte_length, byteorder="big", signed=True)
 
 
-point_be = struct.Struct('>dd')
-point_le = struct.Struct('<dd')
+point_be = struct.Struct(">dd")
+point_le = struct.Struct("<dd")
 
-circle_be = struct.Struct('>ddd')
-circle_le = struct.Struct('<ddd')
+circle_be = struct.Struct(">ddd")
+circle_le = struct.Struct("<ddd")
 
 
 def encode_zig_zag(n):
@@ -93,19 +82,20 @@ def vints_unpack(term):  # noqa
         if (first_byte & 128) == 0:
             val = first_byte
         else:
-            num_extra_bytes = 8 - (~first_byte & 0xff).bit_length()
-            val = first_byte & (0xff >> num_extra_bytes)
+            num_extra_bytes = 8 - (~first_byte & 0xFF).bit_length()
+            val = first_byte & (0xFF >> num_extra_bytes)
             end = n + num_extra_bytes
             while n < end:
                 n += 1
                 val <<= 8
-                val |= term[n] & 0xff
+                val |= term[n] & 0xFF
 
         n += 1
         values.append(decode_zig_zag(val))
 
     return tuple(values)
 
+
 def vints_pack(values):
     revbytes = bytearray()
     values = [int(v) for v in values[::-1]]
@@ -120,39 +110,43 @@ def vints_pack(values):
             # ie. with 1 extra byte, the first byte needs to be something like '10XXXXXX' # 2 bits reserved
             # ie. with 8 extra bytes, the first byte needs to be '11111111'  # 8 bits reserved
             reserved_bits = num_extra_bytes + 1
-            while num_bits > (8-(reserved_bits)):
+            while num_bits > (8 - (reserved_bits)):
                 num_extra_bytes += 1
                 num_bits -= 8
                 reserved_bits = min(num_extra_bytes + 1, 8)
-                revbytes.append(v & 0xff)
+                revbytes.append(v & 0xFF)
                 v >>= 8
 
             if num_extra_bytes > 8:
-                raise ValueError('Value %d is too big and cannot be encoded as vint' % value)
+                raise ValueError(
+                    "Value %d is too big and cannot be encoded as vint" % value
+                )
 
             # We can now store the last bits in the first byte
             n = 8 - num_extra_bytes
-            v |= (0xff >> n << n)
+            v |= 0xFF >> n << n
             revbytes.append(abs(v))
 
     revbytes.reverse()
     return bytes(revbytes)
 
+
 def uvint_unpack(bytes):
     first_byte = bytes[0]
 
     if (first_byte & 128) == 0:
-        return (first_byte,1)
+        return (first_byte, 1)
 
-    num_extra_bytes = 8 - (~first_byte & 0xff).bit_length()
-    rv = first_byte & (0xff >> num_extra_bytes)
-    for idx in range(1,num_extra_bytes + 1):
+    num_extra_bytes = 8 - (~first_byte & 0xFF).bit_length()
+    rv = first_byte & (0xFF >> num_extra_bytes)
+    for idx in range(1, num_extra_bytes + 1):
         new_byte = bytes[idx]
         rv <<= 8
-        rv |= new_byte & 0xff
+        rv |= new_byte & 0xFF
 
     return (rv, num_extra_bytes + 1)
 
+
 def uvint_pack(val):
     rv = bytearray()
     if val < 128:
@@ -165,19 +159,19 @@ def uvint_pack(val):
         # ie. with 1 extra byte, the first byte needs to be something like '10XXXXXX' # 2 bits reserved
         # ie. with 8 extra bytes, the first byte needs to be '11111111'  # 8 bits reserved
         reserved_bits = num_extra_bytes + 1
-        while num_bits > (8-(reserved_bits)):
+        while num_bits > (8 - (reserved_bits)):
             num_extra_bytes += 1
             num_bits -= 8
             reserved_bits = min(num_extra_bytes + 1, 8)
-            rv.append(v & 0xff)
+            rv.append(v & 0xFF)
             v >>= 8
 
         if num_extra_bytes > 8:
-            raise ValueError('Value %d is too big and cannot be encoded as vint' % val)
+            raise ValueError("Value %d is too big and cannot be encoded as vint" % val)
 
         # We can now store the last bits in the first byte
         n = 8 - num_extra_bytes
-        v |= (0xff >> n << n)
+        v |= 0xFF >> n << n
         rv.append(abs(v))
 
     rv.reverse()