Skip to content

Commit 37a597b

Browse files
committed
(improvement) query: add Cython-aware serializer path in BoundStatement.bind()
When Cython serializers (from cassandra.serializers) are available and no column encryption policy is active, BoundStatement.bind() now uses pre-built Serializer objects cached on the PreparedStatement instead of calling cqltype classmethods. This avoids per-value Python method dispatch overhead and enables the ~30x vector serialization speedup from the Cython serializers module. The bind loop is split into three paths: 1. Column encryption policy path (unchanged behavior) 2. Cython serializers path (new fast path) 3. Plain Python path (no CE, no Cython -- removes per-value ColDesc/CE check) Depends on PR #748 (Cython serializers module) and PR #630 (CE-policy bind split).
1 parent de14827 commit 37a597b

2 files changed

Lines changed: 401 additions & 31 deletions

File tree

cassandra/query.py

Lines changed: 129 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@
3333
from cassandra.protocol import _UNSET_VALUE
3434
from cassandra.util import OrderedDict, _sanitize_identifiers
3535

36+
try:
37+
from cassandra.serializers import make_serializers as _cython_make_serializers
38+
_HAVE_CYTHON_SERIALIZERS = True
39+
except ImportError:
40+
_HAVE_CYTHON_SERIALIZERS = False
41+
3642
import logging
3743
log = logging.getLogger(__name__)
3844

@@ -474,6 +480,32 @@ def __init__(self, column_metadata, query_id, routing_key_indexes, query,
474480
self.is_idempotent = False
475481
self._is_lwt = is_lwt
476482

483+
@property
484+
def _serializers(self):
485+
"""Lazily create and cache Cython serializers for column types.
486+
487+
Returns a list of Serializer objects if Cython serializers are available
488+
and there is no column encryption policy, otherwise returns None.
489+
490+
The column_encryption_policy check is performed on every access (not
491+
cached) so that serializers are correctly bypassed if a policy is set
492+
after construction. This means the cache never goes stale: once a CE
493+
policy is present, we always return None and fall through to the
494+
encryption-aware bind path.
495+
"""
496+
if self.column_encryption_policy:
497+
return None
498+
try:
499+
return self._cached_serializers
500+
except AttributeError:
501+
pass
502+
if _HAVE_CYTHON_SERIALIZERS and self.column_metadata:
503+
self._cached_serializers = _cython_make_serializers(
504+
[col.type for col in self.column_metadata])
505+
else:
506+
self._cached_serializers = None
507+
return self._cached_serializers
508+
477509
@classmethod
478510
def from_message(cls, query_id, column_metadata, pk_indexes, cluster_metadata,
479511
query, prepared_keyspace, protocol_version, result_metadata,
@@ -532,6 +564,26 @@ def __str__(self):
532564
__repr__ = __str__
533565

534566

567+
def _raise_bind_serialize_error(col_spec, value, exc):
568+
"""Wrap TypeError, struct.error, or OverflowError with column context.
569+
570+
Called from all three bind loop paths (CE, Cython, plain Python) to
571+
provide a uniform error message that includes the column name and
572+
expected type. struct.error arises from int32 out-of-range values;
573+
OverflowError from float out-of-range values. Other exception types
574+
(e.g. ValueError from VectorType dimension mismatch) propagate
575+
without wrapping.
576+
"""
577+
actual_type = type(value)
578+
if isinstance(exc, (OverflowError, struct.error)):
579+
reason = 'value out of range'
580+
else:
581+
reason = 'invalid type'
582+
message = ('Received an argument with %s for column "%s". '
583+
'Expected: %s, Got: %s; (%s)' % (reason, col_spec.name, col_spec.type, actual_type, exc))
584+
raise TypeError(message) from exc
585+
586+
535587
class BoundStatement(Statement):
536588
"""
537589
A prepared statement that has been bound to a particular set of values.
@@ -635,44 +687,91 @@ def bind(self, values):
635687
(value_len, len(self.prepared_statement.routing_key_indexes)))
636688

637689
self.raw_values = values
638-
self.values = []
639-
for value, col_spec in zip(values, col_meta):
640-
if value is None:
641-
self.values.append(None)
642-
elif value is UNSET_VALUE:
643-
if proto_version >= 4:
644-
self._append_unset_value()
690+
# Pre-allocate to avoid repeated list growth reallocations
691+
self.values = [None] * col_meta_len
692+
idx = 0
693+
if ce_policy:
694+
# Column encryption path: check each column for CE policy
695+
for value, col_spec in zip(values, col_meta):
696+
if value is None:
697+
self.values[idx] = None
698+
elif value is UNSET_VALUE:
699+
if proto_version >= 4:
700+
idx = self._append_unset_value(idx)
701+
continue
702+
else:
703+
raise ValueError("Attempt to bind UNSET_VALUE while using unsuitable protocol version (%d < 4)" % proto_version)
645704
else:
646-
raise ValueError("Attempt to bind UNSET_VALUE while using unsuitable protocol version (%d < 4)" % proto_version)
705+
try:
706+
col_desc = ColDesc(col_spec.keyspace_name, col_spec.table_name, col_spec.name)
707+
uses_ce = ce_policy.contains_column(col_desc)
708+
if uses_ce:
709+
col_type = ce_policy.column_type(col_desc)
710+
col_bytes = col_type.serialize(value, proto_version)
711+
col_bytes = ce_policy.encrypt(col_desc, col_bytes)
712+
else:
713+
col_bytes = col_spec.type.serialize(value, proto_version)
714+
self.values[idx] = col_bytes
715+
# struct.error: int32 out-of-range; OverflowError: float out-of-range
716+
except (TypeError, struct.error, OverflowError) as exc:
717+
_raise_bind_serialize_error(col_spec, value, exc)
718+
idx += 1
719+
else:
720+
# Fast path: no column encryption, use Cython serializers if available
721+
serializers = self.prepared_statement._serializers
722+
if serializers is not None:
723+
for ser, value, col_spec in zip(serializers, values, col_meta):
724+
if value is None:
725+
self.values[idx] = None
726+
elif value is UNSET_VALUE:
727+
if proto_version >= 4:
728+
idx = self._append_unset_value(idx)
729+
continue
730+
else:
731+
raise ValueError("Attempt to bind UNSET_VALUE while using unsuitable protocol version (%d < 4)" % proto_version)
732+
else:
733+
try:
734+
col_bytes = ser.serialize(value, proto_version)
735+
self.values[idx] = col_bytes
736+
# struct.error: int32 out-of-range; OverflowError: float out-of-range
737+
except (TypeError, struct.error, OverflowError) as exc:
738+
_raise_bind_serialize_error(col_spec, value, exc)
739+
idx += 1
647740
else:
648-
try:
649-
col_desc = ColDesc(col_spec.keyspace_name, col_spec.table_name, col_spec.name)
650-
uses_ce = ce_policy and ce_policy.contains_column(col_desc)
651-
col_type = ce_policy.column_type(col_desc) if uses_ce else col_spec.type
652-
col_bytes = col_type.serialize(value, proto_version)
653-
if uses_ce:
654-
col_bytes = ce_policy.encrypt(col_desc, col_bytes)
655-
self.values.append(col_bytes)
656-
except (TypeError, struct.error) as exc:
657-
actual_type = type(value)
658-
message = ('Received an argument of invalid type for column "%s". '
659-
'Expected: %s, Got: %s; (%s)' % (col_spec.name, col_spec.type, actual_type, exc))
660-
raise TypeError(message)
741+
for value, col_spec in zip(values, col_meta):
742+
if value is None:
743+
self.values[idx] = None
744+
elif value is UNSET_VALUE:
745+
if proto_version >= 4:
746+
idx = self._append_unset_value(idx)
747+
continue
748+
else:
749+
raise ValueError("Attempt to bind UNSET_VALUE while using unsuitable protocol version (%d < 4)" % proto_version)
750+
else:
751+
try:
752+
col_bytes = col_spec.type.serialize(value, proto_version)
753+
self.values[idx] = col_bytes
754+
# struct.error: int32 out-of-range; OverflowError: float out-of-range
755+
except (TypeError, struct.error, OverflowError) as exc:
756+
_raise_bind_serialize_error(col_spec, value, exc)
757+
idx += 1
661758

662759
if proto_version >= 4:
663-
diff = col_meta_len - len(self.values)
664-
if diff:
665-
for _ in range(diff):
666-
self._append_unset_value()
760+
# Fill remaining unbound columns with UNSET_VALUE (v4+ feature).
761+
while idx < col_meta_len:
762+
idx = self._append_unset_value(idx)
763+
elif idx < col_meta_len:
764+
# Pre-v4: trim trailing unused slots (no UNSET_VALUE support)
765+
self.values = self.values[:idx]
667766

668767
return self
669768

670-
def _append_unset_value(self):
671-
next_index = len(self.values)
672-
if self.prepared_statement.is_routing_key_index(next_index):
673-
col_meta = self.prepared_statement.column_metadata[next_index]
769+
def _append_unset_value(self, idx):
770+
if self.prepared_statement.is_routing_key_index(idx):
771+
col_meta = self.prepared_statement.column_metadata[idx]
674772
raise ValueError("Cannot bind UNSET_VALUE as a part of the routing key '%s'" % col_meta.name)
675-
self.values.append(UNSET_VALUE)
773+
self.values[idx] = UNSET_VALUE
774+
return idx + 1
676775

677776
@property
678777
def routing_key(self):

0 commit comments

Comments
 (0)