Skip to content

Commit 3015986

Browse files
committed
perf: skip ColDesc creation in bind() when column encryption is disabled
Split BoundStatement.bind() into two code paths. When column_encryption_policy is None (the overwhelmingly common case), skip the per-column ColDesc namedtuple creation, ce_policy.contains_column() check, and ce_policy.column_type() lookup, and call col_spec.type.serialize() directly instead. When column encryption IS enabled, behavior is unchanged. Benchmark (inner loop only, 200k iters, Python 3.14): 3-col: 1375 -> 523 ns (2.63x, saving 852 ns/bind); 5-col: 2226 -> 1013 ns (2.20x, saving 1213 ns/bind); 8-col: 3495 -> 1317 ns (2.65x, saving 2178 ns/bind).
1 parent 434465b commit 3015986

2 files changed

Lines changed: 147 additions & 21 deletions

File tree

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# Copyright ScyllaDB, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
Micro-benchmark: BoundStatement.bind() fast path without column encryption.
17+
18+
Measures the improvement from skipping ColDesc namedtuple creation and
19+
ce_policy checks when column_encryption_policy is None (the common case).
20+
21+
Run:
22+
python benchmarks/bench_bind_no_encryption.py
23+
"""
24+
25+
import datetime
26+
import sys
27+
import timeit
28+
from unittest.mock import MagicMock
29+
30+
from cassandra.query import BoundStatement, PreparedStatement
31+
from cassandra.cqltypes import (
32+
DateType, Int32Type, DoubleType, FloatType, UTF8Type,
33+
BooleanType, LongType,
34+
)
35+
36+
37+
def make_prepared_statement(col_names, col_types):
    """Return a MagicMock standing in for a PreparedStatement.

    ``col_names`` and ``col_types`` are paired positionally with ``zip``; each
    pair becomes one mocked column-metadata entry in keyspace ``'ks'`` and
    table ``'metrics'``.  The statement mock is specced on PreparedStatement,
    has ``column_encryption_policy`` set to None and ``protocol_version``
    set to 4.
    """

    def _mock_column(name, ctype):
        # ``name`` cannot be passed to the MagicMock constructor (there it
        # names the mock itself), so it is applied through configure_mock.
        column = MagicMock()
        column.configure_mock(
            name=name,
            keyspace_name='ks',
            table_name='metrics',
            type=ctype,
        )
        return column

    columns = [_mock_column(name, ctype)
               for name, ctype in zip(col_names, col_types)]

    prepared = MagicMock(spec=PreparedStatement)
    prepared.configure_mock(
        column_metadata=columns,
        routing_key_indexes=None,
        protocol_version=4,
        column_encryption_policy=None,
        serial_consistency_level=None,
        retry_policy=None,
        consistency_level=None,
        fetch_size=None,
        custom_payload=None,
        is_idempotent=False,
    )
    return prepared
60+
61+
62+
def bench():
    """Time BoundStatement construction plus bind() for three column layouts.

    For every scenario a mocked prepared statement is built with
    make_prepared_statement(), 1000 untimed warmup calls are made, and then
    ``n`` calls are timed with timeit.  The average time per call is printed
    in nanoseconds.
    """
    n = 200_000
    warmup_calls = 1000

    # Each entry: (label, column names, column types, values to bind).
    schemas = (
        (
            "3-col (int, double, text)",
            ['id', 'value', 'tag'],
            [Int32Type, DoubleType, UTF8Type],
            [42, 3.14159, 'sensor-001'],
        ),
        (
            "5-col time-series",
            ['ts', 'sensor_id', 'value', 'quality', 'tag'],
            [DateType, Int32Type, DoubleType, FloatType, UTF8Type],
            [datetime.datetime(2025, 4, 5, 12, 0, 0, 123456), 42, 3.14, 0.95, 'alpha'],
        ),
        (
            "8-col wide row",
            ['ts', 'id', 'v1', 'v2', 'v3', 'v4', 'flag', 'name'],
            [DateType, LongType, DoubleType, DoubleType, FloatType, FloatType, BooleanType, UTF8Type],
            [datetime.datetime(2025, 1, 1), 12345678, 1.1, 2.2, 3.3, 4.4, True, 'test-row'],
        ),
    )

    print(f"=== BoundStatement.bind() no-encryption fast path ({n:,} iters) ===\n")

    for label, names, types, bind_values in schemas:
        prepared = make_prepared_statement(names, types)

        def do_bind():
            # A fresh BoundStatement per call, so construction is timed too.
            bound = BoundStatement(prepared)
            bound.bind(bind_values)

        # Untimed warmup calls before the measured run.
        for _ in range(warmup_calls):
            do_bind()

        elapsed = timeit.timeit(do_bind, number=n)
        ns_per = elapsed / n * 1e9
        print(f" {label}:")
        print(f" {ns_per:.1f} ns/call ({n:,} iters)")
103+
104+
if __name__ == "__main__":
    # Print the interpreter version first so results can be attributed to a
    # specific Python build.
    interpreter_banner = f"Python {sys.version}\n"
    print(interpreter_banner)
    bench()

cassandra/query.py

Lines changed: 41 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -636,28 +636,48 @@ def bind(self, values):
636636

637637
self.raw_values = values
638638
self.values = []
639-
for value, col_spec in zip(values, col_meta):
640-
if value is None:
641-
self.values.append(None)
642-
elif value is UNSET_VALUE:
643-
if proto_version >= 4:
644-
self._append_unset_value()
639+
if ce_policy:
640+
# Column encryption enabled — need ColDesc per column
641+
for value, col_spec in zip(values, col_meta):
642+
if value is None:
643+
self.values.append(None)
644+
elif value is UNSET_VALUE:
645+
if proto_version >= 4:
646+
self._append_unset_value()
647+
else:
648+
raise ValueError("Attempt to bind UNSET_VALUE while using unsuitable protocol version (%d < 4)" % proto_version)
645649
else:
646-
raise ValueError("Attempt to bind UNSET_VALUE while using unsuitable protocol version (%d < 4)" % proto_version)
647-
else:
648-
try:
649-
col_desc = ColDesc(col_spec.keyspace_name, col_spec.table_name, col_spec.name)
650-
uses_ce = ce_policy and ce_policy.contains_column(col_desc)
651-
col_type = ce_policy.column_type(col_desc) if uses_ce else col_spec.type
652-
col_bytes = col_type.serialize(value, proto_version)
653-
if uses_ce:
654-
col_bytes = ce_policy.encrypt(col_desc, col_bytes)
655-
self.values.append(col_bytes)
656-
except (TypeError, struct.error) as exc:
657-
actual_type = type(value)
658-
message = ('Received an argument of invalid type for column "%s". '
659-
'Expected: %s, Got: %s; (%s)' % (col_spec.name, col_spec.type, actual_type, exc))
660-
raise TypeError(message)
650+
try:
651+
col_desc = ColDesc(col_spec.keyspace_name, col_spec.table_name, col_spec.name)
652+
uses_ce = ce_policy.contains_column(col_desc)
653+
col_type = ce_policy.column_type(col_desc) if uses_ce else col_spec.type
654+
col_bytes = col_type.serialize(value, proto_version)
655+
if uses_ce:
656+
col_bytes = ce_policy.encrypt(col_desc, col_bytes)
657+
self.values.append(col_bytes)
658+
except (TypeError, struct.error) as exc:
659+
actual_type = type(value)
660+
message = ('Received an argument of invalid type for column "%s". '
661+
'Expected: %s, Got: %s; (%s)' % (col_spec.name, col_spec.type, actual_type, exc))
662+
raise TypeError(message)
663+
else:
664+
# Fast path — no column encryption (common case)
665+
for value, col_spec in zip(values, col_meta):
666+
if value is None:
667+
self.values.append(None)
668+
elif value is UNSET_VALUE:
669+
if proto_version >= 4:
670+
self._append_unset_value()
671+
else:
672+
raise ValueError("Attempt to bind UNSET_VALUE while using unsuitable protocol version (%d < 4)" % proto_version)
673+
else:
674+
try:
675+
self.values.append(col_spec.type.serialize(value, proto_version))
676+
except (TypeError, struct.error) as exc:
677+
actual_type = type(value)
678+
message = ('Received an argument of invalid type for column "%s". '
679+
'Expected: %s, Got: %s; (%s)' % (col_spec.name, col_spec.type, actual_type, exc))
680+
raise TypeError(message)
661681

662682
if proto_version >= 4:
663683
diff = col_meta_len - len(self.values)

0 commit comments

Comments
 (0)