Skip to content

Commit 580b455

Browse files
committed
perf: cache column_names/column_types on PreparedStatement to avoid per-result list comprehensions
For prepared statements executed with skip_meta=True (the common case), column_metadata is the same object every time, yet the column_names and column_types lists are rebuilt via list comprehension for every result set. Pre-compute and cache these lists on PreparedStatement at prepare time; in _set_result, use the cached lists directly instead of the per-response lists. The cache is refreshed whenever result_metadata is updated during re-prepare. Benchmark (column_names + column_types extraction): 5 cols: 226 ns -> 30 ns (7.4x); 10 cols: 340 ns -> 28 ns (12.2x); 20 cols: 589 ns -> 31 ns (18.9x); 50 cols: 1160 ns -> 29 ns (39.6x).
1 parent 0a46094 commit 580b455

3 files changed

Lines changed: 94 additions & 3 deletions

File tree

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Copyright ScyllaDB, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
Micro-benchmark: column_names / column_types extraction from metadata.
17+
18+
Measures the cost of building [c[2] for c in metadata] and [c[3] for c in metadata]
19+
vs using pre-cached lists (as done for prepared statements with result_metadata).
20+
21+
Run:
22+
python benchmarks/bench_col_names_cache.py
23+
"""
24+
25+
import sys
26+
import timeit
27+
28+
29+
def make_column_metadata(ncols):
    """Build ``ncols`` fake column-metadata rows for the benchmark.

    Each row is a 4-tuple ``(keyspace, table, column_name, column_type)``,
    the layout the benchmark indexes with ``c[2]`` and ``c[3]``.  Every row
    shares the same placeholder type class.
    """
    class FakeType:
        pass

    rows = []
    for idx in range(ncols):
        rows.append((f"ks_{idx}", f"tbl_{idx}", f"col_{idx}", FakeType))
    return rows
34+
35+
36+
def bench(col_counts=(5, 10, 20, 50), iterations=500_000):
    """Time rebuilding column name/type lists vs. returning cached lists.

    For each column count, fake metadata is generated, and two callables are
    timed with ``timeit``: one that rebuilds ``[c[2] ...]`` and ``[c[3] ...]``
    on every call, and one that returns lists computed once up front.  One
    summary line per column count is printed to stdout.

    :param col_counts: iterable of column counts to benchmark.
    :param iterations: number of calls timed for each variant; must be >= 1.
    :raises ValueError: if ``iterations`` is less than 1.
    """
    if iterations < 1:
        # The per-call figures divide by ``iterations``.
        raise ValueError("iterations must be a positive integer")

    for ncols in col_counts:
        metadata = make_column_metadata(ncols)

        # Pre-cached (done once at prepare time)
        cached_names = [c[2] for c in metadata]
        cached_types = [c[3] for c in metadata]

        # Both closures are only called within this loop iteration, so
        # late binding of ``metadata`` / ``cached_*`` is not a concern.
        def extract_uncached():
            names = [c[2] for c in metadata]
            types = [c[3] for c in metadata]
            return names, types

        def extract_cached():
            return cached_names, cached_types

        t_uncached = timeit.timeit(extract_uncached, number=iterations)
        t_cached = timeit.timeit(extract_cached, number=iterations)

        # Convert total seconds to nanoseconds per call.
        uncached_ns = t_uncached / iterations * 1e9
        cached_ns = t_cached / iterations * 1e9
        saving_ns = (t_uncached - t_cached) / iterations * 1e9
        # Guard against a zero reading from a coarse timer.
        speedup = t_uncached / t_cached if t_cached > 0 else float('inf')
        print(f" {ncols} cols: uncached={uncached_ns:.1f} ns, "
              f"cached={cached_ns:.1f} ns, "
              f"saving={saving_ns:.1f} ns ({speedup:.1f}x)")
61+
62+
63+
def _main():
    """Print the interpreter version and a section header, then run bench()."""
    print(f"Python {sys.version}")
    print("\n=== column_names / column_types extraction ===")
    bench()


# Only run the benchmark when executed as a script, not on import.
if __name__ == "__main__":
    _main()

cassandra/cluster.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4773,12 +4773,21 @@ def _set_result(self, host, connection, pool, response):
47734773
self, connection, **response.schema_change_event)
47744774
elif response.kind == RESULT_KIND_ROWS:
47754775
self._paging_state = response.paging_state
4776-
self._col_names = response.column_names
4777-
self._col_types = response.column_types
4776+
# Use pre-cached column names/types from PreparedStatement
4777+
# when available to avoid rebuilding lists from metadata.
4778+
ps = self.prepared_statement
4779+
if ps is not None and ps._result_col_names is not None:
4780+
col_names = ps._result_col_names
4781+
col_types = ps._result_col_types
4782+
else:
4783+
col_names = response.column_names
4784+
col_types = response.column_types
4785+
self._col_names = col_names
4786+
self._col_types = col_types
47784787
if getattr(self.message, 'continuous_paging_options', None):
47794788
self._handle_continuous_paging_first_response(connection, response)
47804789
else:
4781-
self._set_final_result(self.row_factory(response.column_names, response.parsed_rows))
4790+
self._set_final_result(self.row_factory(col_names, response.parsed_rows))
47824791
elif response.kind == RESULT_KIND_VOID:
47834792
self._set_final_result(None)
47844793
else:
@@ -4944,6 +4953,7 @@ def _execute_after_prepare(self, host, connection, pool, response):
49444953
)
49454954
))
49464955
self.prepared_statement.result_metadata = response.column_metadata
4956+
self.prepared_statement._cache_result_metadata_columns(response.column_metadata)
49474957
new_metadata_id = response.result_metadata_id
49484958
if new_metadata_id is not None:
49494959
self.prepared_statement.result_metadata_id = new_metadata_id

cassandra/query.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,11 @@ class PreparedStatement(object):
459459
serial_consistency_level = None # TODO never used?
460460
_is_lwt = False
461461

462+
# Cached column names/types derived from result_metadata, to avoid
463+
# rebuilding [c[2] for c in result_metadata] on every result set.
464+
_result_col_names = None
465+
_result_col_types = None
466+
462467
def __init__(self, column_metadata, query_id, routing_key_indexes, query,
463468
keyspace, protocol_version, result_metadata, result_metadata_id,
464469
is_lwt=False, column_encryption_policy=None):
@@ -469,11 +474,21 @@ def __init__(self, column_metadata, query_id, routing_key_indexes, query,
469474
self.keyspace = keyspace
470475
self.protocol_version = protocol_version
471476
self.result_metadata = result_metadata
477+
self._cache_result_metadata_columns(result_metadata)
472478
self.result_metadata_id = result_metadata_id
473479
self.column_encryption_policy = column_encryption_policy
474480
self.is_idempotent = False
475481
self._is_lwt = is_lwt
476482

483+
def _cache_result_metadata_columns(self, result_metadata):
484+
"""Pre-compute column names and types from result_metadata."""
485+
if result_metadata:
486+
self._result_col_names = [c[2] for c in result_metadata]
487+
self._result_col_types = [c[3] for c in result_metadata]
488+
else:
489+
self._result_col_names = None
490+
self._result_col_types = None
491+
477492
@classmethod
478493
def from_message(cls, query_id, column_metadata, pk_indexes, cluster_metadata,
479494
query, prepared_keyspace, protocol_version, result_metadata,

0 commit comments

Comments
 (0)