Skip to content

Commit 434465b

Browse files
committed
perf: memoize cql_parameterized_type() on all type classes
Cache the computed CQL type string in a _cql_type_str class attribute. The string is computed lazily on first call and returned from cache on subsequent calls. Since type classes are immutable after apply_parameters(), no invalidation logic is needed. All 6 cql_parameterized_type() overrides are covered: _CassandraType (base), TupleType, UserType, CompositeType, DynamicCompositeType, VectorType. Benchmark (500k iters, Python 3.14): Int32Type (simple): 6.9x (157 -> 23 ns) MapType<text, int>: 22.9x (464 -> 20 ns) SetType<float>: 18.2x (371 -> 20 ns) ListType<double>: 17.3x (357 -> 21 ns) TupleType<int,text,bool>: 25.0x (509 -> 20 ns) Nested map/list/tuple: 11.4x (636 -> 56 ns)
1 parent a2ab701 commit 434465b

2 files changed

Lines changed: 132 additions & 10 deletions

File tree

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Copyright ScyllaDB, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
Micro-benchmark: cql_parameterized_type memoization.
17+
18+
Measures the cost of building the CQL type string representation
19+
with and without memoization for various type complexities.
20+
21+
Run:
22+
python benchmarks/bench_cql_parameterized_type.py
23+
"""
24+
25+
import sys
26+
import timeit
27+
28+
from cassandra.cqltypes import (
29+
MapType, SetType, ListType, TupleType,
30+
Int32Type, UTF8Type, FloatType, DoubleType, BooleanType,
31+
_CassandraType,
32+
)
33+
34+
35+
def bench():
36+
# Create parameterized types
37+
map_type = MapType.apply_parameters([UTF8Type, Int32Type])
38+
set_type = SetType.apply_parameters([FloatType])
39+
list_type = ListType.apply_parameters([DoubleType])
40+
tuple_type = TupleType.apply_parameters([Int32Type, UTF8Type, BooleanType])
41+
nested_type = MapType.apply_parameters([
42+
UTF8Type,
43+
ListType.apply_parameters([
44+
TupleType.apply_parameters([Int32Type, FloatType, DoubleType])
45+
])
46+
])
47+
48+
test_types = [
49+
("Int32Type (simple)", Int32Type),
50+
("MapType<text, int>", map_type),
51+
("SetType<float>", set_type),
52+
("ListType<double>", list_type),
53+
("TupleType<int, text, bool>", tuple_type),
54+
("MapType<text, list<tuple<int, float, double>>>", nested_type),
55+
]
56+
57+
n = 500_000
58+
print(f"=== cql_parameterized_type ({n:,} iters) ===\n")
59+
60+
for label, typ in test_types:
61+
# Clear cache to measure uncached
62+
typ._cql_type_str = None
63+
# One call to populate cache
64+
result = typ.cql_parameterized_type()
65+
66+
# Measure cached (warm)
67+
t_cached = timeit.timeit(typ.cql_parameterized_type, number=n)
68+
69+
# Measure uncached (cold)
70+
def uncached():
71+
typ._cql_type_str = None
72+
return typ.cql_parameterized_type()
73+
t_uncached = timeit.timeit(uncached, number=n)
74+
75+
saving_ns = (t_uncached - t_cached) / n * 1e9
76+
speedup = t_uncached / t_cached if t_cached > 0 else float('inf')
77+
print(f" {label}:")
78+
print(f" result: {result}")
79+
print(f" uncached: {t_uncached / n * 1e9:.1f} ns, "
80+
f"cached: {t_cached / n * 1e9:.1f} ns, "
81+
f"saving: {saving_ns:.1f} ns ({speedup:.1f}x)")
82+
83+
84+
if __name__ == "__main__":
85+
print(f"Python {sys.version}\n")
86+
bench()

cassandra/cqltypes.py

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,11 @@ class _CassandraType(object, metaclass=CassandraTypeType):
314314
num_subtypes = 0
315315
empty_binary_ok = False
316316

317+
# Cached result of cql_parameterized_type(). Computed lazily on first
318+
# access and stored as a class attribute. Safe because type classes are
319+
# immutable after creation via apply_parameters().
320+
_cql_type_str = None
321+
317322
support_empty_values = False
318323
"""
319324
Back in the Thrift days, empty strings were used for "null" values of
@@ -428,12 +433,18 @@ def cql_parameterized_type(cls):
428433
Return a CQL type specifier for this type. If this type has parameters,
429434
they are included in standard CQL <> notation.
430435
"""
436+
result = cls._cql_type_str
437+
if result is not None:
438+
return result
431439
if not cls.subtypes:
432-
return cls.typename
433-
return "%s<%s>" % (
434-
cls.typename,
435-
", ".join(styp.cql_parameterized_type() for styp in cls.subtypes),
436-
)
440+
result = cls.typename
441+
else:
442+
result = "%s<%s>" % (
443+
cls.typename,
444+
", ".join(styp.cql_parameterized_type() for styp in cls.subtypes),
445+
)
446+
cls._cql_type_str = result
447+
return result
437448

438449
@classmethod
439450
def cass_parameterized_type(cls, full=False):
@@ -1042,10 +1053,15 @@ def serialize_safe(cls, val, protocol_version):
10421053

10431054
@classmethod
10441055
def cql_parameterized_type(cls):
1056+
result = cls._cql_type_str
1057+
if result is not None:
1058+
return result
10451059
subtypes_string = ", ".join(
10461060
sub.cql_parameterized_type() for sub in cls.subtypes
10471061
)
1048-
return "frozen<tuple<%s>>" % (subtypes_string,)
1062+
result = "frozen<tuple<%s>>" % (subtypes_string,)
1063+
cls._cql_type_str = result
1064+
return result
10491065

10501066

10511067
class UserType(TupleType):
@@ -1102,7 +1118,12 @@ def apply_parameters(cls, subtypes, names):
11021118

11031119
@classmethod
11041120
def cql_parameterized_type(cls):
1105-
return "frozen<%s>" % (cls.typename,)
1121+
result = cls._cql_type_str
1122+
if result is not None:
1123+
return result
1124+
result = "frozen<%s>" % (cls.typename,)
1125+
cls._cql_type_str = result
1126+
return result
11061127

11071128
@classmethod
11081129
def deserialize_safe(cls, byts, protocol_version):
@@ -1182,8 +1203,13 @@ def cql_parameterized_type(cls):
11821203
"""
11831204
There is no CQL notation for Composites, so we override this.
11841205
"""
1206+
result = cls._cql_type_str
1207+
if result is not None:
1208+
return result
11851209
typestring = cls.cass_parameterized_type(full=True)
1186-
return "'%s'" % (typestring,)
1210+
result = "'%s'" % (typestring,)
1211+
cls._cql_type_str = result
1212+
return result
11871213

11881214
@classmethod
11891215
def deserialize_safe(cls, byts, protocol_version):
@@ -1208,11 +1234,16 @@ class DynamicCompositeType(_ParameterizedType):
12081234

12091235
@classmethod
12101236
def cql_parameterized_type(cls):
1237+
result = cls._cql_type_str
1238+
if result is not None:
1239+
return result
12111240
sublist = ", ".join(
12121241
"%s=>%s" % (alias, typ.cass_parameterized_type(full=True))
12131242
for alias, typ in zip(cls.fieldnames, cls.subtypes)
12141243
)
1215-
return "'%s(%s)'" % (cls.typename, sublist)
1244+
result = "'%s(%s)'" % (cls.typename, sublist)
1245+
cls._cql_type_str = result
1246+
return result
12161247

12171248

12181249
class ColumnToCollectionType(_ParameterizedType):
@@ -1655,8 +1686,13 @@ def serialize(cls, v, protocol_version):
16551686

16561687
@classmethod
16571688
def cql_parameterized_type(cls):
1658-
return "%s<%s, %s>" % (
1689+
result = cls._cql_type_str
1690+
if result is not None:
1691+
return result
1692+
result = "%s<%s, %s>" % (
16591693
cls.typename,
16601694
cls.subtype.cql_parameterized_type(),
16611695
cls.vector_size,
16621696
)
1697+
cls._cql_type_str = result
1698+
return result

0 commit comments

Comments
 (0)