Skip to content

Commit 6556867

Browse files
committed
(improvement) cqltypes: cache apply_parameters for parameterized types
## Summary

Cache the result of `_CassandraType.apply_parameters()` so that repeated calls with the same base type and subtypes return the same class object instead of calling `type()` every time.

`apply_parameters()` is called by `parse_casstype_args()` during column metadata parsing (in `protocol.py:read_type()`) for every parameterized column type (ListType, SetType, MapType, TupleType, FrozenType, ReversedType). For prepared statements the same metadata is parsed repeatedly, making this a hot path.

The cache is a class-level dict `_apply_parameters_cache` on `_CassandraType`, keyed by `(cls, subtypes_tuple, names_tuple)`. On a cache hit it returns the previously created type directly, avoiding the `type()` call and the string formatting in `cass_parameterized_type_with()`.

UserType and VectorType have their own `apply_parameters` overrides and are not affected by this change.

## Benchmark results

| Scenario | Before (original) | After (with cache) |
|---|---|---|
| Single call (List\<UTF8\>) | 5,792 ns | 163 ns |
| Single call (Map\<UTF8,Int32\>) | 5,719 ns | 248 ns |
| Batch 4 types | 23,507 ns | 913 ns |
| Batch 8 types | 45,418 ns | 1,629 ns |
| Simulated 10-col metadata | 40,096 ns | 1,703 ns |

Run benchmarks:

    pytest benchmarks/test_apply_parameters_benchmark.py -v
1 parent 9c53d78 commit 6556867

2 files changed

Lines changed: 259 additions & 1 deletion

File tree

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
# Copyright DataStax, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
Benchmarks for apply_parameters() with and without caching.
17+
18+
apply_parameters() creates parameterized CQL type classes (e.g.
19+
ListType(UTF8Type), MapType(UTF8Type, Int32Type)) using type(), which
20+
is expensive. Caching makes repeated calls return the same class object
21+
(stable singletons), eliminating the type() overhead.
22+
23+
Run with:
24+
pytest benchmarks/test_apply_parameters_benchmark.py -v
25+
"""
26+
27+
import pytest
28+
29+
from cassandra import cqltypes
30+
31+
32+
# ---------------------------------------------------------------------------
33+
# Reference: original uncached implementation (copied from master)
34+
# ---------------------------------------------------------------------------
35+
36+
37+
def apply_parameters_uncached(cls, subtypes, names=None):
    """Build a parameterized type without any caching (benchmark baseline).

    A new class is created with ``type()`` on every call. Its name comes from
    ``cls.cass_parameterized_type_with(subtypes)``, its only base is ``cls``,
    and it carries ``subtypes``, ``cassname`` and ``fieldnames`` attributes.

    :param cls: base type class providing ``num_subtypes``, ``typename``,
        ``cassname`` and ``cass_parameterized_type_with``.
    :param subtypes: sequence of subtypes to parameterize ``cls`` with.
    :param names: optional value stored as ``fieldnames`` on the new class.
    :raises ValueError: if ``cls.num_subtypes`` is not ``"UNKNOWN"`` and does
        not equal ``len(subtypes)``.
    """
    expected = cls.num_subtypes
    # The length is only inspected when the expected count is known.
    if expected != "UNKNOWN" and len(subtypes) != expected:
        raise ValueError(
            "%s types require %d subtypes (%d given)"
            % (cls.typename, expected, len(subtypes))
        )

    type_name = cls.cass_parameterized_type_with(subtypes)
    namespace = {
        "subtypes": subtypes,
        "cassname": cls.cassname,
        "fieldnames": names,
    }
    return type(type_name, (cls,), namespace)
50+
51+
52+
# ---------------------------------------------------------------------------
53+
# Test type combinations (representative of real workloads)
54+
# ---------------------------------------------------------------------------
55+
56+
# Non-parameterized types. The simulated-metadata helpers use the first five
# entries as the "simple" columns of a result set.
SIMPLE_TYPES = [
    cqltypes.UTF8Type,
    cqltypes.Int32Type,
    cqltypes.LongType,
    cqltypes.FloatType,
    cqltypes.DoubleType,
    cqltypes.BooleanType,
    cqltypes.TimestampType,
    cqltypes.UUIDType,
    cqltypes.BytesType,
    cqltypes.DecimalType,
]

# Each entry is (base_class, subtypes_tuple): the class whose
# apply_parameters() is exercised and the subtypes passed to it.
PARAM_TYPE_SPECS = [
    (cqltypes.ListType, (cqltypes.UTF8Type,)),
    (cqltypes.ListType, (cqltypes.Int32Type,)),
    (cqltypes.SetType, (cqltypes.UTF8Type,)),
    (cqltypes.SetType, (cqltypes.UUIDType,)),
    (cqltypes.MapType, (cqltypes.UTF8Type, cqltypes.Int32Type)),
    (cqltypes.MapType, (cqltypes.UTF8Type, cqltypes.UTF8Type)),
    # FrozenType wrapping ListType(UTF8Type); the inner type is built when
    # this module is imported.
    (
        cqltypes.FrozenType,
        (cqltypes.ListType.apply_parameters((cqltypes.UTF8Type,)),),
    ),
    (cqltypes.ReversedType, (cqltypes.TimestampType,)),
]
80+
81+
82+
# ---------------------------------------------------------------------------
83+
# Benchmark: Single apply_parameters call
84+
# ---------------------------------------------------------------------------
85+
86+
87+
@pytest.mark.parametrize(
    "base_cls,subtypes",
    [
        pytest.param(cqltypes.ListType, (cqltypes.UTF8Type,), id="List<UTF8>"),
        pytest.param(
            cqltypes.MapType,
            (cqltypes.UTF8Type, cqltypes.Int32Type),
            id="Map<UTF8,Int32>",
        ),
        pytest.param(cqltypes.SetType, (cqltypes.UUIDType,), id="Set<UUID>"),
        pytest.param(
            cqltypes.FrozenType,
            (cqltypes.TimestampType,),
            id="Frozen<Timestamp>",
        ),
    ],
)
def test_apply_parameters_uncached(benchmark, base_cls, subtypes):
    """Time the baseline path, which builds a new class with type() per call.

    ``benchmark`` is the benchmarking fixture (presumably pytest-benchmark's);
    it invokes ``apply_parameters_uncached(base_cls, subtypes)``.
    """
    benchmark(apply_parameters_uncached, base_cls, subtypes)
100+
101+
102+
@pytest.mark.parametrize(
    "base_cls,subtypes",
    [
        pytest.param(cqltypes.ListType, (cqltypes.UTF8Type,), id="List<UTF8>"),
        pytest.param(
            cqltypes.MapType,
            (cqltypes.UTF8Type, cqltypes.Int32Type),
            id="Map<UTF8,Int32>",
        ),
        pytest.param(cqltypes.SetType, (cqltypes.UUIDType,), id="Set<UUID>"),
        pytest.param(
            cqltypes.FrozenType,
            (cqltypes.TimestampType,),
            id="Frozen<Timestamp>",
        ),
    ],
)
def test_apply_parameters_cached(benchmark, base_cls, subtypes):
    """Time ``base_cls.apply_parameters(subtypes)`` once the cache is warm.

    ``benchmark`` is the benchmarking fixture (presumably pytest-benchmark's).
    """
    # One untimed call first, so the timed calls are repeat calls with the
    # same arguments.
    base_cls.apply_parameters(subtypes)

    benchmark(base_cls.apply_parameters, subtypes)
118+
119+
120+
# ---------------------------------------------------------------------------
121+
# Benchmark: Batch of apply_parameters (simulating read_type for a result set)
122+
# ---------------------------------------------------------------------------
123+
124+
125+
def _batch_uncached(specs):
    """Return one freshly built type per ``(base_class, subtypes)`` spec.

    Every entry goes through ``apply_parameters_uncached``, so each call
    creates new classes.
    """
    return [
        apply_parameters_uncached(base_cls, subtypes)
        for base_cls, subtypes in specs
    ]
128+
129+
130+
def _batch_cached(specs):
    """Return one type per ``(base_class, subtypes)`` spec.

    Each entry is produced by the base class's own ``apply_parameters``.
    """
    return [
        base_cls.apply_parameters(subtypes)
        for base_cls, subtypes in specs
    ]
133+
134+
135+
@pytest.mark.parametrize("n_specs", [4, 8])
def test_batch_apply_uncached(benchmark, n_specs):
    """Time building the first ``n_specs`` parameterized types, uncached."""
    benchmark(_batch_uncached, PARAM_TYPE_SPECS[:n_specs])
140+
141+
142+
@pytest.mark.parametrize("n_specs", [4, 8])
def test_batch_apply_cached(benchmark, n_specs):
    """Time building the first ``n_specs`` parameterized types, cache warm."""
    selected = PARAM_TYPE_SPECS[:n_specs]
    # One untimed pass first, so the timed passes repeat the same calls.
    _batch_cached(selected)
    benchmark(_batch_cached, selected)
149+
150+
151+
# ---------------------------------------------------------------------------
152+
# Benchmark: Simulated read_type column parsing for a typical result set
153+
# with mixed simple and parameterized types
154+
# ---------------------------------------------------------------------------
155+
156+
157+
def _simulate_metadata_uncached():
    """Build the column types of a simulated 10-column result set, uncached.

    The result is the first five SIMPLE_TYPES followed by five parameterized
    types, each created through ``apply_parameters_uncached``.
    """
    return [
        # Five simple types: no apply_parameters call involved.
        *SIMPLE_TYPES[:5],
        # Five parameterized types, built in this order.
        apply_parameters_uncached(cqltypes.ListType, (cqltypes.UTF8Type,)),
        apply_parameters_uncached(
            cqltypes.MapType, (cqltypes.UTF8Type, cqltypes.Int32Type)
        ),
        apply_parameters_uncached(cqltypes.SetType, (cqltypes.UUIDType,)),
        apply_parameters_uncached(cqltypes.FrozenType, (cqltypes.TimestampType,)),
        apply_parameters_uncached(cqltypes.ReversedType, (cqltypes.LongType,)),
    ]
175+
176+
177+
def _simulate_metadata_cached():
    """Build the column types of a simulated 10-column result set.

    The result is the first five SIMPLE_TYPES followed by five parameterized
    types, each obtained from the base class's ``apply_parameters``.
    """
    return [
        # Five simple types: no apply_parameters call involved.
        *SIMPLE_TYPES[:5],
        # Five parameterized types, requested in this order.
        cqltypes.ListType.apply_parameters((cqltypes.UTF8Type,)),
        cqltypes.MapType.apply_parameters(
            (cqltypes.UTF8Type, cqltypes.Int32Type)
        ),
        cqltypes.SetType.apply_parameters((cqltypes.UUIDType,)),
        cqltypes.FrozenType.apply_parameters((cqltypes.TimestampType,)),
        cqltypes.ReversedType.apply_parameters((cqltypes.LongType,)),
    ]
189+
190+
191+
def test_simulate_metadata_uncached(benchmark):
    """Time the simulated 10-column metadata parse on the uncached path."""
    benchmark(_simulate_metadata_uncached)
194+
195+
196+
def test_simulate_metadata_cached(benchmark):
    """Time the simulated 10-column metadata parse with a warm cache."""
    # One untimed run first, so the timed runs repeat the same calls.
    _simulate_metadata_cached()
    benchmark(_simulate_metadata_cached)
201+
202+
203+
# ---------------------------------------------------------------------------
204+
# Correctness tests
205+
# ---------------------------------------------------------------------------
206+
207+
208+
def test_cached_returns_same_object():
    """Two calls with equal arguments yield the identical class object."""
    # Each call gets its own (equal) tuple, as in separate parse passes.
    first = cqltypes.ListType.apply_parameters((cqltypes.UTF8Type,))
    second = cqltypes.ListType.apply_parameters((cqltypes.UTF8Type,))
    assert first is second
213+
214+
215+
def test_different_params_different_types():
    """Distinct subtypes give distinct classes, each with its own subtypes."""
    text_list = cqltypes.ListType.apply_parameters((cqltypes.UTF8Type,))
    int_list = cqltypes.ListType.apply_parameters((cqltypes.Int32Type,))
    assert text_list is not int_list
    assert text_list.subtypes == (cqltypes.UTF8Type,)
    assert int_list.subtypes == (cqltypes.Int32Type,)
222+
223+
224+
def test_cached_type_attributes():
    """A type from apply_parameters exposes the expected class attributes."""
    map_type = cqltypes.MapType.apply_parameters(
        (cqltypes.UTF8Type, cqltypes.LongType)
    )
    assert map_type.subtypes == (cqltypes.UTF8Type, cqltypes.LongType)
    assert issubclass(map_type, cqltypes.MapType)
    assert map_type.cassname == cqltypes.MapType.cassname
230+
231+
232+
def test_cached_matches_uncached():
    """Types from apply_parameters match freshly built baseline types.

    For every spec in PARAM_TYPE_SPECS, the class returned by the base class's
    ``apply_parameters`` is compared with one built by
    ``apply_parameters_uncached``. The comparison covers the class name and
    each attribute the baseline hands to ``type()``: ``subtypes``,
    ``cassname`` and ``fieldnames``. Both classes must also subclass the base.
    """
    for base_cls, subtypes in PARAM_TYPE_SPECS:
        cached = base_cls.apply_parameters(subtypes)
        uncached = apply_parameters_uncached(base_cls, subtypes)

        # The baseline always creates a new class, so the two are compared
        # attribute by attribute, not by identity.
        assert cached.__name__ == uncached.__name__
        assert cached.subtypes == uncached.subtypes
        assert cached.cassname == uncached.cassname
        assert cached.fieldnames == uncached.fieldnames
        assert issubclass(cached, base_cls)
        assert issubclass(uncached, base_cls)
242+
243+
244+
def test_nested_parameterized_types():
    """A parameterized type used as a subtype still yields one shared class."""
    map_type = cqltypes.MapType.apply_parameters(
        (cqltypes.UTF8Type, cqltypes.Int32Type)
    )
    first_list = cqltypes.ListType.apply_parameters((map_type,))
    second_list = cqltypes.ListType.apply_parameters((map_type,))
    assert first_list is second_list
    assert first_list.subtypes == (map_type,)

cassandra/cqltypes.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ class _CassandraType(object, metaclass=CassandraTypeType):
273273
subtypes = ()
274274
num_subtypes = 0
275275
empty_binary_ok = False
276+
_apply_parameters_cache = {}
276277

277278
support_empty_values = False
278279
"""
@@ -371,8 +372,15 @@ def apply_parameters(cls, subtypes, names=None):
371372
if cls.num_subtypes != 'UNKNOWN' and len(subtypes) != cls.num_subtypes:
372373
raise ValueError("%s types require %d subtypes (%d given)"
373374
% (cls.typename, cls.num_subtypes, len(subtypes)))
375+
subtypes = tuple(subtypes)
376+
cache_key = (cls, subtypes, tuple(names) if names else names)
377+
cached = cls._apply_parameters_cache.get(cache_key)
378+
if cached is not None:
379+
return cached
374380
newname = cls.cass_parameterized_type_with(subtypes)
375-
return type(newname, (cls,), {'subtypes': subtypes, 'cassname': cls.cassname, 'fieldnames': names})
381+
result = type(newname, (cls,), {'subtypes': subtypes, 'cassname': cls.cassname, 'fieldnames': names})
382+
cls._apply_parameters_cache[cache_key] = result
383+
return result
376384

377385
@classmethod
378386
def cql_parameterized_type(cls):

0 commit comments

Comments
 (0)