Skip to content

Commit 6057c76

Browse files
committed
(improvement) cqltypes: cache apply_parameters for parameterized types
Cache the result of apply_parameters() so that parameterized CQL type classes (e.g. ListType(UTF8Type), MapType(UTF8Type, Int32Type)) are created once via type() and returned as stable singletons on subsequent calls.

The cache is a class-level dict keyed on (cls, tuple(subtypes), names). Only the base _CassandraType.apply_parameters is cached; UserType and VectorType overrides remain uncached.

Stable singletons also benefit downstream caches (deserializer cache, ParseDesc cache) for non-prepared queries, since id()-based lookups now see the same type objects across result sets.

## Benchmark results (median, pytest-benchmark)

### Single apply_parameters call

| Type | **Before** (original) | **After** (with cache) |
|------------------|-----------------------|------------------------|
| List\<UTF8\> | 5,332 ns | 139 ns |
| Map\<UTF8,Int32\>| 5,455 ns | 134 ns |
| Set\<UUID\> | 5,502 ns | 132 ns |
| Frozen\<Timestamp\>| 5,329 ns | 134 ns |

### Batch apply (simulating read_type for result metadata)

| Scenario | **Before** (original) | **After** (with cache) |
|-----------------------|-----------------------|------------------------|
| 4 parameterized types | 21,914 ns | 751 ns |
| 8 parameterized types | 43,845 ns | 1,599 ns |
| 10-col mixed metadata | 27,673 ns | 1,090 ns |

All 116 unit tests pass (1 skipped — pre-existing test_datetype issue).
1 parent 9c53d78 commit 6057c76

2 files changed

Lines changed: 647 additions & 229 deletions

File tree

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
# Copyright DataStax, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
Benchmarks for apply_parameters() with and without caching.
17+
18+
apply_parameters() creates parameterized CQL type classes (e.g.
19+
ListType(UTF8Type), MapType(UTF8Type, Int32Type)) using type(), which
20+
is expensive. Caching makes repeated calls return the same class object
21+
(stable singletons), eliminating the type() overhead.
22+
23+
Run with:
24+
pytest benchmarks/test_apply_parameters_benchmark.py -v
25+
"""
26+
27+
import pytest
28+
29+
from cassandra import cqltypes
30+
31+
32+
# ---------------------------------------------------------------------------
33+
# Reference: original uncached implementation (copied from master)
34+
# ---------------------------------------------------------------------------
35+
36+
37+
def apply_parameters_uncached(cls, subtypes, names=None):
    """Original apply_parameters without caching (baseline).

    Builds a brand-new subclass of ``cls`` with ``type()`` on every call, so
    two calls with identical arguments return two distinct class objects.

    :param cls: base type class; must expose ``num_subtypes``, ``typename``,
        ``cassname`` and ``cass_parameterized_type_with(subtypes)``.
    :param subtypes: sequence of subtype classes to parameterize ``cls`` with.
    :param names: optional field names, stored on the result as ``fieldnames``.
    :returns: a new subclass of ``cls`` named by
        ``cls.cass_parameterized_type_with(subtypes)``.
    :raises ValueError: if ``cls.num_subtypes`` is not ``"UNKNOWN"`` and
        differs from ``len(subtypes)``.
    """
    if cls.num_subtypes != "UNKNOWN" and len(subtypes) != cls.num_subtypes:
        raise ValueError(
            "%s types require %d subtypes (%d given)"
            % (cls.typename, cls.num_subtypes, len(subtypes))
        )
    newname = cls.cass_parameterized_type_with(subtypes)
    return type(
        newname,
        (cls,),
        {"subtypes": subtypes, "cassname": cls.cassname, "fieldnames": names},
    )
50+
51+
52+
# ---------------------------------------------------------------------------
53+
# Test type combinations (representative of real workloads)
54+
# ---------------------------------------------------------------------------
55+
56+
# Non-parameterized CQL types; used as-is, without apply_parameters().
SIMPLE_TYPES = [
    cqltypes.UTF8Type,
    cqltypes.Int32Type,
    cqltypes.LongType,
    cqltypes.FloatType,
    cqltypes.DoubleType,
    cqltypes.BooleanType,
    cqltypes.TimestampType,
    cqltypes.UUIDType,
    cqltypes.BytesType,
    cqltypes.DecimalType,
]
68+
69+
# Parameterized type specs: (base_class, subtypes_tuple)
# NOTE: the FrozenType entry calls ListType.apply_parameters() while this
# module is being imported, so that one parameterized type is built up front.
PARAM_TYPE_SPECS = [
    (cqltypes.ListType, (cqltypes.UTF8Type,)),
    (cqltypes.ListType, (cqltypes.Int32Type,)),
    (cqltypes.SetType, (cqltypes.UTF8Type,)),
    (cqltypes.SetType, (cqltypes.UUIDType,)),
    (cqltypes.MapType, (cqltypes.UTF8Type, cqltypes.Int32Type)),
    (cqltypes.MapType, (cqltypes.UTF8Type, cqltypes.UTF8Type)),
    (cqltypes.FrozenType, (cqltypes.ListType.apply_parameters((cqltypes.UTF8Type,)),)),
    (cqltypes.ReversedType, (cqltypes.TimestampType,)),
]
80+
81+
82+
# ---------------------------------------------------------------------------
83+
# Benchmark: Single apply_parameters call
84+
# ---------------------------------------------------------------------------
85+
86+
87+
@pytest.mark.parametrize(
    "base_cls,subtypes",
    [
        (cqltypes.ListType, (cqltypes.UTF8Type,)),
        (cqltypes.MapType, (cqltypes.UTF8Type, cqltypes.Int32Type)),
        (cqltypes.SetType, (cqltypes.UUIDType,)),
        (cqltypes.FrozenType, (cqltypes.TimestampType,)),
    ],
    ids=["List<UTF8>", "Map<UTF8,Int32>", "Set<UUID>", "Frozen<Timestamp>"],
)
def test_apply_parameters_uncached(benchmark, base_cls, subtypes):
    """Baseline: call type() every time (original code path).

    ``benchmark`` is the pytest-benchmark fixture; it times repeated calls of
    the reference implementation for one (base class, subtypes) pair.
    """
    benchmark(apply_parameters_uncached, base_cls, subtypes)
100+
101+
102+
@pytest.mark.parametrize(
    "base_cls,subtypes",
    [
        (cqltypes.ListType, (cqltypes.UTF8Type,)),
        (cqltypes.MapType, (cqltypes.UTF8Type, cqltypes.Int32Type)),
        (cqltypes.SetType, (cqltypes.UUIDType,)),
        (cqltypes.FrozenType, (cqltypes.TimestampType,)),
    ],
    ids=["List<UTF8>", "Map<UTF8,Int32>", "Set<UUID>", "Frozen<Timestamp>"],
)
def test_apply_parameters_cached(benchmark, base_cls, subtypes):
    """Cached: dict lookup on hit (new code path).

    One untimed call is made first so that the timed calls measure only the
    already-populated (cache hit) path.
    """
    # Warm the cache
    base_cls.apply_parameters(subtypes)

    benchmark(base_cls.apply_parameters, subtypes)
118+
119+
120+
# ---------------------------------------------------------------------------
121+
# Benchmark: Batch of apply_parameters (simulating read_type for a result set)
122+
# ---------------------------------------------------------------------------
123+
124+
125+
def _batch_uncached(specs):
    """Apply parameters for a batch of type specs without caching.

    :param specs: iterable of ``(base_class, subtypes)`` pairs.
    :returns: list with one freshly built type per spec, in input order.
    """
    return [apply_parameters_uncached(cls, st) for cls, st in specs]
128+
129+
130+
def _batch_cached(specs):
    """Apply parameters for a batch of type specs with caching.

    :param specs: iterable of ``(base_class, subtypes)`` pairs.
    :returns: list with the result of ``base_class.apply_parameters(subtypes)``
        for each spec, in input order.
    """
    return [cls.apply_parameters(st) for cls, st in specs]
133+
134+
135+
@pytest.mark.parametrize("n_specs", [4, 8])
def test_batch_apply_uncached(benchmark, n_specs):
    """Batch: build N parameterized types without caching."""
    # Use the first n_specs entries of the shared spec table.
    specs = PARAM_TYPE_SPECS[:n_specs]
    benchmark(_batch_uncached, specs)
140+
141+
142+
@pytest.mark.parametrize("n_specs", [4, 8])
def test_batch_apply_cached(benchmark, n_specs):
    """Batch: build N parameterized types with caching."""
    # Use the first n_specs entries of the shared spec table.
    specs = PARAM_TYPE_SPECS[:n_specs]
    # Warm: one untimed pass so the timed passes exercise the populated path.
    _batch_cached(specs)
    benchmark(_batch_cached, specs)
149+
150+
151+
# ---------------------------------------------------------------------------
152+
# Benchmark: Simulated read_type column parsing for a typical result set
153+
# with mixed simple and parameterized types
154+
# ---------------------------------------------------------------------------
155+
156+
157+
def _simulate_metadata_uncached():
    """Simulate parsing column metadata for a 10-column result set.

    :returns: list of 10 types: the first five entries of ``SIMPLE_TYPES``
        followed by five parameterized types built with the uncached
        reference implementation.
    """
    types = []
    # 5 simple types (no apply_parameters needed)
    types.extend(SIMPLE_TYPES[:5])
    # 5 parameterized types
    types.append(apply_parameters_uncached(cqltypes.ListType, (cqltypes.UTF8Type,)))
    types.append(
        apply_parameters_uncached(
            cqltypes.MapType, (cqltypes.UTF8Type, cqltypes.Int32Type)
        )
    )
    types.append(apply_parameters_uncached(cqltypes.SetType, (cqltypes.UUIDType,)))
    types.append(
        apply_parameters_uncached(cqltypes.FrozenType, (cqltypes.TimestampType,))
    )
    types.append(apply_parameters_uncached(cqltypes.ReversedType, (cqltypes.LongType,)))
    return types
175+
176+
177+
def _simulate_metadata_cached():
    """Simulate parsing column metadata for a 10-column result set (cached).

    :returns: list of 10 types: the first five entries of ``SIMPLE_TYPES``
        followed by five parameterized types obtained through each base
        class's own ``apply_parameters``.
    """
    types = []
    # 5 simple types (no apply_parameters needed)
    types.extend(SIMPLE_TYPES[:5])
    # 5 parameterized types
    types.append(cqltypes.ListType.apply_parameters((cqltypes.UTF8Type,)))
    types.append(
        cqltypes.MapType.apply_parameters((cqltypes.UTF8Type, cqltypes.Int32Type))
    )
    types.append(cqltypes.SetType.apply_parameters((cqltypes.UUIDType,)))
    types.append(cqltypes.FrozenType.apply_parameters((cqltypes.TimestampType,)))
    types.append(cqltypes.ReversedType.apply_parameters((cqltypes.LongType,)))
    return types
189+
190+
191+
def test_simulate_metadata_uncached(benchmark):
    """Simulate metadata parsing for 10-col result set — uncached."""
    benchmark(_simulate_metadata_uncached)
194+
195+
196+
def test_simulate_metadata_cached(benchmark):
    """Simulate metadata parsing for 10-col result set — cached."""
    # Warm cache: one untimed run before the measured runs.
    _simulate_metadata_cached()
    benchmark(_simulate_metadata_cached)
201+
202+
203+
# ---------------------------------------------------------------------------
204+
# Correctness tests
205+
# ---------------------------------------------------------------------------
206+
207+
208+
def test_cached_returns_same_object():
    """Cached apply_parameters returns the exact same type object."""
    a = cqltypes.ListType.apply_parameters((cqltypes.UTF8Type,))
    b = cqltypes.ListType.apply_parameters((cqltypes.UTF8Type,))
    # Identity, not equality: both calls must yield one shared class object.
    assert a is b
213+
214+
215+
def test_different_params_different_types():
    """Different subtypes produce different cached types."""
    a = cqltypes.ListType.apply_parameters((cqltypes.UTF8Type,))
    b = cqltypes.ListType.apply_parameters((cqltypes.Int32Type,))
    assert a is not b
    # Each result must carry its own subtypes, not the other call's.
    assert a.subtypes == (cqltypes.UTF8Type,)
    assert b.subtypes == (cqltypes.Int32Type,)
222+
223+
224+
def test_cached_type_attributes():
    """Cached type has correct attributes."""
    t = cqltypes.MapType.apply_parameters((cqltypes.UTF8Type, cqltypes.LongType))
    assert t.subtypes == (cqltypes.UTF8Type, cqltypes.LongType)
    assert issubclass(t, cqltypes.MapType)
    assert t.cassname == cqltypes.MapType.cassname
230+
231+
232+
def test_cached_matches_uncached():
    """Cached version produces equivalent types to uncached.

    For every entry of ``PARAM_TYPE_SPECS`` the type from
    ``cls.apply_parameters`` and the one from the reference implementation
    must agree on ``subtypes`` and ``cassname`` and both subclass ``cls``.
    """
    for cls, subtypes in PARAM_TYPE_SPECS:
        cached = cls.apply_parameters(subtypes)
        uncached = apply_parameters_uncached(cls, subtypes)

        assert cached.subtypes == uncached.subtypes
        assert cached.cassname == uncached.cassname
        assert issubclass(cached, cls)
        assert issubclass(uncached, cls)
242+
243+
244+
def test_nested_parameterized_types():
    """Nested parameterized types (e.g. List<Map<text,int>>) are cached."""
    inner = cqltypes.MapType.apply_parameters((cqltypes.UTF8Type, cqltypes.Int32Type))
    # The same parameterized inner type is passed as the subtype both times.
    outer1 = cqltypes.ListType.apply_parameters((inner,))
    outer2 = cqltypes.ListType.apply_parameters((inner,))
    assert outer1 is outer2
    assert outer1.subtypes == (inner,)

0 commit comments

Comments
 (0)