Skip to content

Commit 2a7f478

Browse files
committed
(improvement) cache deserializer instances in find_deserializer and make_deserializers
Cache find_deserializer() and make_deserializers() results in Cython cdef dict caches keyed on cqltype objects to avoid repeated class lookups and Deserializer object creation on every result set. Using cqltype objects (not id()) as cache keys holds strong references, preventing GC/id-reuse correctness issues with parameterized types.

## Motivation

On every result set, make_deserializers(coltypes) is called from row_parser.pyx:37, which in turn calls find_deserializer() for each column type. These functions perform class name lookups and issubclass() chains, then create fresh Deserializer objects -- all redundant work when the same column types appear repeatedly (which is always the case for prepared statements).

## Benchmark results

Benchmarks compare the original code (Before) against the new cached implementation (After).

find_deserializer (single type lookup):

| Variant | Min | Mean | Median | Ops/sec |
|---|---|---|---|---|
| Before (original) | 266.0 ns | 305.0 ns | 292.0 ns | 3.3 Mops/s |
| After (with cache) | 44.0 ns | 49.0 ns | 47.8 ns | 20.4 Mops/s |

make_deserializers (5 types):

| Variant | Min | Mean | Median | Ops/sec |
|---|---|---|---|---|
| Before (original) | 1,976 ns | 2,438 ns | 2,435 ns | 410 Kops/s |
| After (with cache) | 74.9 ns | 83.5 ns | 81.7 ns | 12,000 Kops/s |

make_deserializers (10 types):

| Variant | Min | Mean | Median | Ops/sec |
|---|---|---|---|---|
| Before (original) | 3,553 ns | 3,812 ns | 3,761 ns | 262 Kops/s |
| After (with cache) | 89.7 ns | 105.1 ns | 97.6 ns | 9,511 Kops/s |

## Design notes

- Caches are cdef dict (C-level, not accessible from Python) for minimal overhead
- Cache keys are the cqltype objects themselves, not id(cqltype) -- holds strong references preventing GC and id() reuse
- For prepared statements (the hot path), cache hit rate is effectively 100%
- Cache is naturally bounded by the number of distinct cqltype objects in use

## Tests

All existing unit tests pass (108 passed, 1 skipped).
1 parent efdc08a commit 2a7f478

2 files changed

Lines changed: 234 additions & 3 deletions

File tree

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
# Copyright DataStax, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
Benchmarks for find_deserializer / make_deserializers with and without caching.
17+
18+
Run with: pytest benchmarks/test_deserializer_cache_benchmark.py -v
19+
"""
20+
21+
import pytest
22+
23+
from cassandra import cqltypes
24+
from cassandra.deserializers import (
25+
find_deserializer,
26+
make_deserializers,
27+
)
28+
29+
30+
# ---------------------------------------------------------------------------
31+
# Reference: original uncached implementations (copied from master)
32+
# ---------------------------------------------------------------------------
33+
34+
_classes = {}
35+
36+
37+
def _init_classes():
    """Fill ``_classes`` with every class exposed by ``cassandra.deserializers``.

    The module scan only happens while ``_classes`` is still empty; once it
    has been populated, further calls return immediately.
    """
    if _classes:
        return

    from cassandra import deserializers as des_module

    for attr_name in dir(des_module):
        candidate = getattr(des_module, attr_name)
        # Only classes are interesting; skip functions, constants, etc.
        if not isinstance(candidate, type):
            continue
        _classes[attr_name] = candidate
46+
47+
48+
def find_deserializer_uncached(cqltype):
    """Reference (uncached) lookup: build a new deserializer for ``cqltype``.

    Resolution order:

    1. A class named ``"Des" + cqltype.__name__`` in ``_classes``.
    2. The first entry of the fallback table whose cqltype base class
       ``cqltype`` is a subclass of.
    3. ``GenericDeserializer``.

    A fresh deserializer instance is constructed on every call.
    """
    _init_classes()

    from cassandra import deserializers as des_module

    exact_name = "Des" + cqltype.__name__
    if exact_name in _classes:
        return _classes[exact_name](cqltype)

    # First match wins, so the sequence below deliberately mirrors the
    # precedence of the original lookup chain.
    # NOTE(review): presumably some of these cqltypes subclass others, which
    # would make the order significant -- verify before reordering.
    fallback_table = (
        (cqltypes.ListType, "DesListType"),
        (cqltypes.SetType, "DesSetType"),
        (cqltypes.MapType, "DesMapType"),
        (cqltypes.UserType, "DesUserType"),
        (cqltypes.TupleType, "DesTupleType"),
        (cqltypes.DynamicCompositeType, "DesDynamicCompositeType"),
        (cqltypes.CompositeType, "DesCompositeType"),
        (cqltypes.ReversedType, "DesReversedType"),
        (cqltypes.FrozenType, "DesFrozenType"),
    )
    for base_type, des_name in fallback_table:
        if issubclass(cqltype, base_type):
            return getattr(des_module, des_name)(cqltype)

    return des_module.GenericDeserializer(cqltype)
97+
98+
99+
def make_deserializers_uncached(ctypes):
    """Reference (uncached) builder: one new deserializer per entry of ``ctypes``.

    Each column type is resolved through ``find_deserializer_uncached`` and
    the resulting list is wrapped with ``obj_array``.
    """
    from cassandra.deserializers import obj_array

    # obj_array expects a real list, so materialise the map eagerly.
    fresh_deserializers = list(map(find_deserializer_uncached, ctypes))
    return obj_array(fresh_deserializers)
104+
105+
106+
# ---------------------------------------------------------------------------
107+
# Test type sets
108+
# ---------------------------------------------------------------------------
109+
110+
# Column types used by the "5 types" benchmarks.
SIMPLE_TYPES = [
    cqltypes.Int32Type,
    cqltypes.UTF8Type,
    cqltypes.BooleanType,
    cqltypes.DoubleType,
    cqltypes.LongType,
]

# Column types used by the "10 types" benchmarks: the five simple types
# followed by five additional ones.
MIXED_TYPES = SIMPLE_TYPES + [
    cqltypes.FloatType,
    cqltypes.TimestampType,
    cqltypes.UUIDType,
    cqltypes.InetAddressType,
    cqltypes.DecimalType,
]
130+
131+
132+
# ---------------------------------------------------------------------------
133+
# Correctness tests
134+
# ---------------------------------------------------------------------------
135+
136+
137+
class TestDeserializerCacheCorrectness:
    """Verify the cached implementation returns equivalent deserializers."""

    # SIMPLE_TYPES and MIXED_TYPES overlap, so plain concatenation would run
    # the shared types twice. dict.fromkeys de-duplicates while preserving
    # order; ids gives each case a readable name in the pytest report.
    @pytest.mark.parametrize(
        "cqltype",
        list(dict.fromkeys(SIMPLE_TYPES + MIXED_TYPES)),
        ids=lambda t: t.__name__,
    )
    def test_find_deserializer_returns_correct_type(self, cqltype):
        """Cached and uncached lookups must pick the same deserializer class."""
        cached = find_deserializer(cqltype)
        uncached = find_deserializer_uncached(cqltype)
        # Compare the classes themselves, not just their names, so two
        # distinct classes that happen to share a name cannot pass.
        assert type(cached) is type(uncached)

    def test_find_deserializer_cache_hit_same_object(self):
        """A repeated lookup for the same cqltype returns the cached instance."""
        d1 = find_deserializer(cqltypes.Int32Type)
        d2 = find_deserializer(cqltypes.Int32Type)
        assert d1 is d2

    def test_make_deserializers_returns_correct_length(self):
        """The result holds exactly one deserializer per input column type."""
        result = make_deserializers(SIMPLE_TYPES)
        assert len(result) == len(SIMPLE_TYPES)

    def test_make_deserializers_cache_hit_same_object(self):
        """A repeated call with the same column types returns the cached array."""
        r1 = make_deserializers(SIMPLE_TYPES)
        r2 = make_deserializers(SIMPLE_TYPES)
        # Should be the exact same cached object
        assert r1 is r2
160+
161+
162+
# ---------------------------------------------------------------------------
163+
# Benchmarks
164+
# ---------------------------------------------------------------------------
165+
166+
167+
class TestFindDeserializerBenchmark:
    """Time a single-type ``find_deserializer`` lookup, cached vs uncached."""

    # --- Single simple type ---

    @pytest.mark.benchmark(group="find_deser_simple")
    def test_uncached_simple(self, benchmark):
        """Baseline: the reference implementation without any cache."""
        column_type = cqltypes.Int32Type
        benchmark(find_deserializer_uncached, column_type)

    @pytest.mark.benchmark(group="find_deser_simple")
    def test_cached_simple(self, benchmark):
        """Cached implementation, measured with the cache already populated."""
        column_type = cqltypes.Int32Type
        # Prime the cache explicitly so the measurement does not depend on
        # which tests happened to run earlier.
        find_deserializer(column_type)
        benchmark(find_deserializer, column_type)
181+
182+
183+
class TestMakeDeserializersBenchmark:
    """Time ``make_deserializers`` over whole column lists, cached vs uncached."""

    # --- 5 simple types ---

    @pytest.mark.benchmark(group="make_deser_5types")
    def test_uncached_5types(self, benchmark):
        """Baseline for five column types, no cache involved."""
        column_types = SIMPLE_TYPES
        benchmark(make_deserializers_uncached, column_types)

    @pytest.mark.benchmark(group="make_deser_5types")
    def test_cached_5types(self, benchmark):
        """Cached path for five column types, measured after priming."""
        column_types = SIMPLE_TYPES
        make_deserializers(column_types)  # prime the cache before timing
        benchmark(make_deserializers, column_types)

    # --- 10 mixed types ---

    @pytest.mark.benchmark(group="make_deser_10types")
    def test_uncached_10types(self, benchmark):
        """Baseline for ten column types, no cache involved."""
        column_types = MIXED_TYPES
        benchmark(make_deserializers_uncached, column_types)

    @pytest.mark.benchmark(group="make_deser_10types")
    def test_cached_10types(self, benchmark):
        """Cached path for ten column types, measured after priming."""
        column_types = MIXED_TYPES
        make_deserializers(column_types)  # prime the cache before timing
        benchmark(make_deserializers, column_types)

cassandra/deserializers.pyx

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -440,16 +440,39 @@ cdef class GenericDeserializer(Deserializer):
440440
#--------------------------------------------------------------------------
441441
# Helper utilities
442442

443+
# Cache make_deserializers results keyed on the tuple of cqltype objects.
444+
# Using the cqltype objects themselves (rather than id()) as keys ensures
445+
# the dict holds strong references, preventing GC and id() reuse issues
446+
# with non-singleton parameterized types.
447+
cdef dict _make_deserializers_cache = {}
448+
443449
def make_deserializers(cqltypes):
    """Create an array of Deserializers for each given cqltype in cqltypes.

    Results are memoised in ``_make_deserializers_cache`` under the tuple of
    cqltype objects, so a repeated call with the same column types returns
    the very same array object rather than building a new one.

    :param cqltypes: iterable of cqltype objects, one per column. One-shot
        iterables (generators, iterators) are supported.
    :return: the ``obj_array`` of deserializers, in column order.

    NOTE(review): the returned array is shared between all callers that pass
    the same column types -- callers are assumed not to mutate it; confirm.
    NOTE(review): entries are never evicted and the key holds strong
    references to the cqltype objects. If parameterized cqltypes are created
    afresh per response (the cache comments call them "non-singleton"), this
    dict can grow without bound -- confirm and consider a size cap.
    """
    # Materialise the input exactly once. Everything below must iterate
    # ``key`` rather than ``cqltypes``: a one-shot iterable is already
    # exhausted by tuple(), and iterating it again would build (and cache,
    # under the real key) an empty array.
    cdef tuple key = tuple(cqltypes)
    try:
        return _make_deserializers_cache[key]
    except KeyError:
        pass
    result = obj_array([find_deserializer(ct) for ct in key])
    _make_deserializers_cache[key] = result
    return result
447459

448460

449461
cdef dict classes = globals()
450462

463+
# Cache deserializer instances keyed on the cqltype object itself to avoid
464+
# repeated class lookups and object creation on every result set.
465+
# Using the object as key (rather than id()) holds a strong reference,
466+
# preventing GC and id() reuse issues with parameterized types.
467+
cdef dict _deserializer_cache = {}
468+
451469
cpdef Deserializer find_deserializer(cqltype):
452470
"""Find a deserializer for a cqltype"""
471+
try:
472+
return <Deserializer>_deserializer_cache[cqltype]
473+
except KeyError:
474+
pass
475+
453476
name = 'Des' + cqltype.__name__
454477

455478
if name in globals():
@@ -477,7 +500,9 @@ cpdef Deserializer find_deserializer(cqltype):
477500
else:
478501
cls = GenericDeserializer
479502

480-
return cls(cqltype)
503+
cdef Deserializer result = cls(cqltype)
504+
_deserializer_cache[cqltype] = result
505+
return result
481506

482507

483508
def obj_array(list objs):

0 commit comments

Comments
 (0)