Skip to content

Commit 9dc1ddf

Browse files
committed
Address CoPilot review: bound caches, add clear API, fix benchmark imports
- Add 256-entry size cap to both _deserializer_cache and _make_deserializers_cache to prevent unbounded growth from non-interned parameterized types in unprepared queries. - Add clear_deserializer_caches() public API so that runtime Des* class overrides (e.g. DesBytesType = DesBytesTypeByteArray for cqlsh) can flush stale cached instances. - Add get_deserializer_cache_sizes() diagnostic helper. - Document override/cache interaction in code comments. - Fix benchmark copyright (DataStax -> ScyllaDB), add pytest.importorskip guards for pytest-benchmark and Cython. - Add 11 unit tests for cache hit/miss, clear, eviction bounds, and size reporting.
1 parent 5e6d4aa commit 9dc1ddf

4 files changed

Lines changed: 790 additions & 244 deletions

File tree

benchmarks/test_deserializer_cache_benchmark.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright DataStax, Inc.
1+
# Copyright ScyllaDB, Inc.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -16,10 +16,16 @@
1616
Benchmarks for find_deserializer / make_deserializers with and without caching.
1717
1818
Run with: pytest benchmarks/test_deserializer_cache_benchmark.py -v
19+
20+
Requires the ``pytest-benchmark`` plugin and Cython extensions to be built.
21+
Skipped automatically when either dependency is unavailable.
1922
"""
2023

2124
import pytest
2225

26+
pytest.importorskip("pytest_benchmark")
27+
pytest.importorskip("cassandra.deserializers")
28+
2329
from cassandra import cqltypes
2430
from cassandra.deserializers import (
2531
find_deserializer,

cassandra/deserializers.pyx

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,20 +440,33 @@ cdef class GenericDeserializer(Deserializer):
440440
#--------------------------------------------------------------------------
441441
# Helper utilities
442442

443+
# Maximum number of entries in each deserializer cache. In practice the
444+
# caches are bounded by the number of distinct column-type signatures in
445+
# the schema (typically dozens to low hundreds), but parameterized types
446+
# created via apply_parameters() for unprepared queries are *not*
447+
# interned, so repeated simple queries could accumulate entries. The cap
448+
# prevents unbounded growth in such edge cases.
449+
cdef int _CACHE_MAX_SIZE = 256
450+
443451
# Cache make_deserializers results keyed on the tuple of cqltype objects.
444452
# Using the cqltype objects themselves (rather than id()) as keys ensures
445453
# the dict holds strong references, preventing GC and id() reuse issues
446454
# with non-singleton parameterized types.
447455
cdef dict _make_deserializers_cache = {}
448456

449457
def make_deserializers(cqltypes):
450-
"""Create an array of Deserializers for each given cqltype in cqltypes"""
458+
"""Create an array of Deserializers for each given cqltype in cqltypes.
459+
460+
The returned array may be a cached object shared across callers.
461+
Callers must not modify the returned array."""
451462
cdef tuple key = tuple(cqltypes)
452463
try:
453464
return _make_deserializers_cache[key]
454465
except KeyError:
455466
pass
456467
result = obj_array([find_deserializer(ct) for ct in cqltypes])
468+
if len(_make_deserializers_cache) >= _CACHE_MAX_SIZE:
469+
_make_deserializers_cache.clear()
457470
_make_deserializers_cache[key] = result
458471
return result
459472

@@ -464,6 +477,11 @@ cdef dict classes = globals()
464477
# repeated class lookups and object creation on every result set.
465478
# Using the object as key (rather than id()) holds a strong reference,
466479
# preventing GC and id() reuse issues with parameterized types.
480+
#
481+
# Note: if a Des* class is overridden at runtime (e.g. DesBytesType =
482+
# DesBytesTypeByteArray for cqlsh), callers must invoke
483+
# clear_deserializer_caches() to flush stale entries so that subsequent
484+
# find_deserializer() calls pick up the new class.
467485
cdef dict _deserializer_cache = {}
468486

469487
cpdef Deserializer find_deserializer(cqltype):
@@ -501,10 +519,29 @@ cpdef Deserializer find_deserializer(cqltype):
501519
cls = GenericDeserializer
502520

503521
cdef Deserializer result = cls(cqltype)
522+
if len(_deserializer_cache) >= _CACHE_MAX_SIZE:
523+
_deserializer_cache.clear()
504524
_deserializer_cache[cqltype] = result
505525
return result
506526

507527

528+
def clear_deserializer_caches():
529+
"""Clear the find_deserializer and make_deserializers caches.
530+
531+
Call this after overriding a Des* class at runtime (e.g.
532+
``deserializers.DesBytesType = deserializers.DesBytesTypeByteArray``)
533+
so that subsequent lookups pick up the new class instead of returning
534+
stale cached instances.
535+
"""
536+
_deserializer_cache.clear()
537+
_make_deserializers_cache.clear()
538+
539+
540+
def get_deserializer_cache_sizes():
541+
"""Return ``(find_cache_size, make_cache_size)`` for diagnostic use."""
542+
return len(_deserializer_cache), len(_make_deserializers_cache)
543+
544+
508545
def obj_array(list objs):
509546
"""Create a (Cython) array of objects given a list of objects"""
510547
cdef object[:] arr

0 commit comments

Comments
 (0)