Skip to content

Commit 203080e

Browse files
committed
(improvement) row_parser: address PR review feedback
- Fix copyright header in benchmark file (DataStax -> ScyllaDB)
- Add pytest.importorskip guard for pytest-benchmark in benchmark file
- Add unit tests for ParseDesc cache under tests/unit/cython: cache hit, miss, protocol version invalidation, clear, bounded eviction, correctness
1 parent 04e7bef commit 203080e

2 files changed

Lines changed: 215 additions & 1 deletion

File tree

benchmarks/test_parse_desc_cache_benchmark.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright DataStax, Inc.
1+
# Copyright ScyllaDB, Inc.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -40,6 +40,13 @@
4040
import struct
4141
import pytest
4242

43+
# Skip the entire module when pytest-benchmark is not installed.
44+
# The benchmark fixture is provided by the pytest-benchmark plugin which
45+
# is not in the project's dev dependencies. This guard prevents
46+
# "fixture 'benchmark' not found" errors when running bare `pytest` from
47+
# the repo root.
48+
pytest.importorskip("pytest_benchmark")
49+
4350
from cassandra import cqltypes
4451
from cassandra.policies import ColDesc
4552
from cassandra.parsing import ParseDesc
Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
# Copyright ScyllaDB, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
Unit tests for the ParseDesc cache in row_parser.pyx.
17+
18+
Validates cache hit/miss behavior, protocol_version invalidation, cache
19+
clearing, and bounded eviction — all exercised through the actual Cython
20+
_get_or_build_parse_desc function via make_recv_results_rows().
21+
"""
22+
23+
import io
24+
import struct
25+
import unittest
26+
27+
from tests.unit.cython.utils import cythontest
28+
29+
try:
30+
from cassandra.row_parser import (
31+
clear_parse_desc_cache,
32+
get_parse_desc_cache_size,
33+
make_recv_results_rows,
34+
)
35+
from cassandra.obj_parser import ListParser
36+
37+
_HAS_ROW_PARSER = True
38+
_recv_results_rows = make_recv_results_rows(ListParser())
39+
except ImportError:
40+
_HAS_ROW_PARSER = False
41+
_recv_results_rows = None
42+
43+
44+
# ---------------------------------------------------------------------------
45+
# Helpers
46+
# ---------------------------------------------------------------------------
47+
48+
49+
def _build_column_metadata(ncols):
    """Return ``ncols`` column-metadata tuples like the driver produces.

    Every entry has the form ``("ks", "tbl", "col_<index>", UTF8Type)``,
    with ``<index>`` running from 0 to ``ncols - 1``.
    """
    # Imported lazily so that merely importing this module does not
    # require ``cassandra.cqltypes``.
    from cassandra import cqltypes

    metadata = []
    for index in range(ncols):
        metadata.append(("ks", "tbl", "col_%d" % index, cqltypes.UTF8Type))
    return metadata
54+
55+
56+
# NO_METADATA_FLAG as defined in ResultMessage
57+
_NO_METADATA_FLAG = 0x0004
58+
59+
60+
class _MockResultMessage:
    """Bare-bones stand-in for ResultMessage on the prepared-statement path.

    All result attributes default to ``None`` at class level; the row
    parser under test is expected to fill in the ones it produces.
    """

    column_metadata = column_names = column_types = None
    parsed_rows = None
    paging_state = None
    continuous_paging_seq = continuous_paging_last = None
    result_metadata_id = None

    def recv_results_metadata(self, f, user_type_map):
        """Simulate the prepared-statement path (NO_METADATA_FLAG is set).

        Consumes two big-endian int32 fields from ``f`` -- the flags word
        followed by the column count -- and discards both. Nothing is
        stored on the message and ``user_type_map`` is not used.
        """
        # Two separate 4-byte reads: first the flags, then the column count.
        for _ in range(2):
            struct.unpack(">i", f.read(4))
76+
77+
78+
def _build_binary_buf(nrows, ncols, col_value=b"hello world"):
    """Build a full binary buffer for the prepared-statement path.

    Layout (all integers are big-endian int32):

    * flags word (``_NO_METADATA_FLAG``)
    * column count (``ncols``)
    * row count (``nrows``)
    * ``nrows`` rows, each made of ``ncols`` cells, where a cell is the
      length of ``col_value`` followed by ``col_value`` itself.
    """
    header = struct.pack(">iii", _NO_METADATA_FLAG, ncols, nrows)
    # Every cell carries the same length-prefixed value, so one row is the
    # cell repeated ``ncols`` times and the body is that row repeated
    # ``nrows`` times.
    cell = struct.pack(">i", len(col_value)) + col_value
    single_row = cell * ncols
    return header + single_row * nrows
89+
90+
91+
def _recv(binary_buf, col_meta, protocol_version=4, ce_policy=None):
    """Run recv_results_rows over ``binary_buf`` and return the mock message.

    A fresh ``_MockResultMessage`` is created for every call and
    ``binary_buf`` is wrapped in an ``io.BytesIO`` before being handed to
    the parser. An empty dict is passed as the fourth positional argument
    (presumably the user-type map -- confirm against row_parser.pyx).
    """
    result = _MockResultMessage()
    stream = io.BytesIO(binary_buf)
    _recv_results_rows(result, stream, protocol_version, {}, col_meta, ce_policy)
    return result
98+
99+
100+
# ---------------------------------------------------------------------------
101+
# Tests
102+
# ---------------------------------------------------------------------------
103+
104+
105+
class ParseDescCacheTest(unittest.TestCase):
    """Tests for the Cython ParseDesc cache in row_parser.pyx.

    Each test drives the cache through ``_recv`` and observes it either by
    object identity of the ``column_names`` / ``column_types`` attached to
    the returned message, or through ``get_parse_desc_cache_size()``.
    """

    def setUp(self):
        # Start every test from an empty cache so results do not depend on
        # test ordering. Guarded because the Cython helpers may be missing.
        if _HAS_ROW_PARSER:
            clear_parse_desc_cache()

    def tearDown(self):
        # Do not leak entries built by this test into whatever runs next.
        if _HAS_ROW_PARSER:
            clear_parse_desc_cache()

    @cythontest
    def test_cache_hit_returns_same_objects(self):
        """Repeated calls with the same col_meta object should return
        identical column_names and column_types objects (cache hit)."""
        col_meta = _build_column_metadata(5)
        buf = _build_binary_buf(1, 5)

        msg1 = _recv(buf, col_meta)
        msg2 = _recv(buf, col_meta)

        self.assertIs(msg1.column_names, msg2.column_names)
        self.assertIs(msg1.column_types, msg2.column_types)

    @cythontest
    def test_cache_miss_different_metadata(self):
        """Different metadata list objects should produce cache misses."""
        buf = _build_binary_buf(1, 5)
        col_meta_a = _build_column_metadata(5)
        col_meta_b = _build_column_metadata(5)

        msg_a = _recv(buf, col_meta_a)
        msg_b = _recv(buf, col_meta_b)

        # Distinct list objects with equal contents: separately built
        # results that still compare equal.
        self.assertIsNot(msg_a.column_names, msg_b.column_names)
        self.assertEqual(msg_a.column_names, msg_b.column_names)

    @cythontest
    def test_protocol_version_invalidates_cache(self):
        """Changed protocol_version should invalidate the cache entry."""
        col_meta = _build_column_metadata(5)
        buf = _build_binary_buf(1, 5)

        msg_v4 = _recv(buf, col_meta, protocol_version=4)
        msg_v5 = _recv(buf, col_meta, protocol_version=5)

        self.assertIsNot(msg_v4.column_names, msg_v5.column_names)
        # The rebuilt entry must still describe the same columns.
        self.assertEqual(msg_v4.column_names, msg_v5.column_names)

    @cythontest
    def test_clear_cache_invalidates_entries(self):
        """clear_parse_desc_cache() should invalidate cached entries."""
        col_meta = _build_column_metadata(5)
        buf = _build_binary_buf(1, 5)

        msg1 = _recv(buf, col_meta)
        clear_parse_desc_cache()
        msg2 = _recv(buf, col_meta)

        self.assertIsNot(msg1.column_names, msg2.column_names)
        self.assertEqual(msg1.column_names, msg2.column_names)

    @cythontest
    def test_cache_bounded_size(self):
        """Cache should evict entries when exceeding the max size (256)."""
        max_size = 256
        buf = _build_binary_buf(1, 5)
        # All 300 lists stay referenced for the duration of the test, so
        # each one is a distinct live object when it is looked up.
        meta_lists = [_build_column_metadata(5) for _ in range(300)]

        for meta in meta_lists:
            _recv(buf, meta)

        cache_size = get_parse_desc_cache_size()
        # Guard against a vacuous pass: an upper bound alone would also be
        # satisfied by a cache that never stores anything.
        self.assertGreater(
            cache_size,
            0,
            "Cache should not be empty after %d lookups" % len(meta_lists),
        )
        self.assertLessEqual(
            cache_size,
            max_size,
            "Cache should be bounded to 256 entries, got %d" % cache_size,
        )

    @cythontest
    def test_parsed_rows_correctness(self):
        """Verify parsed row data is correct through the cached path."""
        ncols, nrows = 5, 3
        col_meta = _build_column_metadata(ncols)
        buf = _build_binary_buf(nrows, ncols, col_value=b"test_val")

        msg = _recv(buf, col_meta)

        self.assertEqual(len(msg.parsed_rows), nrows)
        for row in msg.parsed_rows:
            self.assertEqual(len(row), ncols)
            for val in row:
                # Columns are UTF8Type, so the raw bytes come back as str.
                self.assertEqual(val, "test_val")
        self.assertEqual(msg.column_names, ["col_%d" % i for i in range(ncols)])

    @cythontest
    def test_get_cache_size(self):
        """get_parse_desc_cache_size() reports correct count."""
        # setUp cleared the cache, so it must be empty before any lookup.
        self.assertEqual(get_parse_desc_cache_size(), 0)

        col_meta = _build_column_metadata(5)
        buf = _build_binary_buf(1, 5)
        _recv(buf, col_meta)

        self.assertEqual(get_parse_desc_cache_size(), 1)

0 commit comments

Comments
 (0)