Skip to content

Commit 8880f03

Browse files
committed
DRIVER-153: add tests, defensive warning and docs for SCYLLA_USE_METADATA_ID
- Add unit tests for the _METADATA_ID_FLAG path in recv_results_metadata (ROWS result with METADATA_CHANGED signal)
- Add unit tests for _set_result metadata cache update on METADATA_CHANGED: update both result_metadata and result_metadata_id, no-op when id absent, warning when id present but column_metadata empty
- Add unit tests for _query per-connection feature gating: skip_meta and result_metadata_id are set only when the connection negotiated SCYLLA_USE_METADATA_ID (or protocol v5) and the prepared statement carries a result_metadata_id
- Add defensive log.warning in _set_result when server sends a new result_metadata_id without column_metadata (protocol violation)
- Add write-order comment explaining thread-safety rationale for the two assignments to prepared_statement.result_metadata / result_metadata_id
- Add SCYLLA_USE_METADATA_ID section to docs/scylla-specific.rst
1 parent 82f16c5 commit 8880f03

4 files changed

Lines changed: 379 additions & 1 deletion

File tree

cassandra/cluster.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4794,7 +4794,20 @@ def _set_result(self, host, connection, pool, response):
47944794
new_result_metadata_id = getattr(response, 'result_metadata_id', None)
47954795
if self.prepared_statement and new_result_metadata_id is not None:
47964796
if response.column_metadata:
4797+
# Write result_metadata before result_metadata_id intentionally:
4798+
# a concurrent reader that still sees the old metadata_id will
4799+
# ask the server for full metadata and recover safely; a reader
4800+
# that sees the new metadata_id together with the new metadata
4801+
# is immediately correct. The opposite write order could expose
4802+
# a window where a reader uses a new metadata_id with stale metadata.
47974803
self.prepared_statement.result_metadata = response.column_metadata
4804+
else:
4805+
log.warning(
4806+
"Server sent a new result_metadata_id but no column metadata "
4807+
"for prepared statement %r. The cached column metadata will not "
4808+
"be updated; only result_metadata_id is refreshed.",
4809+
getattr(self.prepared_statement, 'query_id', None)
4810+
)
47984811
self.prepared_statement.result_metadata_id = new_result_metadata_id
47994812
if getattr(self.message, 'continuous_paging_options', None):
48004813
self._handle_continuous_paging_first_response(connection, response)

docs/scylla-specific.rst

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,3 +156,43 @@ https://github.com/scylladb/scylladb/blob/master/docs/dev/protocol-extensions.md
156156

157157
Details on sending tablet information to the drivers
158158
https://github.com/scylladb/scylladb/blob/master/docs/dev/protocol-extensions.md#sending-tablet-info-to-the-drivers
159+
160+
161+
Prepared Statement Metadata Caching (``SCYLLA_USE_METADATA_ID``)
162+
----------------------------------------------------------------
163+
164+
When executing prepared SELECT statements, the driver can ask the server
165+
to skip sending full result metadata with each response (``skip_meta`` optimization),
166+
relying on the metadata cached from the initial ``PREPARE`` call. However, if the
167+
table schema changes after a statement is prepared (e.g., a column is added, removed,
168+
or its type is altered), this cached metadata becomes stale — leading to decoding
169+
errors or incorrect data.
170+
171+
ScyllaDB solves this by backporting the ``metadata_id`` mechanism from CQL native
172+
protocol v5 as a v4 extension: ``SCYLLA_USE_METADATA_ID``. When this extension is
173+
negotiated, the server includes a hash of the result metadata in the ``PREPARE``
174+
response. The driver sends this hash back with every ``EXECUTE`` request. If the
175+
schema has changed, the server sets the ``METADATA_CHANGED`` flag and returns the
176+
new metadata hash together with the updated column definitions. The driver
177+
automatically updates its cache and uses the new metadata to decode the current
178+
response — all transparently, with no application code change required.
179+
180+
**Behaviour summary:**
181+
182+
- Automatically negotiated at connection time when the ScyllaDB node supports it.
183+
- ``skip_meta`` is enabled (metadata omitted from EXECUTE responses) only when it
184+
is safe: the connection must have negotiated ``SCYLLA_USE_METADATA_ID`` (or use
185+
CQL v5), *and* the prepared statement must carry a ``result_metadata_id`` obtained
186+
from PREPARE.
187+
- When a schema change is detected by the server, the driver refreshes both the
188+
cached column metadata and the metadata hash for that prepared statement so that
189+
all subsequent executions benefit immediately.
190+
- Statements prepared before the extension was negotiated (e.g., during a rolling
191+
upgrade) retain ``result_metadata_id=None`` and fall back to always requesting
192+
full metadata, which is the safest option.
193+
194+
**Current scope:** schema-change detection is implemented for SELECT statements.
195+
UPDATE/INSERT coverage is planned in a separate effort.
196+
197+
For full protocol details see the ScyllaDB CQL extensions documentation:
198+
https://opensource.docs.scylladb.com/stable/cql/cql-extensions.html

tests/unit/test_protocol.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424
_PAGING_OPTIONS_FLAG, _WITH_SERIAL_CONSISTENCY_FLAG,
2525
_PAGE_SIZE_FLAG, _WITH_PAGING_STATE_FLAG,
2626
_SKIP_METADATA_FLAG,
27-
BatchMessage, ResultMessage
27+
BatchMessage, ResultMessage,
28+
RESULT_KIND_ROWS
2829
)
2930
from cassandra.protocol_features import ProtocolFeatures
3031
from cassandra.query import BatchType
@@ -153,6 +154,44 @@ def test_recv_results_prepared_no_extension_skips_metadata_id(self):
153154
assert msg.query_id == b'ab'
154155
assert msg.result_metadata_id is None
155156

157+
def test_recv_results_metadata_changed_flag(self):
158+
"""
159+
When _METADATA_ID_FLAG (0x0008) is set in a ROWS result,
160+
recv_results_metadata must read and store the new result_metadata_id
161+
sent by the server (METADATA_CHANGED signal), and still populate
162+
column_metadata normally.
163+
"""
164+
# Wire layout for a ROWS result with METADATA_CHANGED:
165+
# flags: int(0x0008) = _METADATA_ID_FLAG
166+
# colcount: int(0)
167+
# result_metadata_id: short(4) + b'new1'
168+
# (no columns — colcount=0 — to keep the buffer minimal)
169+
buf = io.BytesIO(
170+
struct.pack('>i', 0x0008) # flags: METADATA_ID_FLAG
171+
+ struct.pack('>i', 0) # colcount = 0
172+
+ struct.pack('>H', 4) + b'new1' # result_metadata_id = b'new1'
173+
)
174+
msg = ResultMessage(kind=RESULT_KIND_ROWS)
175+
msg.recv_results_metadata(buf, user_type_map={})
176+
assert msg.result_metadata_id == b'new1'
177+
assert msg.column_metadata == []
178+
179+
def test_recv_results_metadata_no_metadata_flag_skips_metadata_id(self):
180+
"""
181+
When _NO_METADATA_FLAG (0x0004) is set, recv_results_metadata returns
182+
early and must NOT read or set result_metadata_id or column_metadata.
183+
(Only _NO_METADATA_FLAG is set in this buffer; the combined _NO_METADATA_FLAG | _METADATA_ID_FLAG case is not exercised here.)
184+
"""
185+
# flags = _NO_METADATA_FLAG (0x0004), colcount = 0
186+
buf = io.BytesIO(
187+
struct.pack('>i', 0x0004) # flags: NO_METADATA
188+
+ struct.pack('>i', 0) # colcount = 0
189+
)
190+
msg = ResultMessage(kind=RESULT_KIND_ROWS)
191+
msg.recv_results_metadata(buf, user_type_map={})
192+
assert not hasattr(msg, 'result_metadata_id') or msg.result_metadata_id is None
193+
assert not hasattr(msg, 'column_metadata') or msg.column_metadata is None
194+
156195
def test_query_message(self):
157196
"""
158197
Test to check the appropriate calls are made

0 commit comments

Comments
 (0)