Skip to content

Commit 84b599c

Browse files
committed
cluster: add control-connection query fallback
Add an opt-in control-connection fallback for application queries when the driver cannot populate its normal node pools. This happens in deployments that expose the cluster through a non-broadcast IP address, such as a TCP proxy or a node's public IP. In that mode the driver can still execute queries over the single control connection, but throughput is poor and connection churn increases the chance of request errors. The option is intentionally disabled by default and should not be used in production.

Also propagate keyspace updates on the fallback path so that USE keeps the control connection in sync.

Tests:
- tests/unit/test_cluster.py::ClusterTest::test_set_keyspace_for_all_pools_reports_all_errors
- tests/unit/test_response_future.py::ResponseFutureTests::test_control_connection_fallback_updates_connection_keyspace
1 parent 51dd366 commit 84b599c

7 files changed

Lines changed: 689 additions & 33 deletions

File tree

cassandra/cluster.py

Lines changed: 209 additions & 24 deletions
Large diffs are not rendered by default.

docs/api/cassandra/cluster.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ Clusters and Sessions
4848

4949
.. autoattribute:: control_connection_timeout
5050

51+
.. autoattribute:: allow_control_connection_query_fallback
52+
5153
.. autoattribute:: idle_heartbeat_interval
5254

5355
.. autoattribute:: idle_heartbeat_timeout
@@ -106,6 +108,9 @@ Clusters and Sessions
106108

107109
.. automethod:: set_meta_refresh_enabled
108110

111+
.. autoclass:: ControlConnectionQueryFallback
112+
:members:
113+
109114
.. autoclass:: ExecutionProfile (load_balancing_policy=<object object>, retry_policy=None, consistency_level=ConsistencyLevel.LOCAL_ONE, serial_consistency_level=None, request_timeout=10.0, row_factory=<function named_tuple_factory>, speculative_execution_policy=None)
110115
:members:
111116
:exclude-members: consistency_level

tests/integration/cqlengine/model/test_model.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -259,10 +259,8 @@ class SensitiveModel(Model):
259259
rows[-1]
260260
rows[-1:]
261261

262-
# ignore DeprecationWarning('The loop argument is deprecated since Python 3.8, and scheduled for removal in Python 3.10.')
263-
relevant_warnings = [warn for warn in w if "The loop argument is deprecated" not in str(warn.message)]
262+
warning_messages = [str(warn.message) for warn in w]
264263

265-
assert "__table_name_case_sensitive__ will be removed in 4.0." in str(relevant_warnings[0].message)
266-
assert "__table_name_case_sensitive__ will be removed in 4.0." in str(relevant_warnings[1].message)
267-
assert "ModelQuerySet indexing with negative indices support will be removed in 4.0." in str(relevant_warnings[2].message)
268-
assert "ModelQuerySet slicing with negative indices support will be removed in 4.0." in str(relevant_warnings[3].message)
264+
assert sum("__table_name_case_sensitive__ will be removed in 4.0." in message for message in warning_messages) == 2
265+
assert sum("ModelQuerySet indexing with negative indices support will be removed in 4.0." in message for message in warning_messages) == 1
266+
assert sum("ModelQuerySet slicing with negative indices support will be removed in 4.0." in message for message in warning_messages) == 1

tests/integration/standard/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
"test_ip_change": 4,
3838
"test_authentication": 4,
3939
"test_authentication_misconfiguration": 4,
40+
"test_control_connection_query_fallback": 4,
4041
"test_custom_cluster": 4,
4142
"test_query": 4,
4243
# Group 5: tablets (destructive — decommissions a node)
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
# Copyright DataStax, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import unittest
16+
17+
import pytest
18+
19+
from cassandra.cluster import ControlConnectionQueryFallback, NoHostAvailable
20+
21+
from tests.integration import USE_CASS_EXTERNAL, TestCluster, local, remove_cluster, use_cluster
22+
23+
24+
# Name handed to use_cluster() when setup_module() creates the CCM cluster.
_CLUSTER_NAME = "control_connection_query_fallback"
25+
# Value setup_module() writes into node1's broadcast_rpc_address; the tests
# assert that host metadata and system.local report exactly this address.
# NOTE(review): presumably nothing accepts CQL connections on this address,
# which is what keeps the regular node pools empty — confirm.
_UNREACHABLE_BROADCAST_RPC_ADDRESS = "127.255.255.1"
26+
27+
28+
def setup_module():
    """Create and start the one-node CCM cluster used by this module.

    The node is configured to advertise
    ``_UNREACHABLE_BROADCAST_RPC_ADDRESS`` as its ``broadcast_rpc_address``
    before it is started. Nothing is done when ``USE_CASS_EXTERNAL`` is set.
    """
    if not USE_CASS_EXTERNAL:
        remove_cluster()

        # Create the cluster stopped so the option is in place for the
        # node's first start.
        cluster = use_cluster(_CLUSTER_NAME, [1], start=False)
        node1 = cluster.nodes["node1"]
        node1.set_configuration_options(
            values={"broadcast_rpc_address": _UNREACHABLE_BROADCAST_RPC_ADDRESS},
        )
        cluster.start(wait_for_binary_proto=True, wait_other_notice=True)
39+
40+
41+
def teardown_module():
    """Call ``remove_cluster()`` unless ``USE_CASS_EXTERNAL`` is set."""
    if not USE_CASS_EXTERNAL:
        remove_cluster()
46+
47+
48+
@local
class ControlConnectionQueryFallbackIntegrationTests(unittest.TestCase):
    """Integration tests for each ``ControlConnectionQueryFallback`` mode.

    Every test builds its own ``TestCluster`` and stores it on
    ``self.cluster`` so that ``tearDown`` can shut it down.
    """

    # Query issued by the fallback tests to prove a request can be executed.
    _SYSTEM_LOCAL_QUERY = "SELECT release_version, rpc_address FROM system.local WHERE key='local'"

    def setUp(self):
        self.cluster = None

    def tearDown(self):
        if self.cluster is None:
            return
        self.cluster.shutdown()

    def _build_cluster(self, fallback_mode):
        """Return a ``TestCluster`` configured with the given fallback mode."""
        return TestCluster(
            allow_control_connection_query_fallback=fallback_mode,
            connect_timeout=1,
            monitor_reporting_enabled=False,
        )

    def _assert_unreachable_broadcast_rpc_metadata(self):
        """Check that metadata holds one host at the configured address.

        Returns the single host found in the cluster metadata.
        """
        known_hosts = self.cluster.metadata.all_hosts()
        assert len(known_hosts) == 1

        only_host = known_hosts[0]
        assert only_host.broadcast_rpc_address == _UNREACHABLE_BROADCAST_RPC_ADDRESS
        assert only_host.endpoint.address == _UNREACHABLE_BROADCAST_RPC_ADDRESS
        return only_host

    def _assert_system_local_query(self, session):
        """Run the system.local query on ``session`` and check its row."""
        row = session.execute(self._SYSTEM_LOCAL_QUERY).one()
        assert str(row.rpc_address) == _UNREACHABLE_BROADCAST_RPC_ADDRESS
        assert row.release_version

    def test_disabled_raises_when_broadcast_rpc_address_is_unreachable(self):
        """Disabled mode: connect() raises, yet the control connection exists."""
        self.cluster = self._build_cluster(ControlConnectionQueryFallback.Disabled)

        with pytest.raises(NoHostAvailable):
            self.cluster.connect()

        self._assert_unreachable_broadcast_rpc_metadata()
        assert self.cluster.control_connection._connection is not None
        assert self.cluster.get_all_pools() == []

    def test_fallback_executes_queries_when_broadcast_rpc_address_is_unreachable(self):
        """Fallback mode: pool creation is attempted, none remain, queries run."""
        self.cluster = self._build_cluster(ControlConnectionQueryFallback.Fallback)

        session = self.cluster.connect()

        self._assert_unreachable_broadcast_rpc_metadata()
        # Initial pool futures were created even though no pool survived.
        assert session._initial_connect_futures
        assert list(session.get_pools()) == []

        self._assert_system_local_query(session)

    def test_no_node_pool_fallback_executes_queries_without_creating_pools(self):
        """SkipPoolCreation mode: no pool futures at all, queries still run."""
        self.cluster = self._build_cluster(ControlConnectionQueryFallback.SkipPoolCreation)

        session = self.cluster.connect()

        self._assert_unreachable_broadcast_rpc_metadata()
        assert session._initial_connect_futures == set()
        assert list(session.get_pools()) == []

        self._assert_system_local_query(session)

tests/unit/test_cluster.py

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414
import unittest
1515

16+
from concurrent.futures import Future
1617
import logging
1718
import socket
1819
from types import SimpleNamespace
@@ -22,9 +23,9 @@
2223

2324
from cassandra import ConsistencyLevel, DriverException, Timeout, Unavailable, RequestExecutionException, ReadTimeout, WriteTimeout, CoordinationFailure, ReadFailure, WriteFailure, FunctionFailure, AlreadyExists,\
2425
InvalidRequest, Unauthorized, AuthenticationFailed, OperationTimedOut, UnsupportedOperation, RequestValidationException, ConfigurationException, ProtocolVersion
25-
from cassandra.cluster import _Scheduler, Session, Cluster, ResultSet, SchemaAgreementScope, default_lbp_factory, \
26+
from cassandra.cluster import _Scheduler, Session, Cluster, ResultSet, SchemaAgreementScope, ControlConnectionQueryFallback, default_lbp_factory, \
2627
ExecutionProfile, _ConfigMode, EXEC_PROFILE_DEFAULT
27-
from cassandra.connection import ConnectionBusy
28+
from cassandra.connection import ConnectionBusy, ConnectionException
2829
from cassandra.pool import Host
2930
from cassandra.policies import HostDistance, RetryPolicy, RoundRobinPolicy, DowngradingConsistencyRetryPolicy, SimpleConvictionPolicy
3031
from cassandra.query import SimpleStatement, named_tuple_factory, tuple_factory
@@ -186,6 +187,52 @@ def test_port_range(self):
186187
with pytest.raises(ValueError):
187188
cluster = Cluster(contact_points=['127.0.0.1'], port=invalid_port)
188189

190+
def test_control_connection_query_fallback_modes(self):
    """The option defaults to Disabled, rejects booleans, and keeps enum members."""
    default_mode = Cluster().allow_control_connection_query_fallback
    assert default_mode is ControlConnectionQueryFallback.Disabled

    # Plain booleans are not accepted in place of the enum.
    for boolean_value in (False, True):
        with pytest.raises(TypeError):
            Cluster(allow_control_connection_query_fallback=boolean_value)

    for requested_mode in (
        ControlConnectionQueryFallback.Fallback,
        ControlConnectionQueryFallback.SkipPoolCreation,
    ):
        configured = Cluster(allow_control_connection_query_fallback=requested_mode)
        assert configured.allow_control_connection_query_fallback is requested_mode
204+
205+
def test_control_connection_query_fallback_no_node_pool_mode_skips_pool_creation(self):
    """In SkipPoolCreation mode a new Session never calls add_or_renew_pool."""
    skip_mode = ControlConnectionQueryFallback.SkipPoolCreation
    cluster = Cluster(
        allow_control_connection_query_fallback=skip_mode,
        monitor_reporting_enabled=False,
    )
    host = Host("127.0.0.1", SimpleConvictionPolicy, host_id=uuid.uuid4())

    with patch.object(Session, "add_or_renew_pool") as add_pool_mock:
        session = Session(cluster, [host])

    add_pool_mock.assert_not_called()
    assert session._initial_connect_futures == set()
    assert session._pools == {}
    # Checked after the patch is removed, against the real add_or_renew_pool.
    assert session.update_created_pools() == set()
219+
220+
def test_control_connection_query_fallback_fallback_tolerates_empty_initial_pools(self):
    """In Fallback mode a Session is built even when its only pool future yields False."""
    fallback_mode = ControlConnectionQueryFallback.Fallback
    cluster = Cluster(
        allow_control_connection_query_fallback=fallback_mode,
        monitor_reporting_enabled=False,
    )
    host = Host("127.0.0.1", SimpleConvictionPolicy, host_id=uuid.uuid4())

    # A future that has already completed with False is what the patched
    # add_or_renew_pool hands back for the host.
    failed_pool_future = Future()
    failed_pool_future.set_result(False)

    with patch.object(Session, "add_or_renew_pool", return_value=failed_pool_future) as add_pool_mock:
        session = Session(cluster, [host])

    add_pool_mock.assert_called_once_with(host, is_host_addition=False)
    assert session._initial_connect_futures == {failed_pool_future}
    assert session._pools == {}
235+
189236
def test_compression_autodisabled_without_libraries(self):
190237
with patch.dict('cassandra.cluster.locally_supported_compressions', {}, clear=True):
191238
with patch('cassandra.cluster.log') as patched_logger:
@@ -551,6 +598,32 @@ def test_wait_for_schema_agreement_rejects_unknown_scope(self, *_):
551598
with pytest.raises(ValueError):
552599
session.wait_for_schema_agreement(wait_time=1, scope='planet')
553600

601+
@mock_session_pools
def test_set_keyspace_for_all_pools_reports_all_errors(self, *_):
    """The final callback gets a dict holding only the hosts that reported errors."""
    cluster = Cluster()
    host = Host("127.0.0.1", SimpleConvictionPolicy, host_id=uuid.uuid4())
    session = Session(cluster, [host])

    keyspace_error = ConnectionException("boom")
    failing_pool = Mock(host='host1')
    healthy_pool = Mock(host='host2')

    def finish_with_error(keyspace, callback):
        # Invoke the per-pool callback straight away with one error.
        return callback(failing_pool, [keyspace_error])

    def finish_cleanly(keyspace, callback):
        # Invoke the per-pool callback straight away with no errors.
        return callback(healthy_pool, [])

    failing_pool._set_keyspace_for_all_conns.side_effect = finish_with_error
    healthy_pool._set_keyspace_for_all_conns.side_effect = finish_cleanly
    session._pools = {'host1': failing_pool, 'host2': healthy_pool}

    on_done = Mock()
    session._set_keyspace_for_all_pools('ks', on_done)

    on_done.assert_called_once()
    reported_errors = on_done.call_args.args[0]
    assert reported_errors == {'host1': [keyspace_error]}
626+
554627
class ProtocolVersionTests(unittest.TestCase):
555628

556629
def test_protocol_downgrade_test(self):

0 commit comments

Comments
 (0)