Skip to content

Commit bd6a9c5

Browse files
committed
(improvement) LWT no-shuffle shortcut with determinism tests
Restructure the shuffle block in TokenAwarePolicy.make_query_plan to use a nested 'if' instead of 'and', making it explicit that LWT queries skip both the shuffle and the list copy (Paxos leader optimization). Includes 4 new unit tests verifying: LWT deterministic ordering, LWT skips list copy, non-LWT shuffle works, and LWT+cache determinism. Benchmark (100K queries, 45-node/5-DC topology, Python 3.14, median of 5 runs): Policy | Kops/s | vs master | Mem KB --------------------------------------------------------- DCAware | 204 | +92% | 1.5 RackAware | 180 | +165% | 2.0 TokenAware(DCAware) | 60 | +233% | 207.5 TokenAware(RackAware) | 57 | +235% | 87.1 Default(DCAware) | 132 | +45% | 1.6 HostFilter(DCAware) | 66 | +25% | 1.7 Cumulative improvement over master (all 7 commits): DCAware: +92% (106 -> 204 Kops/s) RackAware: +165% ( 68 -> 180 Kops/s) TokenAware(DCAware): +233% ( 18 -> 60 Kops/s) TokenAware(RackAware):+235% ( 17 -> 57 Kops/s) Default(DCAware): +45% ( 91 -> 132 Kops/s) HostFilter(DCAware): +25% ( 53 -> 66 Kops/s) Note: TokenAware benchmark uses unique keys per query (0% cache hit rate). Real workloads with repeated partition keys will benefit further from the replica cache added in the previous commit. The LWT shortcut itself has no measurable impact on non-LWT queries (which this benchmark exercises); its benefit is deterministic ordering for LWT.
1 parent 5e89ce9 commit bd6a9c5

2 files changed

Lines changed: 104 additions & 3 deletions

File tree

cassandra/policies.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -770,9 +770,10 @@ def make_query_plan(self, working_keyspace=None, query=None):
770770
exc_info=True,
771771
)
772772

773-
if self.shuffle_replicas and not query.is_lwt():
774-
replicas = list(replicas)
775-
shuffle(replicas)
773+
if self.shuffle_replicas:
774+
if not query.is_lwt():
775+
replicas = list(replicas)
776+
shuffle(replicas)
776777

777778
local_rack = []
778779
local = []

tests/unit/test_policies.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1445,6 +1445,106 @@ def test_tablet_path_not_cached(self):
14451445
# Cache should remain empty (tablet results are not cached)
14461446
assert len(policy._replica_cache) == 0
14471447

1448+
# --- LWT determinism tests ---
1449+
1450+
def _make_lwt_query(self, routing_key, keyspace="ks"):
1451+
"""Create a Statement that reports is_lwt()=True."""
1452+
query = Statement(routing_key=routing_key, keyspace=keyspace)
1453+
query.is_lwt = lambda: True
1454+
return query
1455+
1456+
@patch("cassandra.policies.shuffle")
1457+
def test_lwt_no_shuffle(self, patched_shuffle):
1458+
"""LWT queries should yield replicas in deterministic order."""
1459+
cluster, hosts = self._make_cache_cluster()
1460+
1461+
child_policy = Mock()
1462+
child_policy.make_query_plan.return_value = hosts
1463+
child_policy.make_query_plan_with_exclusion.side_effect = lambda k, q, e: [
1464+
h for h in hosts if h not in e
1465+
]
1466+
child_policy.distance.return_value = HostDistance.LOCAL
1467+
1468+
policy = TokenAwarePolicy(child_policy, shuffle_replicas=True)
1469+
policy.populate(cluster, hosts)
1470+
1471+
query = self._make_lwt_query(routing_key=b"key1")
1472+
1473+
plans = [list(policy.make_query_plan(None, query)) for _ in range(5)]
1474+
1475+
# All plans should be identical (deterministic)
1476+
for plan in plans[1:]:
1477+
assert plan == plans[0]
1478+
1479+
# shuffle should never have been called
1480+
assert patched_shuffle.call_count == 0
1481+
1482+
@patch("cassandra.policies.shuffle")
1483+
def test_lwt_replicas_not_copied(self, patched_shuffle):
1484+
"""LWT path should not copy the replicas list (no list() call)."""
1485+
cluster, hosts = self._make_cache_cluster()
1486+
1487+
child_policy = Mock()
1488+
child_policy.make_query_plan.return_value = hosts
1489+
child_policy.make_query_plan_with_exclusion.side_effect = lambda k, q, e: [
1490+
h for h in hosts if h not in e
1491+
]
1492+
child_policy.distance.return_value = HostDistance.LOCAL
1493+
1494+
policy = TokenAwarePolicy(child_policy, shuffle_replicas=True)
1495+
policy.populate(cluster, hosts)
1496+
1497+
query = self._make_lwt_query(routing_key=b"key1")
1498+
list(policy.make_query_plan(None, query))
1499+
1500+
# shuffle was never called, which means list() was also not called
1501+
assert patched_shuffle.call_count == 0
1502+
1503+
@patch("cassandra.policies.shuffle")
1504+
def test_non_lwt_shuffled(self, patched_shuffle):
1505+
"""Non-LWT queries with shuffle_replicas=True should shuffle."""
1506+
cluster, hosts = self._make_cache_cluster()
1507+
1508+
child_policy = Mock()
1509+
child_policy.make_query_plan.return_value = hosts
1510+
child_policy.make_query_plan_with_exclusion.side_effect = lambda k, q, e: [
1511+
h for h in hosts if h not in e
1512+
]
1513+
child_policy.distance.return_value = HostDistance.LOCAL
1514+
1515+
policy = TokenAwarePolicy(child_policy, shuffle_replicas=True)
1516+
policy.populate(cluster, hosts)
1517+
1518+
query = Statement(routing_key=b"key1", keyspace="ks")
1519+
list(policy.make_query_plan(None, query))
1520+
1521+
assert patched_shuffle.call_count == 1
1522+
1523+
@patch("cassandra.policies.shuffle")
1524+
def test_lwt_with_cache_deterministic(self, patched_shuffle):
1525+
"""LWT + cache should produce identical plans on repeated calls."""
1526+
cluster, hosts = self._make_cache_cluster()
1527+
1528+
child_policy = Mock()
1529+
child_policy.make_query_plan.return_value = hosts
1530+
child_policy.make_query_plan_with_exclusion.side_effect = lambda k, q, e: [
1531+
h for h in hosts if h not in e
1532+
]
1533+
child_policy.distance.return_value = HostDistance.LOCAL
1534+
1535+
policy = TokenAwarePolicy(child_policy, shuffle_replicas=True)
1536+
policy.populate(cluster, hosts)
1537+
1538+
query = self._make_lwt_query(routing_key=b"key1")
1539+
1540+
plan1 = list(policy.make_query_plan(None, query))
1541+
plan2 = list(policy.make_query_plan(None, query))
1542+
1543+
assert plan1 == plan2
1544+
assert patched_shuffle.call_count == 0
1545+
# Should have been a cache hit on the second call
1546+
assert cluster.metadata.token_map.get_replicas.call_count == 1
1547+
14481548

14491549
class ConvictionPolicyTest(unittest.TestCase):
14501550
def test_not_implemented(self):

0 commit comments

Comments
 (0)