Skip to content

Commit 9fe9931

Browse files
mykaul authored and dkropachev committed
tests: fix flaky tablet tests by increasing trace timeout and polling for invalidation
The tablet tests were intermittently failing for two reasons:
1. get_query_trace() used the default 2s max_wait, which is too short under resource pressure (--smp 2). It is now called with max_wait_sec=10.
2. test_tablets_invalidation_decommission_non_cc_node used a fixed time.sleep(2), hoping that tablet metadata invalidation would complete within that time. The sleep is replaced with wait_until polling for the tablet record to be purged (0.5s delay, 20 attempts = 10s budget).
1 parent 4a23f72 commit 9fe9931

1 file changed

Lines changed: 8 additions & 8 deletions

File tree

tests/integration/standard/test_tablets.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,3 @@
1-
import time
2-
31
import pytest
42

53
from cassandra.cluster import Cluster, EXEC_PROFILE_DEFAULT, ExecutionProfile
@@ -29,7 +27,7 @@ def teardown_class(cls):
2927
cls.cluster.shutdown()
3028

3129
def verify_hosts_in_tracing(self, results, expected):
32-
traces = results.get_query_trace()
30+
traces = results.get_query_trace(max_wait_sec=10)
3331
events = traces.events
3432
host_set = set()
3533
for event in events:
@@ -55,7 +53,7 @@ def get_tablet_record(self, query):
5553
return metadata._tablets.get_tablet_for_key(query.keyspace, query.table, metadata.token_map.token_class.from_key(query.routing_key))
5654

5755
def verify_same_shard_in_tracing(self, results):
58-
traces = results.get_query_trace()
56+
traces = results.get_query_trace(max_wait_sec=10)
5957
events = traces.events
6058
shard_set = set()
6159
for event in events:
@@ -241,8 +239,8 @@ def decommission_non_cc_node(rec):
241239
wait_until(
242240
lambda: len([h for h in self.cluster.metadata.all_hosts() if h.is_up]) < 3,
243241
delay=1, max_attempts=60)
244-
# Allow additional time for tablet metadata invalidation to propagate
245-
time.sleep(2)
242+
# Tablet metadata invalidation may take additional time to propagate;
243+
# run_tablets_invalidation_test will poll for the expected result.
246244

247245
self.run_tablets_invalidation_test(decommission_non_cc_node)
248246

@@ -266,5 +264,7 @@ def run_tablets_invalidation_test(self, invalidate):
266264

267265
invalidate(rec)
268266

269-
# Check if tablets information was purged
270-
assert self.get_tablet_record(bound) is None, "tablet was not deleted, invalidation did not work"
267+
# Wait for tablets information to be purged (invalidation is async)
268+
wait_until(
269+
lambda: self.get_tablet_record(bound) is None,
270+
delay=0.5, max_attempts=20)

0 commit comments

Comments
 (0)