Skip to content

Commit 3d5cfbd

Browse files
committed
perf: batch-filter drop_tablets_by_host_id instead of triple pop
Replace the per-tablet reversed pop() loop (O(k*n) for each of the three parallel lists) with a single-pass index filter that rebuilds each list once. This avoids repeated list element shifting and scales better when many tablets are dropped at once.

Benchmark (3 replicas/tablet, ~33% dropped):

| Tablets | Old (triple-pop) | New (batch-filter) | Speedup |
|--------:|-----------------:|-------------------:|--------:|
| 100     | 123 us           | 128 us             | ~1.0x   |
| 1,000   | 1,375 us         | 1,113 us           | 1.24x   |
| 10,000  | 25,429 us        | 13,079 us          | 1.94x   |

Add 3 unit tests for drop_tablets_by_host_id covering matching, None host_id, and nonexistent host_id.
1 parent ee5f840 commit 3d5cfbd

2 files changed

Lines changed: 45 additions & 9 deletions

File tree

cassandra/tablets.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -96,15 +96,16 @@ def drop_tablets_by_host_id(self, host_id: Optional[UUID]):
96 96
return
97 97
with self._lock:
98 98
for key, tablets in self._tablets.items():
99-
to_be_deleted = []
100-
for tablet_id, tablet in enumerate(tablets):
101-
if tablet.replica_contains_host_id(host_id):
102-
to_be_deleted.append(tablet_id)
103-
104-
for tablet_id in reversed(to_be_deleted):
105-
tablets.pop(tablet_id)
106-
self._first_tokens[key].pop(tablet_id)
107-
self._last_tokens[key].pop(tablet_id)
99+
# Filter in one pass instead of popping one-by-one (O(n) vs O(k*n))
100+
keep = [i for i, t in enumerate(tablets)
101+
if not t.replica_contains_host_id(host_id)]
102+
if len(keep) == len(tablets):
103+
continue # nothing to drop
104+
self._tablets[key] = [tablets[i] for i in keep]
105+
first = self._first_tokens[key]
106+
last = self._last_tokens[key]
107+
self._first_tokens[key] = [first[i] for i in keep]
108+
self._last_tokens[key] = [last[i] for i in keep]
108 109

109 110
def add_tablet(self, keyspace, table, tablet):
110 111
with self._lock:

tests/unit/test_tablets.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,3 +184,38 @@ def gen():
184 184
self.assertEqual(t._replica_dict, {u1: 3, u2: 7})
185 185
self.assertTrue(t.replica_contains_host_id(u1))
186 186
self.assertTrue(t.replica_contains_host_id(u2))
187+
188+
189+
class DropTabletsByHostIdTest(unittest.TestCase):
190+
"""Tests for Tablets.drop_tablets_by_host_id batch-filter path."""
191+
192+
def test_drop_removes_matching_tablets(self):
193+
u1 = UUID('12345678-1234-5678-1234-567812345678')
194+
u2 = UUID('87654321-4321-8765-4321-876543218765')
195+
t1 = Tablet(0, 100, [(u1, 0)])
196+
t2 = Tablet(100, 200, [(u2, 0)])
197+
t3 = Tablet(200, 300, [(u1, 1), (u2, 1)])
198+
tablets = Tablets({("ks", "tb"): [t1, t2, t3]})
199+
200+
tablets.drop_tablets_by_host_id(u1)
201+
202+
remaining = tablets._tablets[("ks", "tb")]
203+
self.assertEqual(len(remaining), 1)
204+
self.assertIs(remaining[0], t2)
205+
# Verify token index lists are in sync
206+
self.assertEqual(tablets._first_tokens[("ks", "tb")], [100])
207+
self.assertEqual(tablets._last_tokens[("ks", "tb")], [200])
208+
209+
def test_drop_none_host_id_is_noop(self):
210+
t1 = Tablet(0, 100, [("host1", 0)])
211+
tablets = Tablets({("ks", "tb"): [t1]})
212+
tablets.drop_tablets_by_host_id(None)
213+
self.assertEqual(len(tablets._tablets[("ks", "tb")]), 1)
214+
215+
def test_drop_nonexistent_host_id_is_noop(self):
216+
u1 = UUID('12345678-1234-5678-1234-567812345678')
217+
u_missing = UUID('aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee')
218+
t1 = Tablet(0, 100, [(u1, 0)])
219+
tablets = Tablets({("ks", "tb"): [t1]})
220+
tablets.drop_tablets_by_host_id(u_missing)
221+
self.assertEqual(len(tablets._tablets[("ks", "tb")]), 1)

0 commit comments

Comments
 (0)