Skip to content

Commit ba78aab

Browse files
committed
perf: eliminate bisect key= callback via parallel token index lists
Maintain parallel `_first_tokens` and `_last_tokens` dicts alongside `_tablets`, each mapping `(keyspace, table)` to a plain `list[int]`. This lets `bisect_left` run entirely in C on native ints instead of calling an `attrgetter` callback on every comparison during the binary search.

Follow-up to PR #757, which identified the opportunity: its own benchmarks showed that `bisect_left` without `key=` is 2.7-5.7x faster than with `key=attrgetter`.

Results (best-of-5, Python 3.14) for `get_tablet_for_key` (hit):

| Tablets | Before | After | Saved | Speedup |
|--------:|-------:|------:|------:|--------:|
| 10      | 293ns  | 216ns | 78ns  | 1.36x   |
| 100     | 351ns  | 233ns | 118ns | 1.51x   |
| 1,000   | 448ns  | 267ns | 181ns | 1.68x   |
| 10,000  | 537ns  | 282ns | 255ns | 1.90x   |

All three dicts are kept in sync by `add_tablet`, `drop_tablets`, and `drop_tablets_by_host_id`. The `attrgetter` import and the module-level `_get_first_token` / `_get_last_token` helpers are no longer needed and have been removed.
1 parent be2c13e commit ba78aab

1 file changed

Lines changed: 39 additions & 17 deletions

File tree

cassandra/tablets.py

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,8 @@
11
from bisect import bisect_left
2-
from operator import attrgetter
32
from threading import Lock
43
from typing import Optional
54
from uuid import UUID
65

7-
# C-accelerated attrgetter avoids per-call lambda allocation overhead
8-
_get_first_token = attrgetter("first_token")
9-
_get_last_token = attrgetter("last_token")
10-
116

127
class Tablet(object):
138
"""
@@ -45,29 +40,45 @@ def replica_contains_host_id(self, uuid: UUID) -> bool:
4540

4641
class Tablets(object):
4742
_lock = None
48-
_tablets = {}
43+
_tablets = {} # (keyspace, table) -> list[Tablet]
44+
_first_tokens = {} # (keyspace, table) -> list[int]
45+
_last_tokens = {} # (keyspace, table) -> list[int]
4946

5047
def __init__(self, tablets):
5148
self._tablets = tablets
49+
# Build parallel token index lists from any pre-populated data
50+
self._first_tokens = {
51+
key: [t.first_token for t in tlist]
52+
for key, tlist in tablets.items()
53+
}
54+
self._last_tokens = {
55+
key: [t.last_token for t in tlist]
56+
for key, tlist in tablets.items()
57+
}
5258
self._lock = Lock()
5359

5460
def table_has_tablets(self, keyspace, table) -> bool:
5561
return bool(self._tablets.get((keyspace, table), []))
5662

5763
def get_tablet_for_key(self, keyspace, table, t):
58-
tablet = self._tablets.get((keyspace, table), [])
59-
if not tablet:
64+
key = (keyspace, table)
65+
last_tokens = self._last_tokens.get(key)
66+
if not last_tokens:
6067
return None
6168

62-
id = bisect_left(tablet, t.value, key=_get_last_token)
63-
if id < len(tablet) and t.value > tablet[id].first_token:
64-
return tablet[id]
69+
token_value = t.value
70+
id = bisect_left(last_tokens, token_value)
71+
if id < len(last_tokens) and token_value > self._first_tokens[key][id]:
72+
return self._tablets[key][id]
6573
return None
6674

6775
def drop_tablets(self, keyspace: str, table: Optional[str] = None):
6876
with self._lock:
6977
if table is not None:
70-
self._tablets.pop((keyspace, table), None)
78+
key = (keyspace, table)
79+
self._tablets.pop(key, None)
80+
self._first_tokens.pop(key, None)
81+
self._last_tokens.pop(key, None)
7182
return
7283

7384
to_be_deleted = []
@@ -77,6 +88,8 @@ def drop_tablets(self, keyspace: str, table: Optional[str] = None):
7788

7889
for key in to_be_deleted:
7990
del self._tablets[key]
91+
self._first_tokens.pop(key, None)
92+
self._last_tokens.pop(key, None)
8093

8194
def drop_tablets_by_host_id(self, host_id: Optional[UUID]):
8295
if host_id is None:
@@ -90,23 +103,32 @@ def drop_tablets_by_host_id(self, host_id: Optional[UUID]):
90103

91104
for tablet_id in reversed(to_be_deleted):
92105
tablets.pop(tablet_id)
106+
self._first_tokens[key].pop(tablet_id)
107+
self._last_tokens[key].pop(tablet_id)
93108

94109
def add_tablet(self, keyspace, table, tablet):
95110
with self._lock:
96-
tablets_for_table = self._tablets.setdefault((keyspace, table), [])
111+
key = (keyspace, table)
112+
tablets_for_table = self._tablets.setdefault(key, [])
113+
first_tokens = self._first_tokens.setdefault(key, [])
114+
last_tokens = self._last_tokens.setdefault(key, [])
97115

98116
# find first overlapping range
99-
start = bisect_left(tablets_for_table, tablet.first_token, key=_get_first_token)
100-
if start > 0 and tablets_for_table[start - 1].last_token > tablet.first_token:
117+
start = bisect_left(first_tokens, tablet.first_token)
118+
if start > 0 and last_tokens[start - 1] > tablet.first_token:
101119
start = start - 1
102120

103121
# find last overlapping range
104-
end = bisect_left(tablets_for_table, tablet.last_token, key=_get_last_token)
105-
if end < len(tablets_for_table) and tablets_for_table[end].first_token >= tablet.last_token:
122+
end = bisect_left(last_tokens, tablet.last_token)
123+
if end < len(last_tokens) and first_tokens[end] >= tablet.last_token:
106124
end = end - 1
107125

108126
if start <= end:
109127
del tablets_for_table[start:end + 1]
128+
del first_tokens[start:end + 1]
129+
del last_tokens[start:end + 1]
110130

111131
tablets_for_table.insert(start, tablet)
132+
first_tokens.insert(start, tablet.first_token)
133+
last_tokens.insert(start, tablet.last_token)
112134

0 commit comments

Comments
 (0)