Skip to content

Commit 40ccd3d

Browse files
committed
(improvement) metadata: replace dict_factory with lightweight _RowView
Introduce _RowView, a __slots__-based read-only row wrapper that stores data as tuples with a shared column-name-to-index map, and _row_factory that creates these views. Replace dict_factory in _SchemaParser._handle_results and get_column_from_system_local (both reachable from the V4 code path). This eliminates per-row dict allocation during schema parsing. All rows from the same result set share a single index map object. Also refactor SchemaParserV4._build_keyspace_metadata_internal to read from the row without mutating it, since _RowView is read-only. Note: V22-only dict_factory call sites are left unchanged as they do not affect the V3/V4 code path (V3 and V4 fully override _query_all).
1 parent ccc4e40 commit 40ccd3d

1 file changed

Lines changed: 49 additions & 7 deletions

File tree

cassandra/metadata.py

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,46 @@
4646
from cassandra.tablets import Tablets
4747
from cassandra.util import maybe_add_timeout_to_query
4848

49+
50+
class _RowView(object):
51+
"""
52+
Lightweight read-only view over a row tuple, supporting dict-like access.
53+
Shares a single index map across all rows from the same result set,
54+
avoiding per-row dict allocation overhead.
55+
"""
56+
57+
__slots__ = ("_row", "_index_map")
58+
59+
def __init__(self, row, index_map):
60+
self._row = row
61+
self._index_map = index_map
62+
63+
def __getitem__(self, key):
64+
return self._row[self._index_map[key]]
65+
66+
def get(self, key, default=None):
67+
idx = self._index_map.get(key)
68+
if idx is not None:
69+
return self._row[idx]
70+
return default
71+
72+
def __contains__(self, key):
73+
return key in self._index_map
74+
75+
def __repr__(self):
76+
return repr({k: self._row[i] for k, i in self._index_map.items()})
77+
78+
79+
def _row_factory(colnames, rows):
    """
    Build read-only row views for schema parsing, in place of dict_factory.

    Every returned _RowView keeps its row's values as-is and resolves
    row["key"] / row.get("key") through one column-name-to-position map that
    is built once here and shared by all rows of the result set.
    """
    positions = {}
    for position, column in enumerate(colnames):
        positions[column] = position
    return [_RowView(values, positions) for values in rows]
87+
88+
4989
log = logging.getLogger(__name__)
5090

5191
cql_keywords = set((
@@ -1923,7 +1963,7 @@ def get_next_pages():
19231963
yield next_result.parsed_rows
19241964

19251965
result.parsed_rows += itertools.chain(*get_next_pages())
1926-
return dict_factory(result.column_names, result.parsed_rows) if result else []
1966+
return _row_factory(result.column_names, result.parsed_rows) if result else []
19271967
else:
19281968
raise result
19291969

@@ -2928,11 +2968,13 @@ def get_all_keyspaces(self):
29282968

29292969
@staticmethod
def _build_keyspace_metadata_internal(row):
    """
    Build a KeyspaceMetadata from a keyspace row without mutating the row.

    ``durable_writes``, ``replication`` and ``graph_engine`` are read with
    fallbacks because these fields aren't present for virtual keyspaces.
    The row may be a read-only view, so nothing is written back to it.

    :param row: mapping-like row exposing ``row[key]`` and ``row.get(key)``;
        must contain ``keyspace_name``
    :return: the KeyspaceMetadata built from the row's fields
    """
    name = row["keyspace_name"]
    durable_writes = row.get("durable_writes", None)
    # Copy so that popping "class" below never touches the row's own data;
    # "or {}" also covers a replication column that is present but null.
    replication = dict(row.get("replication") or {})
    replication_class = replication.pop("class", None)
    graph_engine = row.get("graph_engine", None)
    return KeyspaceMetadata(name, durable_writes, replication_class, replication, graph_engine)
29362978

29372979

29382980
class SchemaParserDSE67(SchemaParserV4):
@@ -3326,7 +3368,7 @@ def get_column_from_system_local(connection, column_name: str, timeout, metadata
33263368
, timeout=timeout, fail_on_error=False)
33273369
if not success or not local_result.parsed_rows:
33283370
return ""
3329-
local_rows = dict_factory(local_result.column_names, local_result.parsed_rows)
3371+
local_rows = _row_factory(local_result.column_names, local_result.parsed_rows)
33303372
local_row = local_rows[0]
33313373
return local_row.get(column_name)
33323374

0 commit comments

Comments
 (0)