Skip to content

Commit 5d21baf

Browse files
committed
perf(robot): cache KeywordMatcher and add dict-index to KeywordStore
- Cache KeywordMatcher on KeywordDoc via a lazy-init _matcher slot, eliminating 7.5M redundant instantiations per analysis run
- Add a dict index (_index + _embedded) to KeywordStore for O(1) keyword lookup by normalized name; a linear scan is needed only for embedded keywords
- Add __getstate__/__setstate__ to KeywordDoc and KeywordStore to exclude transient fields (_matcher, _index, _embedded, parent, _hash_value, _stable_id) from pickle serialization
- Restore parent references in LibraryDoc.__setstate__ via _update_keywords after deserialization
- Remove unused nosave metadata from the argument_definitions, parent, and keyword_doc fields
- Fix Application.keyboard_interrupt to use self.exit() for consistent shutdown behavior

Measured improvement (cProfile, 1065 Robot files):
- Warm no-NS: 37.46s -> 29.25s (-22%)
- Cold no-NS: 42.94s -> 33.39s (-22%)
- Keyword matching: ~9.0s -> ~0.5s (-94%)
- Function calls: 118M -> 81M (-31%)
1 parent fa83b30 commit 5d21baf

File tree

3 files changed

+96
-10
lines changed

3 files changed

+96
-10
lines changed

packages/plugin/src/robotcode/plugin/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ def echo_via_pager(
333333

334334
def keyboard_interrupt(self) -> None:
335335
self.verbose("Aborted!", file=sys.stderr)
336-
sys.exit(253)
336+
self.exit(253, fast=True)
337337

338338
def exit(self, code: int = 0, fast: bool = False) -> None:
339339
self.verbose(f"Exit with code {code}")

packages/robot/src/robotcode/robot/diagnostics/entities.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ class CommandLineVariableDefinition(GlobalVariableDefinition):
244244
@dataclass(slots=True, eq=False)
245245
class ArgumentDefinition(LocalVariableDefinition):
246246
type: VariableDefinitionType = VariableDefinitionType.ARGUMENT
247-
keyword_doc: Optional["KeywordDoc"] = field(default=None, compare=False, metadata={"nosave": True})
247+
keyword_doc: Optional["KeywordDoc"] = field(default=None, compare=False)
248248

249249

250250
@dataclass(slots=True, eq=False)

packages/robot/src/robotcode/robot/diagnostics/library_doc.py

Lines changed: 94 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -629,9 +629,7 @@ class KeywordDoc(SourceEntity):
629629
name_token: Optional[Token] = field(default=None, compare=False)
630630
arguments: List[ArgumentInfo] = field(default_factory=list, compare=False)
631631
arguments_spec: Optional[ArgumentSpec] = field(default=None, compare=False)
632-
argument_definitions: Optional[List[ArgumentDefinition]] = field(
633-
default=None, compare=False, metadata={"nosave": True}
634-
)
632+
argument_definitions: Optional[List[ArgumentDefinition]] = field(default=None, compare=False)
635633
doc: str = field(default="", compare=False)
636634
tags: List[str] = field(default_factory=list)
637635
type: str = "keyword"
@@ -648,10 +646,11 @@ class KeywordDoc(SourceEntity):
648646
deprecated: bool = field(default=False, compare=False)
649647
return_type: Optional[str] = field(default=None, compare=False)
650648

651-
parent: Optional[LibraryDoc] = field(default=None, init=False, metadata={"nosave": True})
649+
parent: Optional[LibraryDoc] = field(default=None, init=False)
652650
_hash_value: int = field(default=0, init=False, compare=False, hash=False, repr=False)
653651
_stable_id: str = field(default="", init=False, compare=False, hash=False, repr=False)
654652
_parent_stable_id: str = field(default="", init=False, compare=False, hash=False, repr=False)
653+
_matcher: Optional[KeywordMatcher] = field(default=None, init=False, compare=False, hash=False, repr=False)
655654

656655
def _get_argument_definitions(self) -> Optional[List[ArgumentDefinition]]:
657656
return (
@@ -718,6 +717,40 @@ def __post_init__(self) -> None:
718717
)
719718
)
720719

720+
def _all_slots(self) -> Iterator[str]:
721+
for cls in type(self).__mro__:
722+
yield from getattr(cls, "__slots__", ())
723+
724+
_EXCLUDED_FROM_STATE = frozenset({"_matcher", "parent", "_hash_value", "_stable_id"})
725+
726+
def __getstate__(self) -> Dict[str, Any]:
727+
return {slot: getattr(self, slot) for slot in self._all_slots() if slot not in self._EXCLUDED_FROM_STATE}
728+
729+
def __setstate__(self, state: Dict[str, Any]) -> None:
730+
for slot in self._all_slots():
731+
setattr(self, slot, state.get(slot, None))
732+
self._matcher = None
733+
self.parent = None
734+
self._stable_id = ""
735+
self._hash_value = hash(
736+
(
737+
self.name,
738+
self.longname,
739+
self.source,
740+
self.line_no,
741+
self.col_offset,
742+
self.end_line_no,
743+
self.end_col_offset,
744+
self.type,
745+
self.libname,
746+
self.libtype,
747+
self.is_initializer,
748+
self.is_error_handler,
749+
self.doc_format,
750+
tuple(self.tags) if self.tags else (),
751+
)
752+
)
753+
721754
@property
722755
def stable_id(self) -> str:
723756
if not self._stable_id:
@@ -737,7 +770,9 @@ def is_embedded(self) -> bool:
737770

738771
@property
739772
def matcher(self) -> KeywordMatcher:
740-
return KeywordMatcher(self.name)
773+
if self._matcher is None:
774+
self._matcher = KeywordMatcher(self.name)
775+
return self._matcher
741776

742777
@property
743778
def is_deprecated(self) -> bool:
@@ -985,9 +1020,44 @@ class KeywordStore:
9851020
source: Optional[str] = None
9861021
source_type: Optional[str] = None
9871022
keywords: List[KeywordDoc] = field(default_factory=list)
1023+
_index: Optional[Dict[str, List[KeywordDoc]]] = field(default=None, init=False, compare=False, repr=False)
1024+
_embedded: Optional[List[KeywordDoc]] = field(default=None, init=False, compare=False, repr=False)
1025+
1026+
def _ensure_index(self) -> Tuple[Dict[str, List[KeywordDoc]], List[KeywordDoc]]:
1027+
if self._index is not None:
1028+
return self._index, self._embedded # type: ignore[return-value]
1029+
index: Dict[str, List[KeywordDoc]] = {}
1030+
embedded: List[KeywordDoc] = []
1031+
for kw in self.keywords:
1032+
if kw.matcher.embedded_arguments is not None:
1033+
embedded.append(kw)
1034+
else:
1035+
key = kw.matcher.normalized_name
1036+
bucket = index.get(key)
1037+
if bucket is None:
1038+
index[key] = [kw]
1039+
else:
1040+
bucket.append(kw)
1041+
self._index = index
1042+
self._embedded = embedded
1043+
return index, embedded
1044+
1045+
def __getstate__(self) -> Dict[str, Any]:
1046+
return {
1047+
"source": self.source,
1048+
"source_type": self.source_type,
1049+
"keywords": self.keywords,
1050+
}
1051+
1052+
def __setstate__(self, state: Dict[str, Any]) -> None:
1053+
self.source = state.get("source")
1054+
self.source_type = state.get("source_type")
1055+
self.keywords = state.get("keywords", [])
1056+
self._index = None
1057+
self._embedded = None
9881058

9891059
def __getitem__(self, key: str) -> KeywordDoc:
990-
items = [v for v in self.keywords if v.matcher == key]
1060+
items = list(self.iter_all(key))
9911061

9921062
if not items:
9931063
raise KeyError
@@ -1014,7 +1084,15 @@ def __getitem__(self, key: str) -> KeywordDoc:
10141084
)
10151085

10161086
def __contains__(self, _x: object) -> bool:
1017-
return any(v.matcher == _x for v in self.keywords)
1087+
if type(_x) is KeywordMatcher:
1088+
_x = _x.name
1089+
if type(_x) is not str:
1090+
return False
1091+
index, embedded = self._ensure_index()
1092+
normalized = normalize(_x)
1093+
if index.get(normalized):
1094+
return True
1095+
return any(kw.matcher.match_string(_x) for kw in embedded)
10181096

10191097
def __len__(self) -> int:
10201098
return len(self.keywords)
@@ -1044,7 +1122,13 @@ def get_all(self, key: str) -> List[KeywordDoc]:
10441122
return list(self.iter_all(key))
10451123

10461124
def iter_all(self, key: str) -> Iterable[KeywordDoc]:
1047-
return (v for v in self.keywords if v.matcher.match_string(key))
1125+
index, embedded = self._ensure_index()
1126+
normalized = normalize(key)
1127+
matches = index.get(normalized, [])
1128+
embedded_matches = [kw for kw in embedded if kw.matcher.match_string(key)]
1129+
if embedded_matches:
1130+
return [*embedded_matches, *matches]
1131+
return matches
10481132

10491133

10501134
@dataclass(slots=True)
@@ -1191,6 +1275,8 @@ def __setstate__(self, state: Dict[str, Any]) -> None:
11911275
self.member_name,
11921276
)
11931277
)
1278+
self._update_keywords(self._inits)
1279+
self._update_keywords(self._keywords)
11941280

11951281
def __eq__(self, other: object) -> bool:
11961282
if not isinstance(other, LibraryDoc):

0 commit comments

Comments
 (0)