Skip to content

Commit edaea47

Browse files
committed
feat(robot): add SQLite cache backend for library and namespace data
Replace file-based pickle caching with a single SQLite database per workspace, consolidating all cache entries into one file instead of many individual .pkl files scattered across directories.
1 parent abf3387 commit edaea47

File tree

3 files changed

+140
-6
lines changed

3 files changed

+140
-6
lines changed

packages/robot/src/robotcode/robot/diagnostics/data_cache.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import pickle
import sqlite3
import zlib
from abc import ABC, abstractmethod
from enum import Enum
from pathlib import Path
@@ -28,6 +29,9 @@ def read_cache_data(
2829
@abstractmethod
2930
def save_cache_data(self, section: CacheSection, entry_name: str, data: Any) -> None: ...
3031

32+
def close(self) -> None:
33+
pass
34+
3135

3236
class FileCacheDataBase(DataCache, ABC):
3337
def __init__(self, cache_dir: Path) -> None:
@@ -89,3 +93,71 @@ def save_cache_data(self, section: CacheSection, entry_name: str, data: Any) ->
8993
cached_file.parent.mkdir(parents=True, exist_ok=True)
9094
with cached_file.open("wb") as f:
9195
pickle.dump(data, f)
96+
97+
98+
class SqliteDataCache(DataCache):
    """Cache backend using a single SQLite database with zlib-compressed pickle blobs.

    All cache entries for a workspace live in one ``cache.db`` file instead of
    many individual pickle files.  Entries are keyed by ``(section,
    entry_name)`` and stored as zlib-compressed pickle blobs.

    NOTE(review): the connection is opened with ``check_same_thread=False``
    but access is not serialized here — confirm callers never use one
    instance concurrently from multiple threads, or add a lock around the
    ``execute``/``commit`` calls.
    """

    # Bump whenever the on-disk layout changes; a mismatch drops and rebuilds
    # the cache table (cache contents are always reproducible from source).
    _SCHEMA_VERSION = 1

    def __init__(self, cache_dir: Path) -> None:
        """Open (or create) the cache database below *cache_dir*."""
        self.cache_dir = cache_dir

        # exist_ok avoids a race when several processes start simultaneously;
        # the .gitignore keeps the whole cache directory out of version control.
        cache_dir.mkdir(parents=True, exist_ok=True)
        gitignore = cache_dir / ".gitignore"
        if not gitignore.exists():
            gitignore.write_text("# Created by robotcode\n*\n", "utf-8")

        db_path = cache_dir / "cache.db"
        self._conn = sqlite3.connect(str(db_path), check_same_thread=False)

        # WAL + NORMAL sync: good write throughput, acceptable durability for
        # a cache that can always be rebuilt.
        self._conn.execute("PRAGMA journal_mode=WAL")
        self._conn.execute("PRAGMA synchronous=NORMAL")
        self._conn.execute("PRAGMA cache_size=-8000")  # ~8 MiB page cache
        self._conn.execute("PRAGMA mmap_size=67108864")  # 64 MiB mmap window

        # On a schema-version mismatch, discard the old table instead of
        # migrating — stale entries are simply recomputed.
        version = self._conn.execute("PRAGMA user_version").fetchone()[0]
        if version != self._SCHEMA_VERSION:
            self._conn.execute("DROP TABLE IF EXISTS cache_entries")
            # PRAGMA values cannot be bound as parameters; the constant is a
            # class-level int, so interpolation is safe here.
            self._conn.execute(f"PRAGMA user_version={self._SCHEMA_VERSION}")

        self._conn.execute(
            "CREATE TABLE IF NOT EXISTS cache_entries ("
            " section TEXT NOT NULL,"
            " entry_name TEXT NOT NULL,"
            " data BLOB NOT NULL,"
            " PRIMARY KEY (section, entry_name)"
            ")"
        )
        self._conn.commit()

    def cache_data_exists(self, section: CacheSection, entry_name: str) -> bool:
        """Return True if an entry for ``(section, entry_name)`` is stored."""
        row = self._conn.execute(
            "SELECT 1 FROM cache_entries WHERE section = ? AND entry_name = ?",
            (section.value, entry_name),
        ).fetchone()
        return row is not None

    def read_cache_data(
        self, section: CacheSection, entry_name: str, types: Union[Type[_T], Tuple[Type[_T], ...]]
    ) -> _T:
        """Load, decompress and unpickle an entry, checking it against *types*.

        Raises ``FileNotFoundError`` when the entry does not exist (mirroring
        the file-based backends) and ``TypeError`` on a type mismatch.
        """
        row = self._conn.execute(
            "SELECT data FROM cache_entries WHERE section = ? AND entry_name = ?",
            (section.value, entry_name),
        ).fetchone()

        if row is None:
            raise FileNotFoundError(f"No cache entry for {section.value}/{entry_name}")

        # SECURITY NOTE: pickle.loads is only ever fed blobs this application
        # wrote itself; never point the cache at an untrusted database file.
        result = pickle.loads(zlib.decompress(row[0]))

        if isinstance(result, types):
            return cast(_T, result)

        raise TypeError(f"Expected {types} but got {type(result)}")

    def save_cache_data(self, section: CacheSection, entry_name: str, data: Any) -> None:
        """Pickle, zlib-compress and upsert *data* under ``(section, entry_name)``."""
        blob = zlib.compress(pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL))
        self._conn.execute(
            "INSERT OR REPLACE INTO cache_entries (section, entry_name, data) VALUES (?, ?, ?)",
            (section.value, entry_name, blob),
        )
        self._conn.commit()

    def close(self) -> None:
        """Close the underlying database connection."""
        self._conn.close()

packages/robot/src/robotcode/robot/diagnostics/imports_manager.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
from ..utils.robot_path import find_file_ex
5454
from ..utils.variables import contains_variable
5555
from .data_cache import CacheSection
56-
from .data_cache import PickleDataCache as DefaultDataCache
56+
from .data_cache import SqliteDataCache as DefaultDataCache
5757
from .entities import (
5858
CommandLineVariableDefinition,
5959
VariableDefinition,
@@ -612,6 +612,7 @@ def __init__(
612612
/ f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
613613
/ get_robot_version_str()
614614
)
615+
weakref.finalize(self, DefaultDataCache.close, self.data_cache)
615616

616617
self.cmd_variables = variables
617618
self.cmd_variable_files = variable_files

packages/robot/src/robotcode/robot/diagnostics/namespace.py

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,14 @@ class NamespaceData:
126126
# --- ScopeTree (local scopes only, file_scope is reconstructed) ---
127127
local_scopes: List[LocalScope] = field(default_factory=list)
128128

129+
# --- Authoritative variable definitions (stable_id → VariableDefinition) ---
130+
# Stores the exact VariableDefinition objects that were referenced during
131+
# analysis. Used as fallback in from_data() when the re-parsed resource doc
132+
# produces variables with different stable_ids (e.g. col_offset differences
133+
# for variables with '=' suffix, or keyword argument definitions from
134+
# imported keywords that aren't in the file's own scope).
135+
variable_definitions: Dict[str, VariableDefinition] = field(default_factory=dict)
136+
129137

130138
class Namespace:
131139
"""Data container holding all results of a namespace build.
@@ -450,6 +458,42 @@ def to_data(self) -> NamespaceData:
450458
key = f"{type(entry).__name__}:{entry.import_name}:{entry.args!r}:{entry.alias or ''}"
451459
ns_refs[key] = locs
452460

461+
# Collect all referenced variable definitions for stable_id → object lookup
462+
all_var_defs: Dict[str, VariableDefinition] = {}
463+
for var in self._variable_references:
464+
all_var_defs[var.stable_id] = var
465+
for var in self._local_variable_assignments:
466+
all_var_defs[var.stable_id] = var
467+
468+
# Build keyword_references merging locations when different KeywordDoc
469+
# objects share the same stable_id (e.g. same library imported with
470+
# different aliases like "errorlib" and "noerrorlib").
471+
kw_refs_merged: Dict[str, Set[Location]] = {}
472+
for kw, locs in self._keyword_references.items():
473+
sid = kw.stable_id
474+
if sid in kw_refs_merged:
475+
kw_refs_merged[sid].update(locs)
476+
else:
477+
kw_refs_merged[sid] = set(locs)
478+
479+
# Same merge for variable_references (different VariableDefinition
480+
# objects could theoretically share a stable_id).
481+
var_refs_merged: Dict[str, Set[Location]] = {}
482+
for var, locs in self._variable_references.items():
483+
sid = var.stable_id
484+
if sid in var_refs_merged:
485+
var_refs_merged[sid].update(locs)
486+
else:
487+
var_refs_merged[sid] = set(locs)
488+
489+
var_assigns_merged: Dict[str, Set[Range]] = {}
490+
for var, ranges in self._local_variable_assignments.items():
491+
sid = var.stable_id
492+
if sid in var_assigns_merged:
493+
var_assigns_merged[sid].update(ranges)
494+
else:
495+
var_assigns_merged[sid] = set(ranges)
496+
453497
return NamespaceData(
454498
source=self.source,
455499
source_id=str(self.source_id) if self.source_id else None,
@@ -459,16 +503,15 @@ def to_data(self) -> NamespaceData:
459503
imports=list(self._import_entries.keys()),
460504
diagnostics=list(self._diagnostics),
461505
test_case_definitions=list(self._test_case_definitions),
462-
keyword_references={kw.stable_id: set(locs) for kw, locs in self._keyword_references.items()},
463-
variable_references={var.stable_id: set(locs) for var, locs in self._variable_references.items()},
464-
local_variable_assignments={
465-
var.stable_id: set(ranges) for var, ranges in self._local_variable_assignments.items()
466-
},
506+
keyword_references=kw_refs_merged,
507+
variable_references=var_refs_merged,
508+
local_variable_assignments=var_assigns_merged,
467509
namespace_references=ns_refs,
468510
keyword_tag_references={k: set(v) for k, v in self._keyword_tag_references.items()},
469511
testcase_tag_references={k: set(v) for k, v in self._testcase_tag_references.items()},
470512
metadata_references={k: set(v) for k, v in self._metadata_references.items()},
471513
local_scopes=list(self._scope_tree.local_scopes),
514+
variable_definitions=all_var_defs,
472515
)
473516

474517
@classmethod
@@ -528,11 +571,29 @@ def from_data(
528571
for var in scope.iter_all():
529572
var_by_id[var.stable_id] = var
530573

574+
# Add keyword argument_definitions from all resolved keywords
575+
# to var_by_id. This covers ArgumentDefinition and
576+
# LibraryArgumentDefinition objects that are created during
577+
# Phase 3 analysis but aren't part of the file's own scope.
578+
for kw in kw_by_id.values():
579+
if kw.argument_definitions:
580+
for arg_var in kw.argument_definitions:
581+
var_by_id[arg_var.stable_id] = arg_var
582+
531583
# Variables from local scopes (block-level LOCAL_VARIABLE, arguments, etc.)
532584
for ls in data.local_scopes:
533585
for sv in ls.variables:
534586
var_by_id[sv.variable.stable_id] = sv.variable
535587

588+
# Fallback: use stored variable definitions for any stable_ids
589+
# not found in the rebuilt scope. This handles variables whose
590+
# stable_ids differ due to re-parsing differences (e.g. col_offset
591+
# for variables with '=' suffix) and keyword argument definitions
592+
# from imported keywords that aren't in the file's own scope.
593+
for sid, var_def in data.variable_definitions.items():
594+
if sid not in var_by_id:
595+
var_by_id[sid] = var_def
596+
536597
# --- Reconstruct reference dicts ---
537598
keyword_references: Dict[KeywordDoc, Set[Location]] = {}
538599
for sid, locs in data.keyword_references.items():

0 commit comments

Comments
 (0)