Skip to content

Commit 23a538c

Browse files
committed
feat(robot): introduce SemanticModel for richer Robot Framework analysis (Part 1)
Introduce the first part of an opt-in SemanticModel that provides deeper static analysis of Robot Framework files, available behind a feature flag.

Enable via robot.toml:

    [tool.robotcode-analyze]
    semantic-model = true

or in VS Code settings:

    "robotcode.experimental.semanticModel": true

- Nested variable resolution: variables like ${DICT_${key}} are now statically resolved where possible, with diagnostics when resolution fails
- Richer semantic highlighting: token classification driven by the semantic model instead of syntactic heuristics
- RF 7.4 type hint support: KeywordName/KeywordArgument annotations are recognized for run-keyword detection
- Log output to verify which analyzer is active
- Robust deserialization of cached library data when new fields are added

The SemanticModel is disabled by default. Further parts will add additional LSP feature migrations and extended analysis capabilities.
1 parent ceef00e commit 23a538c

File tree

6,553 files changed

+47302
-211923
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

6,553 files changed

+47302
-211923
lines changed

docs/03_reference/cli.md

Lines changed: 232 additions & 62 deletions
Large diffs are not rendered by default.

docs/03_reference/config.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3156,6 +3156,27 @@ Examples:
31563156
warning = ["VariableNotFound", "multiple-keywords"]
31573157
```
31583158

3159+
## tool.robotcode-analyze.semantic-model
3160+
3161+
Type: `bool | None`
3162+
3163+
Enable the experimental Semantic Model for code analysis. When enabled, LSP features
3164+
use the new SemanticAnalyzer instead of the legacy NamespaceAnalyzer. This provides
3165+
richer analysis including static resolution of nested variables and improved semantic
3166+
highlighting.
3167+
3168+
**This is experimental and may change without notice.**
3169+
3170+
Can also be set via VS Code setting `robotcode.experimental.semanticModel`.
3171+
If set in both places, either `true` value enables the feature.
3172+
3173+
Examples:
3174+
3175+
```toml
3176+
[tool.robotcode-analyze]
3177+
semantic-model = true
3178+
```
3179+
31593180
## variable-files
31603181

31613182
Type: `list[str | StringExpression] | None`

etc/robot.toml.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,15 @@
144144
"default": null,
145145
"description": "Defines the modifiers for the analysis.\n\nExamples:\n\n```toml\n[tool.robotcode-analyze.modifiers]\nignore = [\"VariableNotFound\"]\nhint = [\"KeywordNotFound\"]\ninformation = [\"MultipleKeywords\"]\n```\n",
146146
"title": "Modifiers"
147+
},
148+
"semantic-model": {
149+
"default": null,
150+
"description": "Enable the experimental Semantic Model for code analysis. When enabled, LSP features\nuse the new SemanticAnalyzer instead of the legacy NamespaceAnalyzer. This provides\nricher analysis including static resolution of nested variables and improved semantic\nhighlighting.\n\n**This is experimental and may change without notice.**\n\nCan also be set via VS Code setting `robotcode.experimental.semanticModel`.\nIf set in both places, either `true` value enables the feature.\n\nExamples:\n\n```toml\n[tool.robotcode-analyze]\nsemantic-model = true\n```\n",
151+
"title": "Semantic model",
152+
"type": [
153+
"boolean",
154+
"null"
155+
]
147156
}
148157
},
149158
"title": "AnalyzeConfig",
@@ -162,6 +171,15 @@
162171
"null"
163172
]
164173
},
174+
"cache-namespaces": {
175+
"default": null,
176+
"description": "Enable or disable caching of fully analyzed namespace data to disk.\nCan speed up startup for large projects by skipping re-analysis of unchanged files.\nDefaults to enabled.\n\nExamples:\n\n```toml\n[tool.robotcode-analyze.cache]\ncache-namespaces = false\n```\n",
177+
"title": "Cache namespaces",
178+
"type": [
179+
"boolean",
180+
"null"
181+
]
182+
},
165183
"extend-ignore-arguments-for-library": {
166184
"default": null,
167185
"description": "Extend the ignore arguments for library settings.",

package.json

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1284,6 +1284,17 @@
12841284
"scope": "resource"
12851285
}
12861286
}
1287+
},
1288+
{
1289+
"id": "robotcode.experimental",
1290+
"properties": {
1291+
"robotcode.experimental.semanticModel": {
1292+
"type": "boolean",
1293+
"default": false,
1294+
"markdownDescription": "Enable the experimental Semantic Model for code analysis. When enabled, LSP features use the new SemanticAnalyzer instead of the legacy NamespaceAnalyzer. **This is experimental and may change without notice.**",
1295+
"scope": "resource"
1296+
}
1297+
}
12871298
}
12881299
],
12891300
"commands": [

packages/analyze/src/robotcode/analyze/config.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,27 @@ class AnalyzeConfig(BaseOptions):
347347
description="Extend the global library search order setting."
348348
)
349349

350+
semantic_model: Optional[bool] = field(
351+
description="""\
352+
Enable the experimental Semantic Model for code analysis. When enabled, LSP features
353+
use the new SemanticAnalyzer instead of the legacy NamespaceAnalyzer. This provides
354+
richer analysis including static resolution of nested variables and improved semantic
355+
highlighting.
356+
357+
**This is experimental and may change without notice.**
358+
359+
Can also be set via VS Code setting `robotcode.experimental.semanticModel`.
360+
If set in both places, either `true` value enables the feature.
361+
362+
Examples:
363+
364+
```toml
365+
[tool.robotcode-analyze]
366+
semantic-model = true
367+
```
368+
""",
369+
)
370+
350371
load_library_timeout: Optional[int] = field(
351372
description="""\
352373
Specifies the timeout in seconds for loading (importing) libraries and variable files during
@@ -381,6 +402,7 @@ class AnalyzeConfig(BaseOptions):
381402
def to_workspace_analysis_config(self) -> WorkspaceAnalysisConfig:
382403
return WorkspaceAnalysisConfig(
383404
exclude_patterns=self.exclude_patterns or [],
405+
semantic_model=self.semantic_model if self.semantic_model is not None else False,
384406
cache=(
385407
WorkspaceCacheConfig(
386408
# TODO savelocation

packages/language_server/src/robotcode/language_server/robotframework/parts/hover.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def _hover_default(self, nodes: List[ast.AST], document: TextDocument, position:
123123
)
124124
value = None
125125
real_value = None
126-
if found_range is not None:
126+
if found_range is not None and (highlight_range is None or found_range.is_in_range(highlight_range)):
127127
highlight_range = found_range
128128
if variable.has_value or variable.resolvable:
129129
if (

packages/language_server/src/robotcode/language_server/robotframework/parts/semantic_tokens.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@
6767
)
6868
from robotcode.robot.diagnostics.model_helper import ModelHelper
6969
from robotcode.robot.diagnostics.namespace import Namespace
70+
from robotcode.robot.diagnostics.semantic_analyzer.enums import TokenKind
71+
from robotcode.robot.diagnostics.semantic_analyzer.model import SemanticModel
7072
from robotcode.robot.utils import RF_VERSION
7173
from robotcode.robot.utils.ast import (
7274
cached_isinstance,
@@ -1032,11 +1034,131 @@ class SemanticTokenGenerator:
10321034
creating and managing its own token mapper and keyword analyzer.
10331035
"""
10341036

1037+
# Mapping from SemanticModel TokenKind to LSP semantic token types.
1038+
# Used by collect_tokens_from_model() for the new model-based path.
1039+
_TOKEN_KIND_TO_SEM_TOKEN: Dict[TokenKind, Tuple[AnyTokenType, Optional[Set[AnyTokenModifier]]]] = {
1040+
TokenKind.KEYWORD: (RobotSemTokenTypes.KEYWORD, None),
1041+
TokenKind.BDD_PREFIX: (RobotSemTokenTypes.BDD_PREFIX, None),
1042+
TokenKind.NAMESPACE: (RobotSemTokenTypes.NAMESPACE, None),
1043+
TokenKind.VARIABLE: (RobotSemTokenTypes.VARIABLE, None),
1044+
TokenKind.VARIABLE_NOT_FOUND: (RobotSemTokenTypes.VARIABLE, None),
1045+
TokenKind.VARIABLE_PREFIX: (RobotSemTokenTypes.VARIABLE_BEGIN, None),
1046+
TokenKind.VARIABLE_OPEN_BRACE: (RobotSemTokenTypes.VARIABLE_BEGIN, None),
1047+
TokenKind.VARIABLE_CLOSE_BRACE: (RobotSemTokenTypes.VARIABLE_END, None),
1048+
TokenKind.VARIABLE_BASE: (RobotSemTokenTypes.VARIABLE, None),
1049+
TokenKind.VARIABLE_EXTENDED: (RobotSemTokenTypes.VARIABLE, None),
1050+
TokenKind.VARIABLE_TYPE_SEPARATOR: (SemanticTokenTypes.OPERATOR, None),
1051+
TokenKind.VARIABLE_TYPE_HINT: (SemanticTokenTypes.TYPE, None),
1052+
TokenKind.VARIABLE_DEFAULT_SEPARATOR: (SemanticTokenTypes.OPERATOR, None),
1053+
TokenKind.VARIABLE_DEFAULT_VALUE: (RobotSemTokenTypes.ARGUMENT, None),
1054+
TokenKind.VARIABLE_PATTERN_SEPARATOR: (SemanticTokenTypes.OPERATOR, None),
1055+
TokenKind.VARIABLE_PATTERN: (RobotSemTokenTypes.ARGUMENT, None),
1056+
TokenKind.VARIABLE_ASSIGN_MARK: (RobotSemTokenTypes.VARIABLE, None),
1057+
TokenKind.VARIABLE_EXPRESSION_OPEN: (RobotSemTokenTypes.EXPRESSION_BEGIN, None),
1058+
TokenKind.VARIABLE_EXPRESSION_CLOSE: (RobotSemTokenTypes.EXPRESSION_END, None),
1059+
TokenKind.PYTHON_EXPRESSION: (RobotSemTokenTypes.VARIABLE_EXPRESSION, None),
1060+
TokenKind.PYTHON_VARIABLE_REF: (RobotSemTokenTypes.VARIABLE, None),
1061+
TokenKind.VARIABLE_INDEX: (RobotSemTokenTypes.VARIABLE, None),
1062+
TokenKind.VARIABLE_INDEX_OPEN: (RobotSemTokenTypes.VARIABLE_BEGIN, None),
1063+
TokenKind.VARIABLE_INDEX_CLOSE: (RobotSemTokenTypes.VARIABLE_END, None),
1064+
TokenKind.VARIABLE_INDEX_CONTENT: (RobotSemTokenTypes.VARIABLE, None),
1065+
TokenKind.TEXT_FRAGMENT: (RobotSemTokenTypes.ARGUMENT, None),
1066+
TokenKind.ARGUMENT: (RobotSemTokenTypes.ARGUMENT, None),
1067+
TokenKind.NAMED_ARGUMENT_NAME: (RobotSemTokenTypes.NAMED_ARGUMENT, None),
1068+
TokenKind.NAMED_ARGUMENT_VALUE: (RobotSemTokenTypes.ARGUMENT, None),
1069+
TokenKind.CONTROL_FLOW: (RobotSemTokenTypes.CONTROL_FLOW, None),
1070+
TokenKind.CONDITION: (RobotSemTokenTypes.ARGUMENT, None),
1071+
TokenKind.TEST_NAME: (RobotSemTokenTypes.TESTCASE_NAME, {SemanticTokenModifiers.DECLARATION}),
1072+
TokenKind.KEYWORD_NAME: (RobotSemTokenTypes.KEYWORD_NAME, {SemanticTokenModifiers.DECLARATION}),
1073+
TokenKind.VARIABLE_NAME: (RobotSemTokenTypes.VARIABLE, {SemanticTokenModifiers.DECLARATION}),
1074+
TokenKind.SETTING_NAME: (RobotSemTokenTypes.SETTING, None),
1075+
TokenKind.IMPORT_NAME: (RobotSemTokenTypes.SETTING_IMPORT, None),
1076+
TokenKind.HEADER: (RobotSemTokenTypes.HEADER, None),
1077+
TokenKind.SEPARATOR: (RobotSemTokenTypes.SEPARATOR, None),
1078+
TokenKind.CONTINUATION: (RobotSemTokenTypes.CONTINUATION, None),
1079+
TokenKind.COMMENT: (SemanticTokenTypes.COMMENT, None),
1080+
TokenKind.TAG: (RobotSemTokenTypes.ARGUMENT, None),
1081+
TokenKind.CONFIG: (RobotSemTokenTypes.CONFIG, None),
1082+
TokenKind.ERROR: (RobotSemTokenTypes.ERROR, None),
1083+
}
1084+
10351085
def __init__(self) -> None:
    """Set up the generator's private token mapper and keyword analyzer.

    The keyword analyzer receives the very same mapper instance that is
    stored on the generator.
    """
    mapper = SemanticTokenMapper()
    self.token_mapper = mapper
    self.keyword_analyzer = KeywordTokenAnalyzer(mapper)
10391089

1090+
def collect_tokens_from_model(
    self,
    document: TextDocument,
    semantic_model: SemanticModel,
    range: Optional[Range],
    token_types: Sequence[Enum],
    token_modifiers: Sequence[Enum],
) -> Union[SemanticTokens, SemanticTokensPartialResult, None]:
    """Collect semantic tokens from the pre-built SemanticModel.

    This is the Tier 1 model-based path, used when the semantic model
    feature flag is enabled. It maps SemanticModel TokenKind values
    to LSP semantic token types via _TOKEN_KIND_TO_SEM_TOKEN and emits the
    LSP relative encoding: five integers per token (delta line, delta start
    column, length, token type index, modifier bitmask).

    Args:
        document: Document whose lines are used to convert token columns
            to UTF-16 positions.
        semantic_model: Model whose ``statements`` / ``tokens`` are walked.
        range: Optional line filter. Only the line numbers of ``start`` and
            ``end`` are compared; columns are ignored. ``None`` means the
            whole document.
        token_types: Token type legend; the emitted type is the index of the
            mapped type in this sequence.
        token_modifiers: Modifier legend; modifier ``i`` contributes bit
            ``1 << i`` to the emitted bitmask.

    Returns:
        A ``SemanticTokens`` instance holding the encoded integer data.

    Raises:
        ValueError: If a mapped token type or modifier is missing from the
            supplied legends.
    """
    data: List[int] = []
    lines = document.get_lines()

    # The LSP encoding is relative to the previously emitted token.
    prev_line = 0
    prev_col = 0

    # The (type index, modifier bitmask) pair depends only on the token kind,
    # so it is computed once per kind instead of once per token. ``None``
    # marks kinds that have no semantic-token mapping.
    encodings: Dict[TokenKind, Optional[Tuple[int, int]]] = {}

    past_range_end = False

    for stmt in semantic_model.statements:
        check_current_task_canceled()

        for token in stmt.tokens:
            if token.length == 0:
                continue

            # Model lines are 1-based, LSP lines are 0-based.
            line = token.line - 1
            if range is not None:
                if line < range.start.line:
                    continue
                if line > range.end.line:
                    # The delta encoding is only valid for tokens in document
                    # order, so nothing after this point can lie in range.
                    past_range_end = True
                    break

            kind = token.kind
            if kind not in encodings:
                encodings[kind] = self._encode_token_kind(kind, token_types, token_modifiers)
            encoding = encodings[kind]
            if encoding is None:
                continue
            type_index, modifier_bits = encoding

            # Convert to UTF-16 positions
            utf16_range = range_to_utf16(
                lines,
                Range(
                    start=Position(line=line, character=token.col_offset),
                    end=Position(line=line, character=token.col_offset + token.length),
                ),
            )
            start_col = utf16_range.start.character
            length = utf16_range.end.character - start_col

            data.append(line - prev_line)
            # The column is absolute on a new line, relative on the same line.
            data.append(start_col if line != prev_line else start_col - prev_col)
            data.append(length)
            data.append(type_index)
            data.append(modifier_bits)

            prev_line = line
            prev_col = start_col

        if past_range_end:
            break

    return SemanticTokens(data=data)

def _encode_token_kind(
    self,
    kind: TokenKind,
    token_types: Sequence[Enum],
    token_modifiers: Sequence[Enum],
) -> Optional[Tuple[int, int]]:
    """Return ``(type index, modifier bitmask)`` for *kind*, or ``None`` if it is unmapped."""
    sem_info = self._TOKEN_KIND_TO_SEM_TOKEN.get(kind)
    if sem_info is None:
        return None

    sem_type, sem_mods = sem_info
    type_index = token_types.index(sem_type)

    modifier_bits = 0
    for modifier in sem_mods or ():
        modifier_bits |= 1 << token_modifiers.index(modifier)

    return type_index, modifier_bits
1161+
10401162
def _get_tokens_after(self, tokens: List[Token], target_token: Token) -> List[Token]:
10411163
"""Get all tokens after target token efficiently.
10421164
@@ -1640,6 +1762,16 @@ def _collect(
16401762
model = self.parent.documents_cache.get_model(document)
16411763
namespace = self.parent.documents_cache.get_namespace(document)
16421764

1765+
semantic_model = namespace.semantic_model
1766+
if semantic_model is not None:
1767+
return self.token_generator.collect_tokens_from_model(
1768+
document,
1769+
semantic_model,
1770+
range,
1771+
self.parent.semantic_tokens.token_types,
1772+
self.parent.semantic_tokens.token_modifiers,
1773+
)
1774+
16431775
builtin_library_doc = next(
16441776
(
16451777
library.library_doc
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from dataclasses import dataclass
2+
from typing import TYPE_CHECKING, Dict, List, Optional, Set
3+
4+
from robotcode.core.lsp.types import Diagnostic, Location, Range
5+
6+
from .entities import LibraryEntry, TestCaseDefinition, VariableDefinition
7+
from .library_doc import KeywordDoc
8+
from .scope_tree import ScopeTree
9+
10+
if TYPE_CHECKING:
11+
from .semantic_analyzer.model import SemanticModel
12+
13+
14+
# Immutable (frozen), slotted record bundling what an analysis pass produced:
# diagnostics, several reference maps, test case definitions, the scope tree
# and, optionally, a semantic model.
@dataclass(slots=True, frozen=True)
15+
class AnalyzerResult:
16+
# Diagnostics collected for the analyzed document.
diagnostics: List[Diagnostic]
17+
# Locations recorded per keyword, variable and library entry.
keyword_references: Dict[KeywordDoc, Set[Location]]
18+
variable_references: Dict[VariableDefinition, Set[Location]]
19+
# Unlike the maps above, this one stores plain Ranges rather than Locations.
local_variable_assignments: Dict[VariableDefinition, Set[Range]]
20+
namespace_references: Dict[LibraryEntry, Set[Location]]
21+
test_case_definitions: List[TestCaseDefinition]
22+
# String-keyed maps from a tag / metadata name to the locations recorded for it.
keyword_tag_references: Dict[str, Set[Location]]
23+
testcase_tag_references: Dict[str, Set[Location]]
24+
metadata_references: Dict[str, Set[Location]]
25+
scope_tree: ScopeTree
26+
# Only field with a default. SemanticModel is imported under TYPE_CHECKING
# only, hence the quoted annotation.
# NOTE(review): presumably None unless the experimental semantic-model flag
# is enabled - confirm against the code that builds this result.
semantic_model: Optional["SemanticModel"] = None

packages/robot/src/robotcode/robot/diagnostics/document_cache_helper.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
AnalysisDiagnosticModifiersConfig,
4949
AnalysisRobotConfig,
5050
CacheConfig,
51+
ExperimentalConfig,
5152
RobotConfig,
5253
WorkspaceAnalysisConfig,
5354
)
@@ -437,6 +438,7 @@ def __get_namespace_for_document_type(
437438
model = self.get_model(document)
438439

439440
languages, workspace_languages = self.build_languages_from_model(document, model)
441+
experimental_config = self.workspace.get_configuration(ExperimentalConfig, document.uri)
440442

441443
builder = NamespaceBuilder(
442444
imports_manager,
@@ -447,6 +449,7 @@ def __get_namespace_for_document_type(
447449
languages,
448450
workspace_languages,
449451
)
452+
builder.set_semantic_model_enabled(self.analysis_config.semantic_model or experimental_config.semantic_model)
450453

451454
result = builder.build()
452455

packages/robot/src/robotcode/robot/diagnostics/errors.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ class Error:
4242
OVERRIDES_IMPORTED_VARIABLE = "OverridesImportedVariable"
4343
VARIABLE_ALREADY_DEFINED = "VariableAlreadyDefined"
4444
VARIABLE_OVERRIDDEN = "VariableOverridden"
45+
VARIABLE_NAME_NOT_RESOLVABLE = "VariableNameNotResolvable"
46+
VARIABLE_NAME_NOT_STATICALLY_RESOLVABLE = "VariableNameNotStaticallyResolvable"
47+
VARIABLE_REFERENCE_NOT_STATICALLY_RESOLVABLE = "VariableReferenceNotStaticallyResolvable"
4548
MODEL_ERROR = "ModelError"
4649
TOKEN_ERROR = "TokenError"
4750
ASSIGN_MARK_ALLOWED_ONLY_ON_LAST_VAR = "AssignmentMarkAllowedOnlyOnLastVariable"

0 commit comments

Comments
 (0)