From c39677fb69a69108d88169b1272e32464a37b77c Mon Sep 17 00:00:00 2001 From: Alex Cooke Date: Mon, 30 Mar 2026 16:02:45 +0200 Subject: [PATCH 1/3] Add semantic tokens support (textDocument/semanticTokens/full and /range) Expose VHDL entity classification to the editor via LSP semantic tokens, enabling context-aware coloring for signals, variables, constants, types, functions, ports, generics, and other VHDL constructs. Uses standard LSP token types (variable, parameter, function, type, etc.) for out-of-the-box theme compatibility. Constants and generics are distinguished via the readonly modifier. Results are cached per file and invalidated on change. Closes VHDL-LS/rust_hdl#314 --- vhdl_lang/src/ast/search.rs | 47 +++++ vhdl_lang/src/project.rs | 8 + vhdl_ls/src/stdio_server.rs | 16 ++ vhdl_ls/src/vhdl_server.rs | 221 +++++++++++++++++++++ vhdl_ls/src/vhdl_server/lifecycle.rs | 12 ++ vhdl_ls/src/vhdl_server/semantic_tokens.rs | 203 +++++++++++++++++++ vhdl_ls/src/vhdl_server/text_document.rs | 3 + 7 files changed, 510 insertions(+) create mode 100644 vhdl_ls/src/vhdl_server/semantic_tokens.rs diff --git a/vhdl_lang/src/ast/search.rs b/vhdl_lang/src/ast/search.rs index 2213496a3..c9afb11eb 100644 --- a/vhdl_lang/src/ast/search.rs +++ b/vhdl_lang/src/ast/search.rs @@ -7,6 +7,7 @@ use super::*; use crate::analysis::DesignRoot; +use crate::data::Source; use crate::named_entity::{EntRef, HasEntityId, Reference}; use crate::syntax::{HasTokenSpan, TokenAccess}; @@ -2005,6 +2006,52 @@ impl Searcher for FindAllUnresolved { } } +/// Collects all (position, entity) pairs in a source file for semantic token support. 
+pub struct SemanticTokenCollector<'a> { + root: &'a DesignRoot, + source: Source, + pub tokens: Vec<(SrcPos, EntRef<'a>)>, +} + +impl<'a> SemanticTokenCollector<'a> { + pub fn new(root: &'a DesignRoot, source: &Source) -> Self { + SemanticTokenCollector { + root, + source: source.clone(), + tokens: Vec::new(), + } + } +} + +impl Searcher for SemanticTokenCollector<'_> { + fn search_pos_with_ref( + &mut self, + _ctx: &dyn TokenAccess, + pos: &SrcPos, + reference: &Reference, + ) -> SearchState { + if let Some(id) = reference.get() { + let ent = self.root.get_ent(id); + self.tokens.push((pos.clone(), ent)); + } + NotFinished + } + + fn search_decl(&mut self, _ctx: &dyn TokenAccess, decl: FoundDeclaration<'_>) -> SearchState { + if let Some(id) = decl.ent_id() { + let ent = self.root.get_ent(id); + if let Some(decl_pos) = ent.decl_pos() { + // decl_pos may point to a different file (e.g. deferred constants), + // filter to only include declarations in the current source file. + if decl_pos.source == self.source { + self.tokens.push((decl_pos.clone(), ent)); + } + } + } + NotFinished + } +} + pub fn clear_references(tree: &mut impl Search, ctx: &dyn TokenAccess) { struct ReferenceClearer; diff --git a/vhdl_lang/src/project.rs b/vhdl_lang/src/project.rs index 2fa24a6b1..a84cd7418 100644 --- a/vhdl_lang/src/project.rs +++ b/vhdl_lang/src/project.rs @@ -335,6 +335,14 @@ impl Project { self.root.find_all_references_in_source(source, ent) } + /// Collect all (position, entity) pairs in a source file for semantic token support. 
+ pub fn semantic_tokens(&self, source: &Source) -> Vec<(SrcPos, EntRef<'_>)> { + use crate::ast::search::SemanticTokenCollector; + let mut collector = SemanticTokenCollector::new(&self.root, source); + let _ = self.root.search_source(source, &mut collector); + collector.tokens + } + /// Get source positions that are not resolved to a declaration /// This is used for development to test where the language server is blind pub fn find_all_unresolved(&self) -> (usize, Vec) { diff --git a/vhdl_ls/src/stdio_server.rs b/vhdl_ls/src/stdio_server.rs index d912127ff..8ed7ee5ca 100644 --- a/vhdl_ls/src/stdio_server.rs +++ b/vhdl_ls/src/stdio_server.rs @@ -231,6 +231,22 @@ impl ConnectionRpcChannel { } Err(request) => request, }; + let request = match extract::(request) { + Ok((id, params)) => { + let result = server.semantic_tokens_full(¶ms); + self.send_response(lsp_server::Response::new_ok(id, result)); + return; + } + Err(request) => request, + }; + let request = match extract::(request) { + Ok((id, params)) => { + let result = server.semantic_tokens_range(¶ms); + self.send_response(lsp_server::Response::new_ok(id, result)); + return; + } + Err(request) => request, + }; debug!("Unhandled request: {request:?}"); self.send_response(lsp_server::Response::new_err( diff --git a/vhdl_ls/src/vhdl_server.rs b/vhdl_ls/src/vhdl_server.rs index 71699cd62..f5e8ae1c7 100644 --- a/vhdl_ls/src/vhdl_server.rs +++ b/vhdl_ls/src/vhdl_server.rs @@ -8,6 +8,7 @@ mod completion; mod diagnostics; mod lifecycle; mod rename; +pub(crate) mod semantic_tokens; mod text_document; mod workspace; @@ -63,6 +64,7 @@ pub struct VHDLServer { use_external_config: bool, project: Project, diagnostic_cache: FnvHashMap>, + semantic_token_cache: FnvHashMap>, init_params: Option, config_file: Option, severity_map: SeverityMap, @@ -78,6 +80,7 @@ impl VHDLServer { use_external_config: true, project: Project::new(VHDLStandard::default()), diagnostic_cache: FnvHashMap::default(), + semantic_token_cache: 
FnvHashMap::default(), init_params: None, config_file: None, severity_map: SeverityMap::default(), @@ -94,6 +97,7 @@ impl VHDLServer { use_external_config, project: Project::new(VHDLStandard::default()), diagnostic_cache: Default::default(), + semantic_token_cache: Default::default(), init_params: None, config_file: None, severity_map: SeverityMap::default(), @@ -1005,4 +1009,221 @@ lib.files = [ }], }); } + + fn std_lib_config() -> String { + format!( + "[libraries]\nstd.files = ['{}/../vhdl_libraries/std/*.vhd']\nlib.files = ['*.vhd']\n", + std::env::var("CARGO_MANIFEST_DIR").unwrap() + ) + } + + /// Decode delta-encoded semantic tokens to (line, start, length, token_type, modifiers). + fn decode_semantic_tokens(tokens: &[SemanticToken]) -> Vec<(u32, u32, u32, u32, u32)> { + let mut result = Vec::new(); + let mut line = 0u32; + let mut start = 0u32; + for tok in tokens { + if tok.delta_line > 0 { + line += tok.delta_line; + start = tok.delta_start; + } else { + start += tok.delta_start; + } + result.push(( + line, + start, + tok.length, + tok.token_type, + tok.token_modifiers_bitset, + )); + } + result + } + + fn token_at( + decoded: &[(u32, u32, u32, u32, u32)], + line: u32, + character: u32, + ) -> Option<(u32, u32)> { + decoded + .iter() + .find(|(l, s, len, _, _)| *l == line && *s <= character && character < s + len) + .map(|(_, _, _, tt, m)| (*tt, *m)) + } + + fn get_semantic_tokens(server: &mut VHDLServer, uri: &Url) -> Vec<(u32, u32, u32, u32, u32)> { + let result = server + .semantic_tokens_full(&SemanticTokensParams { + text_document: TextDocumentIdentifier { uri: uri.clone() }, + work_done_progress_params: Default::default(), + partial_result_params: Default::default(), + }) + .expect("semantic tokens result"); + match result { + SemanticTokensResult::Tokens(t) => decode_semantic_tokens(&t.data), + _ => panic!("expected full tokens"), + } + } + + #[test] + fn semantic_tokens_constant_is_readonly() { + let (mock, mut server) = setup_server(); + let 
(_tempdir, root_uri) = temp_root_uri(); + let uri = write_file( + &root_uri, + "test.vhd", + "\ +package pkg is + constant c1 : integer := 5; +end package; +", + ); + let config_uri = write_config(&root_uri, std_lib_config()); + expect_loaded_config_messages(&mock, &config_uri); + initialize_server(&mut server, root_uri); + + let decoded = get_semantic_tokens(&mut server, &uri); + assert_eq!( + token_at(&decoded, 1, " constant ".len() as u32), + Some((0, 1)) + ); + } + + #[test] + fn semantic_tokens_signal_variable_constant_usage() { + let (mock, mut server) = setup_server(); + let (_tempdir, root_uri) = temp_root_uri(); + write_file( + &root_uri, + "pkg.vhd", + "\ +package pkg is + constant c1 : integer := 5; +end package; +", + ); + let ent_uri = write_file( + &root_uri, + "ent.vhd", + "\ +use work.pkg.all; +entity ent is + port (o_val : out integer); +end entity; + +architecture rtl of ent is + signal sig1 : integer; +begin + process + variable v1 : integer; + begin + v1 := c1; + sig1 <= v1; + end process; + o_val <= sig1; +end architecture; +", + ); + let config_uri = write_config(&root_uri, std_lib_config()); + expect_loaded_config_messages(&mock, &config_uri); + initialize_server(&mut server, root_uri); + + let decoded = get_semantic_tokens(&mut server, &ent_uri); + // signal and variable declarations: variable token, no modifiers + assert_eq!( + token_at(&decoded, 6, " signal ".len() as u32), + Some((0, 0)) + ); + assert_eq!( + token_at(&decoded, 9, " variable ".len() as u32), + Some((0, 0)) + ); + // constant usage: variable token + readonly modifier + assert_eq!( + token_at(&decoded, 11, " v1 := ".len() as u32), + Some((0, 1)) + ); + // signal and port usages + assert_eq!(token_at(&decoded, 12, " ".len() as u32), Some((0, 0))); + assert_eq!(token_at(&decoded, 14, " ".len() as u32), Some((0, 0))); + } + + #[test] + fn semantic_tokens_ports_and_generics() { + let (mock, mut server) = setup_server(); + let (_tempdir, root_uri) = temp_root_uri(); + let uri = 
write_file( + &root_uri, + "test.vhd", + "\ +entity ent is + generic (g_width : integer := 8); + port (i_data : in integer; o_data : out integer); +end entity; + +architecture rtl of ent is +begin + o_data <= i_data + g_width; +end architecture; +", + ); + let config_uri = write_config(&root_uri, std_lib_config()); + expect_loaded_config_messages(&mock, &config_uri); + initialize_server(&mut server, root_uri); + + let decoded = get_semantic_tokens(&mut server, &uri); + // generic: variable + readonly + assert_eq!( + token_at(&decoded, 1, " generic (".len() as u32), + Some((0, 1)) + ); + // port: variable, no modifiers + assert_eq!(token_at(&decoded, 2, " port (".len() as u32), Some((0, 0))); + } + + #[test] + fn semantic_tokens_types_and_functions() { + let (mock, mut server) = setup_server(); + let (_tempdir, root_uri) = temp_root_uri(); + let uri = write_file( + &root_uri, + "test.vhd", + "\ +package pkg is + type my_enum is (val_a, val_b); + type my_rec is record + field1 : integer; + end record; + function add_one(x : integer) return integer; +end package; + +package body pkg is + function add_one(x : integer) return integer is + begin + return x + 1; + end function; +end package body; +", + ); + let config_uri = write_config(&root_uri, std_lib_config()); + expect_loaded_config_messages(&mock, &config_uri); + initialize_server(&mut server, root_uri); + + let decoded = get_semantic_tokens(&mut server, &uri); + assert_eq!(token_at(&decoded, 1, " type ".len() as u32), Some((9, 0))); // enum + assert_eq!( + token_at(&decoded, 1, " type my_enum is (".len() as u32), + Some((3, 0)) + ); // enum_member + assert_eq!(token_at(&decoded, 2, " type ".len() as u32), Some((8, 0))); // struct + assert_eq!(token_at(&decoded, 3, " ".len() as u32), Some((2, 0))); // property + assert_eq!( + token_at(&decoded, 5, " function ".len() as u32), + Some((4, 0)) + ); // function + assert_eq!( + token_at(&decoded, 5, " function add_one(".len() as u32), + Some((1, 0)) + ); // parameter + } 
} diff --git a/vhdl_ls/src/vhdl_server/lifecycle.rs b/vhdl_ls/src/vhdl_server/lifecycle.rs index a48454ec3..2e0b2f66f 100644 --- a/vhdl_ls/src/vhdl_server/lifecycle.rs +++ b/vhdl_ls/src/vhdl_server/lifecycle.rs @@ -1,3 +1,4 @@ +use crate::vhdl_server::semantic_tokens::{TOKEN_MODIFIERS, TOKEN_TYPES}; use crate::vhdl_server::{NonProjectFileHandling, VHDLServer}; use lsp_types::*; use serde_json::Value; @@ -74,6 +75,17 @@ impl VHDLServer { workspace_symbol_provider: Some(OneOf::Left(true)), document_symbol_provider: Some(OneOf::Left(true)), document_highlight_provider: Some(OneOf::Left(true)), + semantic_tokens_provider: Some( + SemanticTokensServerCapabilities::SemanticTokensOptions(SemanticTokensOptions { + legend: SemanticTokensLegend { + token_types: TOKEN_TYPES.to_vec(), + token_modifiers: TOKEN_MODIFIERS.to_vec(), + }, + full: Some(SemanticTokensFullOptions::Bool(true)), + range: Some(true), + work_done_progress_options: Default::default(), + }), + ), completion_provider: Some(CompletionOptions { resolve_provider: Some(true), trigger_characters: Some(trigger_chars), diff --git a/vhdl_ls/src/vhdl_server/semantic_tokens.rs b/vhdl_ls/src/vhdl_server/semantic_tokens.rs new file mode 100644 index 000000000..b69b9d6ad --- /dev/null +++ b/vhdl_ls/src/vhdl_server/semantic_tokens.rs @@ -0,0 +1,203 @@ +use crate::vhdl_server::{from_lsp_range, uri_to_file_name, VHDLServer}; +use lsp_types::*; +use vhdl_lang::ast::ExternalObjectClass; +use vhdl_lang::{AnyEntKind, Concurrent, Object, Overloaded, Type}; + +// Semantic token type indices — order must match TOKEN_TYPES +const VARIABLE: u32 = 0; +const PARAMETER: u32 = 1; +const PROPERTY: u32 = 2; +const ENUM_MEMBER: u32 = 3; +const FUNCTION: u32 = 4; +const TYPE: u32 = 5; +const CLASS: u32 = 6; +const NAMESPACE: u32 = 7; +const STRUCT: u32 = 8; +const ENUM: u32 = 9; + +// Semantic token modifier bits +const MOD_READONLY: u32 = 1 << 0; + +pub const TOKEN_TYPES: &[SemanticTokenType] = &[ + SemanticTokenType::VARIABLE, // 0: 
signals, variables, constants, files + SemanticTokenType::PARAMETER, // 1: subprogram parameters + SemanticTokenType::PROPERTY, // 2: attributes, record fields + SemanticTokenType::ENUM_MEMBER, // 3: enum literals + SemanticTokenType::FUNCTION, // 4: functions, procedures + SemanticTokenType::TYPE, // 5: types (general) + SemanticTokenType::CLASS, // 6: protected types, components + SemanticTokenType::NAMESPACE, // 7: libraries, design units, labels + SemanticTokenType::STRUCT, // 8: record types + SemanticTokenType::ENUM, // 9: enum types +]; + +pub const TOKEN_MODIFIERS: &[SemanticTokenModifier] = &[ + SemanticTokenModifier::READONLY, // bit 0: constants, generics +]; + +fn object_token(obj: &Object) -> (u32, u32) { + if obj.is_param() { + return (PARAMETER, 0); + } + if obj.is_generic() || obj.is_constant() { + return (VARIABLE, MOD_READONLY); + } + (VARIABLE, 0) +} + +fn overloaded_token(o: &Overloaded) -> (u32, u32) { + match o { + Overloaded::EnumLiteral(_) => (ENUM_MEMBER, 0), + Overloaded::Alias(inner) => overloaded_token(inner.kind()), + _ => (FUNCTION, 0), + } +} + +fn type_token(t: &Type) -> (u32, u32) { + match t { + Type::Enum(_) => (ENUM, 0), + Type::Record(_) => (STRUCT, 0), + Type::Protected(..) 
=> (CLASS, 0), + Type::Subtype(sub) => type_token(sub.type_mark().kind()), + Type::Alias(t) => type_token(t.kind()), + _ => (TYPE, 0), + } +} + +fn to_semantic_token(kind: &AnyEntKind) -> Option<(u32, u32)> { + let result = match kind { + AnyEntKind::Object(obj) => object_token(obj), + AnyEntKind::DeferredConstant(_) + | AnyEntKind::LoopParameter(_) + | AnyEntKind::PhysicalLiteral(_) => (VARIABLE, MOD_READONLY), + AnyEntKind::Overloaded(o) => overloaded_token(o), + AnyEntKind::Type(t) => type_token(t), + AnyEntKind::Component(_) => (CLASS, 0), + AnyEntKind::Attribute(_) | AnyEntKind::ElementDeclaration(_) => (PROPERTY, 0), + AnyEntKind::Library | AnyEntKind::Design(_) => (NAMESPACE, 0), + AnyEntKind::View(_) => (TYPE, 0), + AnyEntKind::File(_) | AnyEntKind::InterfaceFile(_) => (VARIABLE, 0), + AnyEntKind::ObjectAlias { base_object, .. } => object_token(base_object.object()), + AnyEntKind::ExternalAlias { class, .. } => match class { + ExternalObjectClass::Constant => (VARIABLE, MOD_READONLY), + _ => (VARIABLE, 0), + }, + AnyEntKind::Concurrent(Some(Concurrent::Instance), _) => (CLASS, 0), + AnyEntKind::Concurrent(..) | AnyEntKind::Sequential(..) => return None, + }; + Some(result) +} + +/// Check if a token overlaps the filter range by line. +/// Character-level precision is not needed as clients request full-line ranges. +fn in_range(token_range: &vhdl_lang::Range, filter: &vhdl_lang::Range) -> bool { + token_range.start.line <= filter.end.line && token_range.end.line >= filter.start.line +} + +/// Map and sort raw tokens from the AST walk into cacheable form. 
+fn map_and_sort( + raw_tokens: Vec<(vhdl_lang::SrcPos, vhdl_lang::EntRef<'_>)>, +) -> Vec<(vhdl_lang::Range, u32, u32)> { + let mut tokens: Vec<_> = raw_tokens + .into_iter() + .filter_map(|(pos, ent)| { + let (token_type, token_modifiers) = to_semantic_token(ent.kind())?; + let range = pos.range(); + Some((range, token_type, token_modifiers)) + }) + .collect(); + + tokens.sort_by(|a, b| { + a.0.start + .line + .cmp(&b.0.start.line) + .then(a.0.start.character.cmp(&b.0.start.character)) + }); + + tokens +} + +/// Delta-encode sorted tokens, optionally filtering to a range. +fn encode( + tokens: &[(vhdl_lang::Range, u32, u32)], + range_filter: Option<&vhdl_lang::Range>, +) -> Vec { + let mut semantic_tokens = Vec::with_capacity(tokens.len()); + let mut prev_line = 0u32; + let mut prev_start = 0u32; + + for (range, token_type, token_modifiers) in tokens { + if let Some(filter) = range_filter { + if !in_range(range, filter) { + continue; + } + } + + let line = range.start.line; + let start = range.start.character; + if range.start.line != range.end.line { + continue; // Skip multi-line tokens; identifiers never span lines + } + let length = range.end.character - range.start.character; + + let delta_line = line - prev_line; + let delta_start = if delta_line == 0 { + start - prev_start + } else { + start + }; + + semantic_tokens.push(SemanticToken { + delta_line, + delta_start, + length, + token_type: *token_type, + token_modifiers_bitset: *token_modifiers, + }); + + prev_line = line; + prev_start = start; + } + + semantic_tokens +} + +impl VHDLServer { + /// Get or compute the cached semantic tokens for a file. 
+ fn cached_semantic_tokens(&mut self, uri: &Url) -> Option<&[(vhdl_lang::Range, u32, u32)]> { + if !self.semantic_token_cache.contains_key(uri) { + let source = self.project.get_source(&uri_to_file_name(uri))?; + let raw_tokens = self.project.semantic_tokens(&source); + let tokens = map_and_sort(raw_tokens); + self.semantic_token_cache.insert(uri.clone(), tokens); + } + self.semantic_token_cache.get(uri).map(|v| v.as_slice()) + } + + pub fn semantic_tokens_full( + &mut self, + params: &SemanticTokensParams, + ) -> Option { + let tokens = self.cached_semantic_tokens(¶ms.text_document.uri)?; + let data = encode(tokens, None); + + Some(SemanticTokensResult::Tokens(SemanticTokens { + result_id: None, + data, + })) + } + + pub fn semantic_tokens_range( + &mut self, + params: &SemanticTokensRangeParams, + ) -> Option { + let filter = from_lsp_range(params.range); + let tokens = self.cached_semantic_tokens(¶ms.text_document.uri)?; + let data = encode(tokens, Some(&filter)); + + Some(SemanticTokensRangeResult::Tokens(SemanticTokens { + result_id: None, + data, + })) + } +} diff --git a/vhdl_ls/src/vhdl_server/text_document.rs b/vhdl_ls/src/vhdl_server/text_document.rs index 1e841093c..7ae615cce 100644 --- a/vhdl_ls/src/vhdl_server/text_document.rs +++ b/vhdl_ls/src/vhdl_server/text_document.rs @@ -16,6 +16,7 @@ impl VHDLServer { if let Some(source) = self.project.get_source(&file_name) { source.change(None, text); self.project.update_source(&source); + self.semantic_token_cache.clear(); self.publish_diagnostics(); } else { match self.settings.non_project_file_handling { @@ -27,6 +28,7 @@ impl VHDLServer { ))); self.project .update_source(&Source::inline(&file_name, text)); + self.semantic_token_cache.clear(); self.publish_diagnostics(); } } @@ -41,6 +43,7 @@ impl VHDLServer { source.change(range.as_ref(), &content_change.text); } self.project.update_source(&source); + self.semantic_token_cache.clear(); self.publish_diagnostics(); } else if 
self.settings.non_project_file_handling != NonProjectFileHandling::Ignore { self.message(Message::error(format!( From bb77ba1ab9796cc9e3d5e14e51c8e7f0d2c43fc2 Mon Sep 17 00:00:00 2001 From: Alex Cooke Date: Tue, 7 Apr 2026 13:09:21 +0200 Subject: [PATCH 2/3] Address review feedback: introduce structs, macro, and rename API - Replace bare (u32, u32) tuples with TokenClassification and CachedToken structs - Replace DecodedToken test tuple with named struct - Add define_token_types! macro to keep index constants and legend in sync - Use SrcPos::cmp for sorting instead of manual line/character comparison - Move in_range to Range::overlaps_lines in vhdl_lang - Rename Project::semantic_tokens to find_all_entity_references - Add source file filter in search_decl to guard against cross-file decl_pos - Match ExternalObjectClass directly instead of converting to ObjectClass - Skip multi-line tokens in encode instead of computing wrong length --- vhdl_lang/src/data/source.rs | 5 + vhdl_lang/src/project.rs | 4 +- vhdl_ls/src/vhdl_server.rs | 35 ++-- vhdl_ls/src/vhdl_server/semantic_tokens.rs | 226 +++++++++++++-------- vhdl_ls/src/vhdl_server/text_document.rs | 2 + 5 files changed, 172 insertions(+), 100 deletions(-) diff --git a/vhdl_lang/src/data/source.rs b/vhdl_lang/src/data/source.rs index 4ef8c7b08..b1058b6a0 100644 --- a/vhdl_lang/src/data/source.rs +++ b/vhdl_lang/src/data/source.rs @@ -244,6 +244,11 @@ impl Range { pub fn contains(&self, position: Position) -> bool { self.start <= position && self.end >= position } + + /// Check if two ranges overlap by line (ignoring character positions). + pub fn overlaps_lines(&self, other: &Range) -> bool { + self.start.line <= other.end.line && self.end.line >= other.start.line + } } /// A lexical range within a specific source file. 
diff --git a/vhdl_lang/src/project.rs b/vhdl_lang/src/project.rs index a84cd7418..fcac00db9 100644 --- a/vhdl_lang/src/project.rs +++ b/vhdl_lang/src/project.rs @@ -335,8 +335,8 @@ impl Project { self.root.find_all_references_in_source(source, ent) } - /// Collect all (position, entity) pairs in a source file for semantic token support. - pub fn semantic_tokens(&self, source: &Source) -> Vec<(SrcPos, EntRef<'_>)> { + /// Collect all (position, entity) pairs in a source file. + pub fn find_all_entity_references(&self, source: &Source) -> Vec<(SrcPos, EntRef<'_>)> { use crate::ast::search::SemanticTokenCollector; let mut collector = SemanticTokenCollector::new(&self.root, source); let _ = self.root.search_source(source, &mut collector); diff --git a/vhdl_ls/src/vhdl_server.rs b/vhdl_ls/src/vhdl_server.rs index f5e8ae1c7..7ed7a6c33 100644 --- a/vhdl_ls/src/vhdl_server.rs +++ b/vhdl_ls/src/vhdl_server.rs @@ -64,7 +64,7 @@ pub struct VHDLServer { use_external_config: bool, project: Project, diagnostic_cache: FnvHashMap>, - semantic_token_cache: FnvHashMap>, + semantic_token_cache: FnvHashMap>, init_params: Option, config_file: Option, severity_map: SeverityMap, @@ -1017,8 +1017,15 @@ lib.files = [ ) } - /// Decode delta-encoded semantic tokens to (line, start, length, token_type, modifiers). 
- fn decode_semantic_tokens(tokens: &[SemanticToken]) -> Vec<(u32, u32, u32, u32, u32)> { + struct DecodedToken { + line: u32, + start: u32, + length: u32, + token_type: u32, + modifiers: u32, + } + + fn decode_semantic_tokens(tokens: &[SemanticToken]) -> Vec { let mut result = Vec::new(); let mut line = 0u32; let mut start = 0u32; @@ -1029,29 +1036,25 @@ lib.files = [ } else { start += tok.delta_start; } - result.push(( + result.push(DecodedToken { line, start, - tok.length, - tok.token_type, - tok.token_modifiers_bitset, - )); + length: tok.length, + token_type: tok.token_type, + modifiers: tok.token_modifiers_bitset, + }); } result } - fn token_at( - decoded: &[(u32, u32, u32, u32, u32)], - line: u32, - character: u32, - ) -> Option<(u32, u32)> { + fn token_at(decoded: &[DecodedToken], line: u32, character: u32) -> Option<(u32, u32)> { decoded .iter() - .find(|(l, s, len, _, _)| *l == line && *s <= character && character < s + len) - .map(|(_, _, _, tt, m)| (*tt, *m)) + .find(|t| t.line == line && t.start <= character && character < t.start + t.length) + .map(|t| (t.token_type, t.modifiers)) } - fn get_semantic_tokens(server: &mut VHDLServer, uri: &Url) -> Vec<(u32, u32, u32, u32, u32)> { + fn get_semantic_tokens(server: &mut VHDLServer, uri: &Url) -> Vec { let result = server .semantic_tokens_full(&SemanticTokensParams { text_document: TextDocumentIdentifier { uri: uri.clone() }, diff --git a/vhdl_ls/src/vhdl_server/semantic_tokens.rs b/vhdl_ls/src/vhdl_server/semantic_tokens.rs index b69b9d6ad..82123ccb6 100644 --- a/vhdl_ls/src/vhdl_server/semantic_tokens.rs +++ b/vhdl_ls/src/vhdl_server/semantic_tokens.rs @@ -3,142 +3,204 @@ use lsp_types::*; use vhdl_lang::ast::ExternalObjectClass; use vhdl_lang::{AnyEntKind, Concurrent, Object, Overloaded, Type}; -// Semantic token type indices — order must match TOKEN_TYPES -const VARIABLE: u32 = 0; -const PARAMETER: u32 = 1; -const PROPERTY: u32 = 2; -const ENUM_MEMBER: u32 = 3; -const FUNCTION: u32 = 4; -const TYPE: u32 
= 5; -const CLASS: u32 = 6; -const NAMESPACE: u32 = 7; -const STRUCT: u32 = 8; -const ENUM: u32 = 9; +/// Generates token type index constants and the TOKEN_TYPES legend array +/// from a single declaration, keeping the two in sync automatically. +macro_rules! define_token_types { + ( $( ($const:ident = $lsp_type:expr) ),+ $(,)? ) => { + define_token_types!(@consts 0, $( $const, )+); + + pub const TOKEN_TYPES: &[SemanticTokenType] = &[ + $( $lsp_type, )+ + ]; + }; + + // Base case + (@consts $idx:expr, ) => {}; + // Recursive case: assign current index, increment for the rest + (@consts $idx:expr, $const:ident, $( $rest:ident, )*) => { + const $const: u32 = $idx; + define_token_types!(@consts ($idx + 1), $( $rest, )*); + }; +} + +define_token_types! { + (VARIABLE = SemanticTokenType::VARIABLE), // signals, variables, constants, files + (PARAMETER = SemanticTokenType::PARAMETER), // subprogram parameters + (PROPERTY = SemanticTokenType::PROPERTY), // attributes, record fields + (ENUM_MEMBER = SemanticTokenType::ENUM_MEMBER), // enum literals + (FUNCTION = SemanticTokenType::FUNCTION), // functions, procedures + (TYPE = SemanticTokenType::TYPE), // types (general) + (CLASS = SemanticTokenType::CLASS), // protected types, components + (NAMESPACE = SemanticTokenType::NAMESPACE), // libraries, design units, labels + (STRUCT = SemanticTokenType::STRUCT), // record types + (ENUM = SemanticTokenType::ENUM), // enum types +} // Semantic token modifier bits const MOD_READONLY: u32 = 1 << 0; -pub const TOKEN_TYPES: &[SemanticTokenType] = &[ - SemanticTokenType::VARIABLE, // 0: signals, variables, constants, files - SemanticTokenType::PARAMETER, // 1: subprogram parameters - SemanticTokenType::PROPERTY, // 2: attributes, record fields - SemanticTokenType::ENUM_MEMBER, // 3: enum literals - SemanticTokenType::FUNCTION, // 4: functions, procedures - SemanticTokenType::TYPE, // 5: types (general) - SemanticTokenType::CLASS, // 6: protected types, components - 
SemanticTokenType::NAMESPACE, // 7: libraries, design units, labels - SemanticTokenType::STRUCT, // 8: record types - SemanticTokenType::ENUM, // 9: enum types -]; - pub const TOKEN_MODIFIERS: &[SemanticTokenModifier] = &[ SemanticTokenModifier::READONLY, // bit 0: constants, generics ]; -fn object_token(obj: &Object) -> (u32, u32) { +/// Classification of a VHDL entity into an LSP semantic token. +struct TokenClassification { + token_type: u32, + modifiers: u32, +} + +/// A resolved semantic token ready for caching and encoding. +pub(crate) struct CachedToken { + pub range: vhdl_lang::Range, + pub token_type: u32, + pub modifiers: u32, +} + +fn object_token(obj: &Object) -> TokenClassification { if obj.is_param() { - return (PARAMETER, 0); + return TokenClassification { + token_type: PARAMETER, + modifiers: 0, + }; } if obj.is_generic() || obj.is_constant() { - return (VARIABLE, MOD_READONLY); + return TokenClassification { + token_type: VARIABLE, + modifiers: MOD_READONLY, + }; + } + TokenClassification { + token_type: VARIABLE, + modifiers: 0, } - (VARIABLE, 0) } -fn overloaded_token(o: &Overloaded) -> (u32, u32) { +fn overloaded_token(o: &Overloaded) -> TokenClassification { match o { - Overloaded::EnumLiteral(_) => (ENUM_MEMBER, 0), + Overloaded::EnumLiteral(_) => TokenClassification { + token_type: ENUM_MEMBER, + modifiers: 0, + }, Overloaded::Alias(inner) => overloaded_token(inner.kind()), - _ => (FUNCTION, 0), + _ => TokenClassification { + token_type: FUNCTION, + modifiers: 0, + }, } } -fn type_token(t: &Type) -> (u32, u32) { +fn type_token(t: &Type) -> TokenClassification { match t { - Type::Enum(_) => (ENUM, 0), - Type::Record(_) => (STRUCT, 0), - Type::Protected(..) => (CLASS, 0), + Type::Enum(_) => TokenClassification { + token_type: ENUM, + modifiers: 0, + }, + Type::Record(_) => TokenClassification { + token_type: STRUCT, + modifiers: 0, + }, + Type::Protected(..) 
=> TokenClassification { + token_type: CLASS, + modifiers: 0, + }, Type::Subtype(sub) => type_token(sub.type_mark().kind()), Type::Alias(t) => type_token(t.kind()), - _ => (TYPE, 0), + _ => TokenClassification { + token_type: TYPE, + modifiers: 0, + }, } } -fn to_semantic_token(kind: &AnyEntKind) -> Option<(u32, u32)> { +fn classify(kind: &AnyEntKind) -> Option { let result = match kind { AnyEntKind::Object(obj) => object_token(obj), AnyEntKind::DeferredConstant(_) | AnyEntKind::LoopParameter(_) - | AnyEntKind::PhysicalLiteral(_) => (VARIABLE, MOD_READONLY), + | AnyEntKind::PhysicalLiteral(_) => TokenClassification { + token_type: VARIABLE, + modifiers: MOD_READONLY, + }, AnyEntKind::Overloaded(o) => overloaded_token(o), AnyEntKind::Type(t) => type_token(t), - AnyEntKind::Component(_) => (CLASS, 0), - AnyEntKind::Attribute(_) | AnyEntKind::ElementDeclaration(_) => (PROPERTY, 0), - AnyEntKind::Library | AnyEntKind::Design(_) => (NAMESPACE, 0), - AnyEntKind::View(_) => (TYPE, 0), - AnyEntKind::File(_) | AnyEntKind::InterfaceFile(_) => (VARIABLE, 0), + AnyEntKind::Component(_) => TokenClassification { + token_type: CLASS, + modifiers: 0, + }, + AnyEntKind::Attribute(_) | AnyEntKind::ElementDeclaration(_) => TokenClassification { + token_type: PROPERTY, + modifiers: 0, + }, + AnyEntKind::Library | AnyEntKind::Design(_) => TokenClassification { + token_type: NAMESPACE, + modifiers: 0, + }, + AnyEntKind::View(_) => TokenClassification { + token_type: TYPE, + modifiers: 0, + }, + AnyEntKind::File(_) | AnyEntKind::InterfaceFile(_) => TokenClassification { + token_type: VARIABLE, + modifiers: 0, + }, AnyEntKind::ObjectAlias { base_object, .. } => object_token(base_object.object()), AnyEntKind::ExternalAlias { class, .. 
} => match class { - ExternalObjectClass::Constant => (VARIABLE, MOD_READONLY), - _ => (VARIABLE, 0), + ExternalObjectClass::Constant => TokenClassification { + token_type: VARIABLE, + modifiers: MOD_READONLY, + }, + _ => TokenClassification { + token_type: VARIABLE, + modifiers: 0, + }, + }, + AnyEntKind::Concurrent(Some(Concurrent::Instance), _) => TokenClassification { + token_type: CLASS, + modifiers: 0, }, - AnyEntKind::Concurrent(Some(Concurrent::Instance), _) => (CLASS, 0), AnyEntKind::Concurrent(..) | AnyEntKind::Sequential(..) => return None, }; Some(result) } -/// Check if a token overlaps the filter range by line. -/// Character-level precision is not needed as clients request full-line ranges. -fn in_range(token_range: &vhdl_lang::Range, filter: &vhdl_lang::Range) -> bool { - token_range.start.line <= filter.end.line && token_range.end.line >= filter.start.line -} - /// Map and sort raw tokens from the AST walk into cacheable form. fn map_and_sort( - raw_tokens: Vec<(vhdl_lang::SrcPos, vhdl_lang::EntRef<'_>)>, -) -> Vec<(vhdl_lang::Range, u32, u32)> { - let mut tokens: Vec<_> = raw_tokens + mut raw_tokens: Vec<(vhdl_lang::SrcPos, vhdl_lang::EntRef<'_>)>, +) -> Vec { + raw_tokens.sort_by(|(pos_a, _), (pos_b, _)| pos_a.cmp(pos_b)); + + raw_tokens .into_iter() .filter_map(|(pos, ent)| { - let (token_type, token_modifiers) = to_semantic_token(ent.kind())?; - let range = pos.range(); - Some((range, token_type, token_modifiers)) + let cls = classify(ent.kind())?; + Some(CachedToken { + range: pos.range(), + token_type: cls.token_type, + modifiers: cls.modifiers, + }) }) - .collect(); - - tokens.sort_by(|a, b| { - a.0.start - .line - .cmp(&b.0.start.line) - .then(a.0.start.character.cmp(&b.0.start.character)) - }); - - tokens + .collect() } /// Delta-encode sorted tokens, optionally filtering to a range. 
-fn encode( - tokens: &[(vhdl_lang::Range, u32, u32)], - range_filter: Option<&vhdl_lang::Range>, -) -> Vec<SemanticToken> { +fn encode(tokens: &[CachedToken], range_filter: Option<&vhdl_lang::Range>) -> Vec<SemanticToken> { let mut semantic_tokens = Vec::with_capacity(tokens.len()); let mut prev_line = 0u32; let mut prev_start = 0u32; - for (range, token_type, token_modifiers) in tokens { + for token in tokens { if let Some(filter) = range_filter { - if !in_range(range, filter) { + if !token.range.overlaps_lines(filter) { continue; } } - let line = range.start.line; - let start = range.start.character; - if range.start.line != range.end.line { + let line = token.range.start.line; + let start = token.range.start.character; + if token.range.start.line != token.range.end.line { continue; // Skip multi-line tokens; identifiers never span lines } - let length = range.end.character - range.start.character; + let length = token.range.end.character - token.range.start.character; let delta_line = line - prev_line; let delta_start = if delta_line == 0 { @@ -151,8 +213,8 @@ fn encode( delta_line, delta_start, length, - token_type: *token_type, - token_modifiers_bitset: *token_modifiers, + token_type: token.token_type, + token_modifiers_bitset: token.modifiers, }); prev_line = line; @@ -164,10 +226,10 @@ impl VHDLServer { /// Get or compute the cached semantic tokens for a file. 
- fn cached_semantic_tokens(&mut self, uri: &Url) -> Option<&[(vhdl_lang::Range, u32, u32)]> { + fn cached_semantic_tokens(&mut self, uri: &Url) -> Option<&[CachedToken]> { if !self.semantic_token_cache.contains_key(uri) { let source = self.project.get_source(&uri_to_file_name(uri))?; - let raw_tokens = self.project.semantic_tokens(&source); + let raw_tokens = self.project.find_all_entity_references(&source); let tokens = map_and_sort(raw_tokens); self.semantic_token_cache.insert(uri.clone(), tokens); } diff --git a/vhdl_ls/src/vhdl_server/text_document.rs b/vhdl_ls/src/vhdl_server/text_document.rs index 7ae615cce..84556f13c 100644 --- a/vhdl_ls/src/vhdl_server/text_document.rs +++ b/vhdl_ls/src/vhdl_server/text_document.rs @@ -16,6 +16,8 @@ impl VHDLServer { if let Some(source) = self.project.get_source(&file_name) { source.change(None, text); self.project.update_source(&source); + // Clear all cached semantic tokens: cross-file references mean a + // change in one file can affect resolved entities in other files. 
self.semantic_token_cache.clear(); self.publish_diagnostics(); } else { From 148a3699ad98e6de6ba4ed22feba7743d10cd04c Mon Sep 17 00:00:00 2001 From: Lukas Scheller Date: Fri, 24 Apr 2026 01:03:25 +0200 Subject: [PATCH 3/3] Minor cleanups --- vhdl_ls/src/vhdl_server.rs | 31 ++++++++++++++-------- vhdl_ls/src/vhdl_server/semantic_tokens.rs | 9 ++++--- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/vhdl_ls/src/vhdl_server.rs b/vhdl_ls/src/vhdl_server.rs index 7ed7a6c33..2928c7e8f 100644 --- a/vhdl_ls/src/vhdl_server.rs +++ b/vhdl_ls/src/vhdl_server.rs @@ -511,7 +511,10 @@ mod tests { use std::rc::Rc; use super::*; - use crate::rpc_channel::test_support::*; + use crate::{ + rpc_channel::test_support::*, + vhdl_server::semantic_tokens::{ENUM_MEMBER, FUNCTION, MOD_READONLY, PARAMETER, VARIABLE}, + }; pub(crate) fn initialize_server(server: &mut VHDLServer, root_uri: Url) { let capabilities = ClientCapabilities::default(); @@ -1088,7 +1091,7 @@ end package; let decoded = get_semantic_tokens(&mut server, &uri); assert_eq!( token_at(&decoded, 1, " constant ".len() as u32), - Some((0, 1)) + Some((VARIABLE, MOD_READONLY)) ); } @@ -1135,20 +1138,26 @@ end architecture; // signal and variable declarations: variable token, no modifiers assert_eq!( token_at(&decoded, 6, " signal ".len() as u32), - Some((0, 0)) + Some((VARIABLE, 0)) ); assert_eq!( token_at(&decoded, 9, " variable ".len() as u32), - Some((0, 0)) + Some((VARIABLE, 0)) ); // constant usage: variable token + readonly modifier assert_eq!( token_at(&decoded, 11, " v1 := ".len() as u32), - Some((0, 1)) + Some((VARIABLE, MOD_READONLY)) ); // signal and port usages - assert_eq!(token_at(&decoded, 12, " ".len() as u32), Some((0, 0))); - assert_eq!(token_at(&decoded, 14, " ".len() as u32), Some((0, 0))); + assert_eq!( + token_at(&decoded, 12, " ".len() as u32), + Some((VARIABLE, 0)) + ); + assert_eq!( + token_at(&decoded, 14, " ".len() as u32), + Some((VARIABLE, 0)) + ); } #[test] @@ -1178,7 +1187,7 @@ end 
architecture; // generic: variable + readonly assert_eq!( token_at(&decoded, 1, " generic (".len() as u32), - Some((0, 1)) + Some((VARIABLE, MOD_READONLY)) ); // port: variable, no modifiers - assert_eq!(token_at(&decoded, 2, " port (".len() as u32), Some((0, 0))); + assert_eq!(token_at(&decoded, 2, " port (".len() as u32), Some((VARIABLE, 0))); @@ -1216,17 +1225,17 @@ end package body; assert_eq!(token_at(&decoded, 1, " type ".len() as u32), Some((9, 0))); // enum assert_eq!( token_at(&decoded, 1, " type my_enum is (".len() as u32), - Some((3, 0)) + Some((ENUM_MEMBER, 0)) ); // enum_member assert_eq!(token_at(&decoded, 2, " type ".len() as u32), Some((8, 0))); // struct assert_eq!(token_at(&decoded, 3, " ".len() as u32), Some((2, 0))); // property assert_eq!( token_at(&decoded, 5, " function ".len() as u32), - Some((4, 0)) + Some((FUNCTION, 0)) ); // function assert_eq!( token_at(&decoded, 5, " function add_one(".len() as u32), - Some((1, 0)) + Some((PARAMETER, 0)) ); // parameter } } diff --git a/vhdl_ls/src/vhdl_server/semantic_tokens.rs b/vhdl_ls/src/vhdl_server/semantic_tokens.rs index 82123ccb6..295a711e3 100644 --- a/vhdl_ls/src/vhdl_server/semantic_tokens.rs +++ b/vhdl_ls/src/vhdl_server/semantic_tokens.rs @@ -18,7 +18,7 @@ macro_rules! define_token_types { (@consts $idx:expr, ) => {}; // Recursive case: assign current index, increment for the rest (@consts $idx:expr, $const:ident, $( $rest:ident, )*) => { - const $const: u32 = $idx; + pub(crate) const $const: u32 = $idx; define_token_types!(@consts ($idx + 1), $( $rest, )*); }; } @@ -37,7 +37,7 @@ define_token_types! { } // Semantic token modifier bits -const MOD_READONLY: u32 = 1 << 0; +pub(crate) const MOD_READONLY: u32 = 1 << 0; pub const TOKEN_MODIFIERS: &[SemanticTokenModifier] = &[ SemanticTokenModifier::READONLY, // bit 0: constants, generics