diff --git a/crates/ark/src/lsp.rs b/crates/ark/src/lsp.rs index 05acf233a..a5885c17e 100644 --- a/crates/ark/src/lsp.rs +++ b/crates/ark/src/lsp.rs @@ -5,12 +5,14 @@ // // +pub(crate) mod ark_file; pub mod backend; pub mod capabilities; pub mod code_action; pub mod comm; pub mod completions; mod config; +pub(crate) mod db; mod declarations; pub mod diagnostics; pub mod diagnostics_syntax; diff --git a/crates/ark/src/lsp/ark_file.rs b/crates/ark/src/lsp/ark_file.rs new file mode 100644 index 000000000..8c4245ecd --- /dev/null +++ b/crates/ark/src/lsp/ark_file.rs @@ -0,0 +1,144 @@ +// +// ark_file.rs +// +// Copyright (C) 2026 Posit Software, PBC. All rights reserved. +// +// + +use aether_lsp_utils::proto::from_proto; +use aether_lsp_utils::proto::to_proto; +use aether_lsp_utils::proto::PositionEncoding; +use oak_db::File; +use tower_lsp::lsp_types; +use url::Url; + +use crate::lsp::config::DocumentConfig; +use crate::lsp::db::ArkDb; +use crate::lsp::db::FileArkExt; + +/// Editor-managed buffer state, paired with its `oak_db::File`. +/// +/// `ArkFile` and `OakDatabase` are sibling fields on `WorldState`, so an +/// `ArkFile` cannot hold a reference to the database. That's why the methods +/// below take `db` as an argument instead of storing a reference, which is the +/// Salsa convention anyway. +#[derive(Debug)] +pub(crate) struct ArkFile { + pub(crate) file: File, + pub(crate) version: Option, + pub(crate) config: DocumentConfig, + pub(crate) url: Url, + pub(crate) encoding: PositionEncoding, +} + +impl ArkFile { + pub(crate) fn tree_sitter<'db>(&self, db: &'db dyn ArkDb) -> &'db tree_sitter::Tree { + self.file.tree_sitter(db) + } + + pub(crate) fn line_index<'db>(&self, db: &'db dyn ArkDb) -> &'db biome_line_index::LineIndex { + self.file.line_index(db) + } + + pub(crate) fn contents<'db>(&self, db: &'db dyn ArkDb) -> &'db str { + self.file.contents(db).as_str() + } + + pub(crate) fn get_line<'db>(&self, db: &'db dyn ArkDb, line: usize) -> Option<&'db str> { + let line_index = self.line_index(db); + let contents = self.contents(db); + + let Some(line_start) = line_index.newlines.get(line) else { + // Forcing a full capture so we can learn the situations in which this occurs + log::error!( + "Requesting line {line} but only {n} lines exist.\n\nContents:\n{contents}\n\nBacktrace:\n{trace}", + n = line_index.len(), + line = line + 1, + trace = std::backtrace::Backtrace::force_capture(), + ); + return None; + }; + + let line_end = line_index + .newlines + .get(line + 1) + .copied() + // if `line` is last, extract text until end of buffer + .unwrap_or_else(|| (contents.len() as u32).into()); + + let line_start_byte: usize = line_start.to_owned().into(); + let line_end_byte: usize = line_end.into(); + + contents.get(line_start_byte..line_end_byte) + } + + pub(crate) fn tree_sitter_point_from_lsp_position( + &self, + db: &dyn ArkDb, + position: lsp_types::Position, + ) -> anyhow::Result { + let line_col = + from_proto::line_col_from_position(position, self.line_index(db), self.encoding); + Ok(tree_sitter::Point::new( + line_col.line as usize, + line_col.col as usize, + )) + } + + pub(crate) fn lsp_position_from_tree_sitter_point( + &self, + db: &dyn ArkDb, + point: tree_sitter::Point, + ) -> anyhow::Result { + let line_col = biome_line_index::LineCol { + line: point.row as u32, + col: point.column as u32, + }; + to_proto::position_from_line_col(line_col, self.line_index(db), self.encoding) + } + + pub(crate) fn lsp_range_from_tree_sitter_range( + &self, + db: &dyn ArkDb, + range: tree_sitter::Range, + ) -> anyhow::Result { + let start = self.lsp_position_from_tree_sitter_point(db, range.start_point)?; + let end = self.lsp_position_from_tree_sitter_point(db, range.end_point)?; + Ok(lsp_types::Range::new(start, end)) + } + + pub(crate) fn tree_sitter_range_from_lsp_range( + &self, + db: &dyn ArkDb, + range: lsp_types::Range, + ) -> anyhow::Result { + let start_point = self.tree_sitter_point_from_lsp_position(db, range.start)?; + let end_point = self.tree_sitter_point_from_lsp_position(db, range.end)?; + + let text_range = from_proto::text_range(range, self.line_index(db), self.encoding)?; + + Ok(tree_sitter::Range { + start_byte: text_range.start().into(), + end_byte: text_range.end().into(), + start_point, + end_point, + }) + } +} + +#[cfg(test)] +pub(crate) fn test_ark_file(code: &str) -> (oak_db::OakDatabase, ArkFile) { + use aether_path::FilePath; + + let db = oak_db::OakDatabase::new(); + let url = Url::parse("file:///test.R").unwrap(); + let key = FilePath::from_url(&url); + let file = ArkFile { + file: File::new(&db, key, code.to_string(), None), + version: None, + config: DocumentConfig::default(), + url, + encoding: PositionEncoding::Wide(biome_line_index::WideEncoding::Utf16), + }; + (db, file) +} diff --git a/crates/ark/src/lsp/code_action.rs b/crates/ark/src/lsp/code_action.rs index e91ea9665..f20f0b0a5 100644 --- a/crates/ark/src/lsp/code_action.rs +++ b/crates/ark/src/lsp/code_action.rs @@ -13,9 +13,10 @@ use tower_lsp::lsp_types; use tree_sitter::Range; use url::Url; +use crate::lsp::ark_file::ArkFile; use crate::lsp::capabilities::Capabilities; use crate::lsp::code_action::roxygen::roxygen_documentation; -use crate::lsp::document::Document; +use crate::lsp::db::ArkDb; mod roxygen; @@ -25,14 +26,14 @@ pub(crate) struct CodeActions { } pub(crate) fn code_actions( - uri: &Url, - document: &Document, + db: &dyn ArkDb, + file: &ArkFile, range: Range, capabilities: &Capabilities, ) -> lsp_types::CodeActionResponse { let mut actions = CodeActions::new(); - roxygen_documentation(&mut actions, uri, document, range, capabilities); + roxygen_documentation(db, file, &mut actions, range, capabilities); actions.into_response() } diff --git a/crates/ark/src/lsp/code_action/roxygen.rs b/crates/ark/src/lsp/code_action/roxygen.rs index ea4ea48d2..fcc16f223 100644 --- a/crates/ark/src/lsp/code_action/roxygen.rs +++ b/crates/ark/src/lsp/code_action/roxygen.rs @@ -1,19 +1,19 @@ use tower_lsp::lsp_types; -use url::Url; +use crate::lsp::ark_file::ArkFile; use crate::lsp::capabilities::Capabilities; use crate::lsp::code_action::code_action; use crate::lsp::code_action::code_action_workspace_text_edit; use crate::lsp::code_action::CodeActions; -use crate::lsp::document::Document; +use crate::lsp::db::ArkDb; use crate::lsp::traits::node::NodeExt; use crate::treesitter::BinaryOperatorType; use crate::treesitter::NodeTypeExt; pub(crate) fn roxygen_documentation( + db: &dyn ArkDb, + file: &ArkFile, actions: &mut CodeActions, - uri: &Url, - document: &Document, range: tree_sitter::Range, capabilities: &Capabilities, ) -> Option<()> { @@ -26,8 +26,8 @@ pub(crate) fn roxygen_documentation( // For selections, we require that the start point be touching the function name. let start = range.start_point; - let node = document - .ast + let node = file + .tree_sitter(db) .root_node() .named_descendant_for_point_range(start, start)?; @@ -63,7 +63,7 @@ pub(crate) fn roxygen_documentation( // Fairly simple detection of existing `#'` on the previous line (but starting at the // same `column` offset), which tells us not to provide this code action - if let Some(previous_line) = document.get_line(position.row.saturating_sub(1)) { + if let Some(previous_line) = file.get_line(db, position.row.saturating_sub(1)) { if let Some(previous_line) = previous_line.get(position.column..) { let mut previous_line = previous_line.bytes(); @@ -90,7 +90,7 @@ pub(crate) fn roxygen_documentation( for child in parameters.children_by_field_name("parameter", &mut cursor) { let parameter_name = child.child_by_field_name("name")?; - let parameter_name = parameter_name.node_to_string(&document.contents).ok()?; + let parameter_name = parameter_name.node_to_string(file.contents(db)).ok()?; parameter_names.push(parameter_name); } @@ -100,13 +100,13 @@ pub(crate) fn roxygen_documentation( // We insert the documentation string at the start position of the function name. // This handles the indentation of the first documentation line, and makes new line // handling trivial (we just add a new line to every documentation line). - let position = document - .lsp_position_from_tree_sitter_point(position) + let position = file + .lsp_position_from_tree_sitter_point(db, position) .ok()?; let range = lsp_types::Range::new(position, position); let edit = lsp_types::TextEdit::new(range, documentation); let edit = - code_action_workspace_text_edit(uri.clone(), document.version, vec![edit], capabilities); + code_action_workspace_text_edit(file.url.clone(), file.version, vec![edit], capabilities); actions.add_action(code_action( "Generate a roxygen template".to_string(), @@ -169,10 +169,10 @@ mod tests { use url::Url; use crate::fixtures::point_and_offset_from_cursor; + use crate::lsp::ark_file::test_ark_file; use crate::lsp::capabilities::Capabilities; use crate::lsp::code_action::roxygen::roxygen_documentation; use crate::lsp::code_action::CodeActions; - use crate::lsp::document::Document; fn point_range(point: Point, byte: usize) -> Range { Range { @@ -190,19 +190,17 @@ mod tests { fn roxygen_documentation_test(text: &str, position: Position) -> String { let mut actions = CodeActions::new(); - let uri = Url::parse("file:///test.R").unwrap(); - let capabilities = Capabilities::default() .with_code_action_literal_support(true) .with_workspace_edit_document_changes(true); let (text, point, offset) = roxygen_point_and_offset_from_cursor(text); - let document = Document::new(&text, None); + let (db, file) = test_ark_file(&text); roxygen_documentation( + &db, + &file, &mut actions, - &uri, - &document, point_range(point, offset), &capabilities, ); @@ -222,7 +220,7 @@ mod tests { assert_eq!(text_document_edits.len(), 1); let mut text_document_edit = text_document_edits.pop().unwrap(); - assert_eq!(text_document_edit.text_document.uri, uri); + assert_eq!(text_document_edit.text_document.uri, file.url); assert_eq!(text_document_edit.edits.len(), 1); let OneOf::Left(text_edit) = text_document_edit.edits.pop().unwrap() else { @@ -290,8 +288,6 @@ mod tests { fn test_no_action_when_on_local_function() { let mut actions = CodeActions::new(); - let uri = Url::parse("file:///test.R").unwrap(); - let capabilities = Capabilities::default() .with_code_action_literal_support(true) .with_workspace_edit_document_changes(true); @@ -303,12 +299,12 @@ outer <- function(a, b = 2) { "; let (text, point, offset) = roxygen_point_and_offset_from_cursor(text); - let document = Document::new(&text, None); + let (db, file) = test_ark_file(&text); roxygen_documentation( + &db, + &file, &mut actions, - &uri, - &document, point_range(point, offset), &capabilities, ); @@ -320,8 +316,6 @@ outer <- function(a, b = 2) { fn test_no_action_when_documentation_on_previous_line() { let mut actions = CodeActions::new(); - let uri = Url::parse("file:///test.R").unwrap(); - let capabilities = Capabilities::default() .with_code_action_literal_support(true) .with_workspace_edit_document_changes(true); @@ -332,12 +326,12 @@ f@n <- function(a, b) {} "; let (text, point, offset) = roxygen_point_and_offset_from_cursor(text); - let document = Document::new(&text, None); + let (db, file) = test_ark_file(&text); roxygen_documentation( + &db, + &file, &mut actions, - &uri, - &document, point_range(point, offset), &capabilities, ); @@ -349,8 +343,6 @@ f@n <- function(a, b) {} fn test_no_action_when_cursor_is_after_function_name() { let mut actions = CodeActions::new(); - let uri = Url::parse("file:///test.R").unwrap(); - let capabilities = Capabilities::default() .with_code_action_literal_support(true) .with_workspace_edit_document_changes(true); @@ -361,12 +353,12 @@ fn@ <- function(a, b) {} "; let (text, point, offset) = roxygen_point_and_offset_from_cursor(text); - let document = Document::new(&text, None); + let (db, ark_file) = test_ark_file(&text); roxygen_documentation( + &db, + &ark_file, &mut actions, - &uri, - &document, point_range(point, offset), &capabilities, ); @@ -378,8 +370,6 @@ fn@ <- function(a, b) {} fn test_no_action_without_code_action_literal_support() { let mut actions = CodeActions::new(); - let uri = Url::parse("file:///test.R").unwrap(); - // NOTE: `with_code_action_literal_support(false)` let capabilities = Capabilities::default() .with_code_action_literal_support(false) @@ -390,12 +380,12 @@ f@n <- function(a, b) {} "; let (text, point, offset) = roxygen_point_and_offset_from_cursor(text); - let document = Document::new(&text, None); + let (db, ark_file) = test_ark_file(&text); roxygen_documentation( + &db, + &ark_file, &mut actions, - &uri, - &document, point_range(point, offset), &capabilities, ); @@ -419,12 +409,12 @@ f@n <- function(a, b) {} "; let (text, point, offset) = roxygen_point_and_offset_from_cursor(text); - let document = Document::new(&text, None); + let (db, ark_file) = test_ark_file(&text); roxygen_documentation( + &db, + &ark_file, &mut actions, - &uri, - &document, point_range(point, offset), &capabilities, ); diff --git a/crates/ark/src/lsp/db.rs b/crates/ark/src/lsp/db.rs new file mode 100644 index 000000000..067740824 --- /dev/null +++ b/crates/ark/src/lsp/db.rs @@ -0,0 +1,78 @@ +use oak_db::File; +use oak_db::OakDatabase; + +/// Ark's salsa database view. +/// +/// Extends oak_db's `Db` with queries we keep on the ark side. Right now that's +/// just the legacy tree-sitter tree. Defining it here keeps the `tree-sitter` +/// dependency out of `oak_db`, which is meant to stay a small shared crate. The +/// query still memoizes into the same `OakDatabase` storage, keyed on the `File`. +/// +/// This mirrors how rust-analyzer layers database traits across crates: the +/// base crate owns `SourceDatabase`, downstream crates add their own traits on +/// top. +#[salsa::db] +pub(crate) trait ArkDb: oak_db::Db {} + +#[salsa::db] +impl ArkDb for OakDatabase {} + +/// Extension trait that adds ark-side query methods to `oak_db::File`. +pub(crate) trait FileArkExt { + fn tree_sitter(self, db: &dyn ArkDb) -> &tree_sitter::Tree; +} + +impl FileArkExt for File { + fn tree_sitter(self, db: &dyn ArkDb) -> &tree_sitter::Tree { + &tree_sitter_query(db, self).0 + } +} + +/// Salsa wrapper around a tree-sitter `Tree`. +/// +/// Salsa stores the memoized value and may overwrite it in place across +/// revisions, so the value has to implement `salsa::Update`. `Tree` doesn't, +/// and we can't implement a foreign trait on a foreign type, so we wrap it. +#[derive(Clone, Debug)] +#[allow(dead_code)] +struct TreeSitterTree(tree_sitter::Tree); + +// SAFETY: `Tree` is fully owned, with no 'db references, so overwriting the old +// value in place is sound. We always report "changed" because tree-sitter trees +// aren't comparable. That pairs with `no_eq`, which already tells salsa never +// to backdate this query. +unsafe impl salsa::Update for TreeSitterTree { + unsafe fn maybe_update(old_pointer: *mut Self, new_value: Self) -> bool { + unsafe { *old_pointer = new_value }; + true + } +} + +/// Parse a file with tree-sitter. +/// +/// `no_eq` because `tree_sitter::Tree` isn't `PartialEq`, so salsa can't +/// backdate it (same as `oak_db`'s `semantic_index`). `lru = 128` mirrors +/// `oak_db::File::parse`. +#[salsa::tracked(returns(ref), no_eq, lru = 128)] +fn tree_sitter_query(db: &dyn ArkDb, file: File) -> TreeSitterTree { + TreeSitterTree(parse_tree_sitter(file.contents(db))) +} + +/// Parse R source with tree-sitter. +/// +/// The one place we build a tree-sitter parser. `tree_sitter_query()` runs it +/// over editor files; `statement_range` runs it over standalone snippets that +/// have no `oak_db::File`. +pub(crate) fn parse_tree_sitter(text: &str) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + + // Unwrap Safety: `tree-sitter-r` is a valid grammar; `set_language` only + // fails on an ABI version mismatch, which is a build-time invariant. + parser + .set_language(&tree_sitter_r::LANGUAGE.into()) + .unwrap(); + + // Unwrap Safety: parsing without a timeout or cancellation flag never + // returns `None`. + parser.parse(text, None).unwrap() +} diff --git a/crates/ark/src/lsp/diagnostics.rs b/crates/ark/src/lsp/diagnostics.rs index 5abc069b4..c87fff33d 100644 --- a/crates/ark/src/lsp/diagnostics.rs +++ b/crates/ark/src/lsp/diagnostics.rs @@ -24,9 +24,10 @@ use tree_sitter::Point; use tree_sitter::Range; use crate::lsp; +use crate::lsp::ark_file::ArkFile; +use crate::lsp::db::ArkDb; use crate::lsp::declarations::top_level_declare; use crate::lsp::diagnostics_syntax::syntax_diagnostics; -use crate::lsp::document::Document; use crate::lsp::indexer; use crate::lsp::inputs::source_root::SourceRoot; use crate::lsp::state::WorldState; @@ -44,8 +45,8 @@ pub struct DiagnosticsConfig { #[derive(Clone)] pub struct DiagnosticContext<'a> { - /// The document under analysis - pub doc: &'a Document, + pub(crate) db: &'a dyn ArkDb, + pub(crate) file: &'a ArkFile, /// The symbols currently defined and available in the session. pub session_symbols: HashSet, @@ -84,9 +85,19 @@ impl Default for DiagnosticsConfig { } impl<'a> DiagnosticContext<'a> { - pub fn new(doc: &'a Document, root: &'a Option, library: &'a Library) -> Self { + pub(crate) fn contents(&self) -> &'a str { + self.file.contents(self.db) + } + + pub(crate) fn new( + db: &'a dyn ArkDb, + root: &'a Option, + library: &'a Library, + file: &'a ArkFile, + ) -> Self { Self { - doc, + file, + db, document_symbols: Vec::new(), session_symbols: HashSet::new(), workspace_symbols: HashSet::new(), @@ -133,7 +144,7 @@ impl<'a> DiagnosticContext<'a> { } pub(crate) fn generate_diagnostics( - doc: Document, + file: ArkFile, state: WorldState, testthat: bool, ) -> Vec { @@ -143,14 +154,18 @@ pub(crate) fn generate_diagnostics( return diagnostics; } + let db = &state.db; + // Check that diagnostics are not disabled in top-level declarations for // this document - let decls = top_level_declare(&doc.ast, &doc.contents); + let tree = file.tree_sitter(db); + let contents = file.contents(db); + let decls = top_level_declare(tree, contents); if !decls.diagnostics { return diagnostics; } - let mut context = DiagnosticContext::new(&doc, &state.root, &state.library); + let mut context = DiagnosticContext::new(db, &state.root, &state.library, &file); // Add a 'root' context for the document. context.document_symbols.push(HashMap::new()); @@ -220,7 +235,7 @@ pub(crate) fn generate_diagnostics( } // Start iterating through the nodes. - let root = doc.ast.root_node(); + let root = file.tree_sitter(db).root_node(); // Collect syntax related diagnostics for `ERROR` and `MISSING` nodes match syntax_diagnostics(root, &context) { @@ -366,7 +381,7 @@ fn recurse_for( }); if variable.is_identifier() { - let name = variable.node_as_str(&context.doc.contents)?; + let name = variable.node_as_str(context.contents())?; let range = variable.range(); context.add_defined_variable(name, range); } @@ -553,7 +568,7 @@ fn handle_assignment_variable( return Ok(()); } - let name = identifier.node_as_str(&context.doc.contents)?; + let name = identifier.node_as_str(context.contents())?; let range = identifier.range(); context.add_defined_variable(name, range); @@ -585,7 +600,7 @@ fn handle_assignment_dotty( return Ok(()); }; - let dot = dot.node_as_str(&context.doc.contents)?; + let dot = dot.node_as_str(context.contents())?; if dot != "." { return Ok(()); }; @@ -609,7 +624,7 @@ fn handle_assignment_dotty( // so we don't want to define a variable for `x` there. if let Some(name) = child.child_by_field_name("name") { let range = name.range(); - let name = name.node_as_str(&context.doc.contents)?; + let name = name.node_as_str(context.contents())?; context.add_defined_variable(name, range); continue; }; @@ -621,7 +636,7 @@ fn handle_assignment_dotty( // i.e. `.[x, y]` where `value` is just a name that dotty assigns to if value.is_identifier() { let range = value.range(); - let name = value.node_as_str(&context.doc.contents)?; + let name = value.node_as_str(context.contents())?; context.add_defined_variable(name, range); continue; } @@ -652,7 +667,7 @@ fn node_find_magrittr_pipe<'tree>( node: &Node<'tree>, context: &DiagnosticContext, ) -> anyhow::Result>> { - if node.is_magrittr_pipe_operator(&context.doc.contents)? { + if node.is_magrittr_pipe_operator(context.contents())? { // Found one! return Ok(Some(*node)); } @@ -689,10 +704,12 @@ fn recurse_namespace( }); // Check for a valid package name. - let package = lhs.node_as_str(&context.doc.contents)?; + let package = lhs.node_as_str(context.contents())?; if !context.installed_packages.contains(package) { let range = lhs.range(); - let range = context.doc.lsp_range_from_tree_sitter_range(range)?; + let range = context + .file + .lsp_range_from_tree_sitter_range(context.db, range)?; let message = format!("Package '{}' is not installed.", package); let diagnostic = Diagnostic::new_simple(range, message); diagnostics.push(diagnostic); @@ -725,7 +742,7 @@ fn recurse_parameters( bail!("Missing a `name` field in a `parameter` node."); }); - let symbol = name.node_as_str(&context.doc.contents)?; + let symbol = name.node_as_str(context.contents())?; let location = name.range(); context.add_defined_variable(symbol, location); @@ -840,7 +857,7 @@ fn recurse_call( // // TODO: Handle certain 'scope-generating' function calls, e.g. // things like 'local({ ... })'. - let fun = callee.node_as_str(&context.doc.contents)?; + let fun = callee.node_as_str(context.contents())?; match fun { "library" | "require" => { @@ -869,14 +886,14 @@ fn handle_package_attach_call(node: Node, context: &mut DiagnosticContext) -> an // We'll do better when we have a more capable argument inspection // infrastructure. if node - .arguments_names_as_string(&context.doc.contents) + .arguments_names_as_string(context.contents()) .flatten() .any(|n| n == "character.only") { return Ok(()); } - let package_name = package_node.get_identifier_or_string_text(&context.doc.contents)?; + let package_name = package_node.get_identifier_or_string_text(context.contents())?; let attach_pos = node.end_position(); let package = insert_package_exports(package_name, attach_pos, context)?; @@ -1025,7 +1042,7 @@ fn check_invalid_na_comparison( let mut cursor = node.walk(); for child in node.children(&mut cursor) { - let contents = child.node_as_str(&context.doc.contents)?; + let contents = child.node_as_str(context.contents())?; if matches!(contents, "NA" | "NaN" | "NULL") { let message = match contents { @@ -1035,7 +1052,9 @@ fn check_invalid_na_comparison( _ => continue, }; let range = child.range(); - let range = context.doc.lsp_range_from_tree_sitter_range(range)?; + let range = context + .file + .lsp_range_from_tree_sitter_range(context.db, range)?; let mut diagnostic = Diagnostic::new_simple(range, message.into()); diagnostic.severity = Some(DiagnosticSeverity::INFORMATION); diagnostics.push(diagnostic); @@ -1069,7 +1088,9 @@ fn check_unexpected_assignment_in_if_conditional( } let range = condition.range(); - let range = context.doc.lsp_range_from_tree_sitter_range(range)?; + let range = context + .file + .lsp_range_from_tree_sitter_range(context.db, range)?; let message = "Unexpected '='; use '==' to compare values for equality."; let diagnostic = Diagnostic::new_simple(range, message.into()); diagnostics.push(diagnostic); @@ -1110,15 +1131,17 @@ fn check_symbol_in_scope( } // Skip if a symbol with this name is in scope. - let name = node.node_as_str(&context.doc.contents)?; + let name = node.node_as_str(context.contents())?; if context.has_definition(name, node.start_position()) { return false.ok(); } // No symbol in scope; provide a diagnostic. let range = node.range(); - let range = context.doc.lsp_range_from_tree_sitter_range(range)?; - let identifier = node.node_as_str(&context.doc.contents)?; + let range = context + .file + .lsp_range_from_tree_sitter_range(context.db, range)?; + let identifier = node.node_as_str(context.contents())?; let message = format!("No symbol named '{}' in scope.", identifier); let mut diagnostic = Diagnostic::new_simple(range, message); diagnostic.severity = Some(DiagnosticSeverity::WARNING); @@ -1131,6 +1154,7 @@ fn check_symbol_in_scope( mod tests { use std::path::PathBuf; + use aether_path::FilePath; use harp::eval::RParseEvalOptions; use oak_package_metadata::dcf::Dcf; use oak_package_metadata::description::Description; @@ -1142,12 +1166,24 @@ mod tests { use tower_lsp::lsp_types::Position; use crate::console::console_inputs; - use crate::lsp::document::Document; + use crate::lsp::ark_file::ArkFile; + use crate::lsp::config::DocumentConfig; use crate::lsp::state::WorldState; use crate::r_task; - fn generate_diagnostics(doc: Document, state: WorldState) -> Vec { - super::generate_diagnostics(doc, state, false) + fn generate_diagnostics(code: &str, state: WorldState) -> Vec { + let url = url::Url::parse("file:///test.R").unwrap(); + let file = oak_db::File::new(&state.db, FilePath::from_url(&url), code.to_string(), None); + let ark_file = ArkFile { + file, + version: None, + config: DocumentConfig::default(), + url, + encoding: aether_lsp_utils::proto::PositionEncoding::Wide( + biome_line_index::WideEncoding::Utf16, + ), + }; + super::generate_diagnostics(ark_file, state, false) } fn current_state() -> WorldState { @@ -1169,8 +1205,7 @@ mod tests { foo 1 }"; - let document = Document::new(text, None); - let diagnostics = generate_diagnostics(document, current_state()); + let diagnostics = generate_diagnostics(text, current_state()); assert_eq!(diagnostics.len(), 2); let diagnostic = diagnostics.first().unwrap(); @@ -1193,8 +1228,7 @@ foo 1, 2 # hi there )"; - let document = Document::new(text, None); - let diagnostics = generate_diagnostics(document, current_state()); + let diagnostics = generate_diagnostics(text, current_state()); assert!(diagnostics.is_empty()); }) } @@ -1203,8 +1237,7 @@ foo fn test_missing_namespace_rhs() { r_task(|| { let text = "base::"; - let document = Document::new(text, None); - let diagnostics = generate_diagnostics(document, current_state()); + let diagnostics = generate_diagnostics(text, current_state()); assert_eq!(diagnostics.len(), 1); let diagnostic = diagnostics.first().unwrap(); insta::assert_snapshot!(diagnostic.message); @@ -1215,9 +1248,8 @@ foo fn test_no_diagnostic_for_dot_dot_i() { r_task(|| { let text = "..1 + ..2 + 3"; - let document = Document::new(text, None); - let diagnostics = generate_diagnostics(document, current_state()); + let diagnostics = generate_diagnostics(text, current_state()); assert!(diagnostics.is_empty()); }) @@ -1236,13 +1268,11 @@ foo let state = current_state(); let text = "x$foo"; - let document = Document::new(text, None); - let diagnostics = generate_diagnostics(document.clone(), state.clone()); + let diagnostics = generate_diagnostics(text, state.clone()); assert!(diagnostics.is_empty()); let text = "x@foo"; - let document = Document::new(text, None); - let diagnostics = generate_diagnostics(document.clone(), state.clone()); + let diagnostics = generate_diagnostics(text, state.clone()); assert!(diagnostics.is_empty()); // Clean up @@ -1259,8 +1289,7 @@ foo z = 3 y + x + z "; - let document = Document::new(text, None); - let diagnostics = generate_diagnostics(document.clone(), current_state()); + let diagnostics = generate_diagnostics(text, current_state()); assert!(diagnostics.is_empty()); }) } @@ -1273,8 +1302,7 @@ foo 2 ->> y y + x "; - let document = Document::new(text, None); - let diagnostics = generate_diagnostics(document.clone(), current_state()); + let diagnostics = generate_diagnostics(text, current_state()); assert!(diagnostics.is_empty()); }) } @@ -1287,9 +1315,8 @@ foo x <- 1 x + 1 "; - let document = Document::new(text, None); - let diagnostics = generate_diagnostics(document.clone(), current_state()); + let diagnostics = generate_diagnostics(text, current_state()); assert_eq!(diagnostics.len(), 1); // Only marks the `x` before the `x <- 1` @@ -1307,8 +1334,7 @@ foo identity(foo ~ bar) identity(~foo) "; - let document = Document::new(text, None); - let diagnostics = generate_diagnostics(document, current_state()); + let diagnostics = generate_diagnostics(text, current_state()); assert!(diagnostics.is_empty()); }) } @@ -1323,9 +1349,7 @@ foo cherry "; - let document = Document::new(code, None); - - let diagnostics = generate_diagnostics(document.clone(), current_state()); + let diagnostics = generate_diagnostics(code, current_state()); assert_eq!(diagnostics.len(), 1); let diagnostic = diagnostics.first().unwrap(); @@ -1343,9 +1367,7 @@ foo cherry "; - let document = Document::new(code, None); - - let diagnostics = generate_diagnostics(document.clone(), current_state()); + let diagnostics = generate_diagnostics(code, current_state()); assert_eq!(diagnostics.len(), 1); let diagnostic = diagnostics.first().unwrap(); @@ -1364,9 +1386,7 @@ foo x "; - let document = Document::new(code, None); - - let diagnostics = generate_diagnostics(document.clone(), current_state()); + let diagnostics = generate_diagnostics(code, current_state()); assert_eq!(diagnostics.len(), 1); let diagnostic = diagnostics.first().unwrap(); @@ -1384,9 +1404,7 @@ foo cherry "; - let document = Document::new(code, None); - - let diagnostics = generate_diagnostics(document.clone(), current_state()); + let diagnostics = generate_diagnostics(code, current_state()); assert_eq!(diagnostics.len(), 1); let diagnostic = diagnostics.first().unwrap(); @@ -1402,9 +1420,7 @@ foo apple "; - let document = Document::new(code, None); - - let diagnostics = generate_diagnostics(document.clone(), current_state()); + let diagnostics = generate_diagnostics(code, current_state()); assert_eq!(diagnostics.len(), 0); }) } @@ -1417,9 +1433,7 @@ foo apple "; - let document = Document::new(code, None); - - let diagnostics = generate_diagnostics(document.clone(), current_state()); + let diagnostics = generate_diagnostics(code, current_state()); assert_eq!(diagnostics.len(), 0); }) } @@ -1438,21 +1452,13 @@ foo list(x <- 1) x "; - let document = Document::new(code, None); - assert_eq!( - generate_diagnostics(document.clone(), current_state()).len(), - 0 - ); + assert_eq!(generate_diagnostics(code, current_state()).len(), 0); let code = " list({ x <- 1 }) x "; - let document = Document::new(code, None); - assert_eq!( - generate_diagnostics(document.clone(), current_state()).len(), - 0 - ); + assert_eq!(generate_diagnostics(code, current_state()).len(), 0); }); // Subset @@ -1462,22 +1468,14 @@ foo foo[x <- 1] x "; - let document = Document::new(code, None); - assert_eq!( - generate_diagnostics(document.clone(), current_state()).len(), - 0 - ); + assert_eq!(generate_diagnostics(code, current_state()).len(), 0); let code = " foo <- list() foo[{x <- 1}] x "; - let document = Document::new(code, None); - assert_eq!( - generate_diagnostics(document.clone(), current_state()).len(), - 0 - ); + assert_eq!(generate_diagnostics(code, current_state()).len(), 0); }); // Subset2 @@ -1487,22 +1485,14 @@ foo foo[[x <- 1]] x "; - let document = Document::new(code, None); - assert_eq!( - generate_diagnostics(document.clone(), current_state()).len(), - 0 - ); + assert_eq!(generate_diagnostics(code, current_state()).len(), 0); let code = " foo <- list() foo[[{x <- 1}]] x "; - let document = Document::new(code, None); - assert_eq!( - generate_diagnostics(document.clone(), current_state()).len(), - 0 - ); + assert_eq!(generate_diagnostics(code, current_state()).len(), 0); }); } @@ -1517,11 +1507,7 @@ foo let code = " list(x) "; - let document = Document::new(code, None); - assert_eq!( - generate_diagnostics(document.clone(), current_state()).len(), - 0 - ); + assert_eq!(generate_diagnostics(code, current_state()).len(), 0); // Important to test nested case. We have a dynamic stack of state // variable to keep track of whether we are in a call. The inner @@ -1529,22 +1515,14 @@ foo let code = " list(list(), x) "; - let document = Document::new(code, None); - assert_eq!( - generate_diagnostics(document.clone(), current_state()).len(), - 0 - ); + assert_eq!(generate_diagnostics(code, current_state()).len(), 0); // `in_call_like_arguments` state variable is reset let code = " list() x "; - let document = Document::new(code, None); - assert_eq!( - generate_diagnostics(document.clone(), current_state()).len(), - 1 - ); + assert_eq!(generate_diagnostics(code, current_state()).len(), 1); }); // Subset @@ -1557,11 +1535,7 @@ foo data[x] data[,x] "; - let document = Document::new(code, None); - assert_eq!( - generate_diagnostics(document.clone(), current_state()).len(), - 0 - ); + assert_eq!(generate_diagnostics(code, current_state()).len(), 0); // Imagine this is `data.table()` (we don't necessarily have the package // installed in the test) @@ -1570,11 +1544,7 @@ foo data <- data.frame(x = 1) data[, y := x + 1] "; - let document = Document::new(code, None); - assert_eq!( - generate_diagnostics(document.clone(), current_state()).len(), - 0 - ); + assert_eq!(generate_diagnostics(code, current_state()).len(), 0); }); // Subset2 @@ -1583,11 +1553,7 @@ foo foo <- list() foo[[x]] "; - let document = Document::new(code, None); - assert_eq!( - generate_diagnostics(document.clone(), current_state()).len(), - 0 - ); + assert_eq!(generate_diagnostics(code, current_state()).len(), 0); }); } @@ -1599,11 +1565,7 @@ foo x <- list(a = 1) x |> _$a[1] "; - let document = Document::new(code, None); - assert_eq!( - generate_diagnostics(document.clone(), current_state()).len(), - 0 - ); + assert_eq!(generate_diagnostics(code, current_state()).len(), 0); // Imagine this is a data.table // https://github.com/posit-dev/positron/issues/3749 @@ -1611,22 +1573,14 @@ foo data <- data.frame(a = 1) data |> _[1] "; - let document = Document::new(code, None); - assert_eq!( - generate_diagnostics(document.clone(), current_state()).len(), - 0 - ); + assert_eq!(generate_diagnostics(code, current_state()).len(), 0); // We technically disable diagnostics for this symbol everywhere, even outside // of pipe scope, which is probably fine let code = " _ "; - let document = Document::new(code, None); - assert_eq!( - generate_diagnostics(document.clone(), current_state()).len(), - 0 - ); + assert_eq!(generate_diagnostics(code, current_state()).len(), 0); }) } @@ -1671,8 +1625,7 @@ foo foo() bar "; - let document = Document::new(code, None); - let diagnostics = generate_diagnostics(document, state.clone()); + let diagnostics = generate_diagnostics(code, state.clone()); assert_eq!(diagnostics.len(), 0); @@ -1682,9 +1635,8 @@ foo undefined() also_undefined "; - let document = Document::new(code, None); - let diagnostics = generate_diagnostics(document, state.clone()); + let diagnostics = generate_diagnostics(code, state.clone()); assert_eq!(diagnostics.len(), 2); assert!(diagnostics @@ -1705,8 +1657,7 @@ foo foo() bar "; - let document = Document::new(code, None); - let diagnostics = generate_diagnostics(document, state.clone()); + let diagnostics = generate_diagnostics(code, state.clone()); assert_eq!(diagnostics.len(), 0); // If the library call includes the `character.only` argument, we bail @@ -1714,8 +1665,7 @@ foo library(mockpkg, character.only = TRUE) foo() "#; - let document = Document::new(code, None); - let diagnostics = generate_diagnostics(document, state.clone()); + let diagnostics = generate_diagnostics(code, state.clone()); assert_eq!(diagnostics.len(), 1); // Same if passed `FALSE`, we're not trying to be smart (yet) @@ -1723,8 +1673,7 @@ foo library(mockpkg, character.only = FALSE) foo() "#; - let document = Document::new(code, None); - let diagnostics = generate_diagnostics(document, state); + let diagnostics = generate_diagnostics(code, state); assert_eq!(diagnostics.len(), 1); }); } @@ -1793,8 +1742,7 @@ foo bar # in scope baz # in scope "; - let document = Document::new(code, None); - let diagnostics = generate_diagnostics(document, state.clone()); + let diagnostics = generate_diagnostics(code, state.clone()); let messages: Vec<_> = diagnostics.iter().map(|d| d.message.clone()).collect(); assert!(messages.iter().any(|m| m.contains("No symbol named 'foo'"))); @@ -1839,8 +1787,7 @@ foo bar foo() "; - let document = Document::new(code, None); - let diagnostics = generate_diagnostics(document, state.clone()); + let diagnostics = generate_diagnostics(code, state.clone()); assert!(diagnostics .iter() .any(|d| d.message.contains("No symbol named 'foo'"))); @@ -1868,8 +1815,7 @@ foo path_to_file penguins_raw "#; - let document = Document::new(code, None); - let diagnostics = generate_diagnostics(document, state.clone()); + let diagnostics = generate_diagnostics(code, state.clone()); assert!(diagnostics.is_empty()); let code = r#" @@ -1878,8 +1824,7 @@ foo penguins_raw library(penguins) "#; - let document = Document::new(code, None); - let diagnostics = generate_diagnostics(document, state); + let diagnostics = generate_diagnostics(code, state); assert_eq!(diagnostics.len(), 3); }) } diff --git a/crates/ark/src/lsp/diagnostics_syntax.rs b/crates/ark/src/lsp/diagnostics_syntax.rs index f19ce41b5..6eb0a9251 100644 --- a/crates/ark/src/lsp/diagnostics_syntax.rs +++ b/crates/ark/src/lsp/diagnostics_syntax.rs @@ -339,7 +339,7 @@ fn diagnose_missing_binary_operator( let range = operator.range(); - let text = operator.node_as_str(&context.doc.contents)?; + let text = operator.node_as_str(context.contents())?; let message = format!("Invalid binary operator '{text}'. Missing a right hand side."); diagnostics.push(new_syntax_diagnostic(message, range, context)?); @@ -370,7 +370,7 @@ pub(crate) fn diagnose_missing_namespace_operator( let range = operator.range(); - let text = operator.node_as_str(&context.doc.contents)?; + let text = operator.node_as_str(context.contents())?; let message = format!("Invalid namespace operator '{text}'. Missing a right hand side."); diagnostics.push(new_syntax_diagnostic(message, range, context)?); @@ -421,7 +421,9 @@ fn new_syntax_diagnostic( range: Range, context: &DiagnosticContext, ) -> anyhow::Result { - let range = context.doc.lsp_range_from_tree_sitter_range(range)?; + let range = context + .file + .lsp_range_from_tree_sitter_range(context.db, range)?; Ok(Diagnostic::new_simple(range, message)) } @@ -431,15 +433,15 @@ mod tests { use tower_lsp::lsp_types::Diagnostic; use tower_lsp::lsp_types::Position; + use crate::lsp::ark_file::test_ark_file; use crate::lsp::diagnostics::DiagnosticContext; use crate::lsp::diagnostics_syntax::syntax_diagnostics; - use crate::lsp::document::Document; fn text_diagnostics(text: &str) -> Vec { - let document = Document::new(text, None); + let (db, file) = test_ark_file(text); let library = Library::default(); - let context = DiagnosticContext::new(&document, &None, &library); - let diagnostics = syntax_diagnostics(document.ast.root_node(), &context).unwrap(); + let context = DiagnosticContext::new(&db, &None, &library, &file); + let diagnostics = syntax_diagnostics(file.tree_sitter(&db).root_node(), &context).unwrap(); diagnostics } diff --git a/crates/ark/src/lsp/folding_range.rs b/crates/ark/src/lsp/folding_range.rs index b39c1552d..611adb875 100644 --- a/crates/ark/src/lsp/folding_range.rs +++ b/crates/ark/src/lsp/folding_range.rs @@ -14,12 +14,13 @@ use tower_lsp::lsp_types::FoldingRangeKind; use super::symbols::parse_comment_as_section; use crate::lsp; -use crate::lsp::document::Document; +use crate::lsp::ark_file::ArkFile; +use crate::lsp::db::ArkDb; -pub fn folding_range(document: &Document) -> anyhow::Result> { +pub(crate) fn folding_range(db: &dyn ArkDb, file: &ArkFile) -> anyhow::Result> { let mut folding_ranges: Vec = Vec::new(); - let ast = &document.ast; + let ast = file.tree_sitter(db); if ast.root_node().has_error() { tracing::error!("Folding range service: Parse error"); return Err(anyhow::anyhow!("Parse error")); @@ -28,10 +29,11 @@ pub fn folding_range(document: &Document) -> anyhow::Result> { // Traverse the AST let mut cursor = ast.root_node().walk(); parse_ts_node( + db, + file, &mut cursor, 0, &mut folding_ranges, - document, &mut vec![Vec::new()], &mut None, &mut None, @@ -41,10 +43,11 @@ pub fn folding_range(document: &Document) -> anyhow::Result> { } fn parse_ts_node( + db: &dyn ArkDb, + file: &ArkFile, cursor: &mut tree_sitter::TreeCursor, _depth: usize, folding_ranges: &mut Vec, - document: &Document, comment_stack: &mut Vec>, region_marker: &mut Option, cell_marker: &mut Option, @@ -70,18 +73,18 @@ fn parse_ts_node( start.column + 1, // Start after the opening delimiter end.row, end.column - 1, - count_leading_whitespaces(document, end.row), + count_leading_whitespaces(db, file, end.row), ); folding_ranges.push(folding_range); }, "comment" => { // Only process standalone comment - if count_leading_whitespaces(document, start.row) != start.column { + if count_leading_whitespaces(db, file, start.row) != start.column { return; } // Nested comment section handling - if let Some(comment_line) = document.get_line(start.row) { + if let Some(comment_line) = file.get_line(db, start.row) { if let Err(err) = nested_processor(comment_stack, folding_ranges, start.row, comment_line) { @@ -104,10 +107,11 @@ fn parse_ts_node( // recursive loop loop { parse_ts_node( + db, + file, cursor, _depth + 1, folding_ranges, - document, &mut child_comment_stack, &mut child_region_marker, &mut child_cell_marker, @@ -168,8 +172,8 @@ fn comment_range(start_line: usize, end_line: usize) -> FoldingRange { } } -fn count_leading_whitespaces(document: &Document, line_num: usize) -> usize { - let Some(line) = document.get_line(line_num) else { +fn count_leading_whitespaces(db: &dyn ArkDb, file: &ArkFile, line_num: usize) -> usize { + let Some(line) = file.get_line(db, line_num) else { return 0; }; @@ -361,12 +365,10 @@ fn end_node_handler( #[cfg(test)] mod tests { use super::*; - use crate::lsp::document::Document; fn test_folding_range(code: &str) -> Vec { - let doc = Document::new(code, None); - // Sort ranges for more consistent testing - sorted_ranges(folding_range(&doc).unwrap()) + let (db, file) = crate::lsp::ark_file::test_ark_file(code); + sorted_ranges(folding_range(&db, &file).unwrap()) } fn sorted_ranges(mut ranges: Vec) -> Vec { @@ -703,21 +705,18 @@ function(a, b, c) { // Test for unterminated structures #[test] fn test_folding_unterminated() { - // Add try_unwrap to handle the expected error from the parser - let doc = Document::new( - " + let code = " # #region without end # %% cell without another cell function() { # Unclosed function -", - None, - ); +"; + let (db, file) = crate::lsp::ark_file::test_ark_file(code); // Handle the expected parse error - match folding_range(&doc) { + match folding_range(&db, &file) { Ok(ranges) => insta::assert_debug_snapshot!(sorted_ranges(ranges)), Err(e) => insta::assert_debug_snapshot!(format!("Expected error: {}", e)), } @@ -726,18 +725,16 @@ function() { // Test for whitespace counting #[test] fn test_count_leading_whitespaces() { - let doc = Document::new( - "no spaces + let code = "no spaces two spaces four spaces -\ttab char", - None, - ); +\ttab char"; + let (db, file) = crate::lsp::ark_file::test_ark_file(code); - assert_eq!(count_leading_whitespaces(&doc, 0), 0); - assert_eq!(count_leading_whitespaces(&doc, 1), 2); - assert_eq!(count_leading_whitespaces(&doc, 2), 4); - assert_eq!(count_leading_whitespaces(&doc, 3), 1); // Tab counts as 1 char + assert_eq!(count_leading_whitespaces(&db, &file, 0), 0); + assert_eq!(count_leading_whitespaces(&db, &file, 1), 2); + assert_eq!(count_leading_whitespaces(&db, &file, 2), 4); + assert_eq!(count_leading_whitespaces(&db, &file, 3), 1); // Tab counts as 1 char } #[test] diff --git a/crates/ark/src/lsp/handlers.rs b/crates/ark/src/lsp/handlers.rs index 95abf5e72..470180a00 100644 --- a/crates/ark/src/lsp/handlers.rs +++ b/crates/ark/src/lsp/handlers.rs @@ -185,8 +185,9 @@ pub(crate) fn handle_folding_range( state: &WorldState, ) -> LspResult>> { let uri = ¶ms.text_document.uri; - let document = state.get_document(&FilePath::from_url(uri))?; - match folding_range(document) { + let file = state.ark_file(uri)?; + let db = &state.db; + match folding_range(db, &file) { Ok(foldings) => Ok(Some(foldings)), Err(err) => { lsp::log_error!("{err:?}"); @@ -316,23 +317,25 @@ pub(crate) fn handle_selection_range( params: SelectionRangeParams, state: &WorldState, ) -> LspResult>> { - let document = state.get_document(&FilePath::from_url(¶ms.text_document.uri))?; + let uri = ¶ms.text_document.uri; + let file = state.ark_file(uri)?; + let db = &state.db; // Get tree-sitter points to return selection ranges for let points = params .positions .into_iter() - .map(|position| document.tree_sitter_point_from_lsp_position(position)) + .map(|position| file.tree_sitter_point_from_lsp_position(db, position)) .collect::>>()?; - let Some(selections) = selection_range(&document.ast, points) else { + let Some(selections) = selection_range(file.tree_sitter(db), points) else { return Ok(None); }; // Convert tree-sitter points to LSP positions everywhere let selections = selections .into_iter() - .map(|selection| convert_selection_range_from_tree_sitter_to_lsp(selection, document)) + .map(|selection| convert_selection_range_from_tree_sitter_to_lsp(db, &file, selection)) .collect::>>()?; Ok(Some(selections)) @@ -380,9 +383,11 @@ pub(crate) fn handle_statement_range( params: StatementRangeParams, state: &WorldState, ) -> LspResult> { - let document = state.get_document(&FilePath::from_url(¶ms.text_document.uri))?; - let point = document.tree_sitter_point_from_lsp_position(params.position)?; - statement_range(document, point) + let uri = ¶ms.text_document.uri; + let file = state.ark_file(uri)?; + let db = &state.db; + let point = file.tree_sitter_point_from_lsp_position(db, params.position)?; + statement_range(db, &file, point) } #[tracing::instrument(level = "info", skip_all)] @@ -390,9 +395,11 @@ pub(crate) fn handle_help_topic( params: HelpTopicParams, state: &WorldState, ) -> LspResult> { - let document = state.get_document(&FilePath::from_url(¶ms.text_document.uri))?; - let point = document.tree_sitter_point_from_lsp_position(params.position)?; - help_topic(point, document) + let uri = ¶ms.text_document.uri; + let file = state.ark_file(uri)?; + let db = &state.db; + let point = file.tree_sitter_point_from_lsp_position(db, params.position)?; + help_topic(db, &file, point) } #[tracing::instrument(level = "info", skip_all)] @@ -401,10 +408,11 @@ pub(crate) fn handle_indent( state: &WorldState, ) -> LspResult>> { let ctxt = params.text_document_position; - let doc = state.get_document(&FilePath::from_url(&ctxt.text_document.uri))?; - let point = doc.tree_sitter_point_from_lsp_position(ctxt.position)?; - - indent_edit(doc, point.row) + let uri = &ctxt.text_document.uri; + let file = state.ark_file(uri)?; + let db = &state.db; + let point = file.tree_sitter_point_from_lsp_position(db, ctxt.position)?; + indent_edit(db, &file, point.row) } #[tracing::instrument(level = "info", skip_all)] @@ -414,10 +422,11 @@ pub(crate) fn handle_code_action( state: &WorldState, ) -> LspResult> { let uri = params.text_document.uri; - let doc = state.get_document(&FilePath::from_url(&uri))?; - let range = doc.tree_sitter_range_from_lsp_range(params.range)?; + let file = state.ark_file(&uri)?; + let db = &state.db; + let range = file.tree_sitter_range_from_lsp_range(db, params.range)?; - let code_actions = code_actions(&uri, doc, range, &lsp_state.capabilities); + let code_actions = code_actions(db, &file, range, &lsp_state.capabilities); if code_actions.is_empty() { Ok(None) diff --git a/crates/ark/src/lsp/help_topic.rs b/crates/ark/src/lsp/help_topic.rs index de88b8358..7982877d9 100644 --- a/crates/ark/src/lsp/help_topic.rs +++ b/crates/ark/src/lsp/help_topic.rs @@ -14,8 +14,9 @@ use tree_sitter::Point; use tree_sitter::Tree; use crate::lsp; +use crate::lsp::ark_file::ArkFile; use crate::lsp::backend::LspResult; -use crate::lsp::document::Document; +use crate::lsp::db::ArkDb; use crate::lsp::traits::node::NodeExt; use crate::treesitter::NodeType; use crate::treesitter::NodeTypeExt; @@ -39,17 +40,18 @@ pub struct HelpTopicResponse { } pub(crate) fn help_topic( + db: &dyn ArkDb, + file: &ArkFile, point: Point, - document: &Document, ) -> LspResult> { - let tree = &document.ast; + let tree = file.tree_sitter(db); let Some(node) = locate_help_node(tree, point) else { lsp::log_warn!("help_topic(): No help node at position {point}"); return Ok(None); }; - let text = node.node_to_string(&document.contents)?; + let text = node.node_to_string(file.contents(db))?; let response = HelpTopicResponse { topic: text }; lsp::log_info!( diff --git a/crates/ark/src/lsp/indent.rs b/crates/ark/src/lsp/indent.rs index 7cc313532..05fca8461 100644 --- a/crates/ark/src/lsp/indent.rs +++ b/crates/ark/src/lsp/indent.rs @@ -1,11 +1,12 @@ use anyhow::anyhow; use tower_lsp::lsp_types::TextEdit; +use crate::lsp::ark_file::ArkFile; use crate::lsp::backend::LspError; use crate::lsp::backend::LspResult; use crate::lsp::config::IndentStyle; use crate::lsp::config::IndentationConfig; -use crate::lsp::document::Document; +use crate::lsp::db::ArkDb; use crate::lsp::traits::node::NodeExt; use crate::treesitter::NodeType; use crate::treesitter::NodeTypeExt; @@ -21,10 +22,14 @@ use crate::treesitter::NodeTypeExt; /// /// Once we implement a full formatter, indentation will be provided for any /// constructs based on the formatter and will be fully consistent with it. -pub(crate) fn indent_edit(doc: &Document, line: usize) -> LspResult>> { - let text = &doc.contents; - let ast = &doc.ast; - let config = &doc.config.indent; +pub(crate) fn indent_edit( + db: &dyn ArkDb, + file: &ArkFile, + line: usize, +) -> LspResult>> { + let text = file.contents(db); + let ast = file.tree_sitter(db); + let config = &file.config.indent; let line_count = if text.is_empty() { 1 @@ -176,7 +181,7 @@ pub(crate) fn indent_edit(doc: &Document, line: usize) -> LspResult LspResult line { - if let Some(ref mut close_edits) = indent_edit(doc, close_line)? { + if let Some(ref mut close_edits) = indent_edit(db, file, close_line)? { edits.append(close_edits); } } @@ -265,25 +270,19 @@ pub fn find_enclosing_brace(node: tree_sitter::Node) -> Option, doc: &mut Document) { - from_proto::apply_text_edits( - &mut doc.contents, - edits, - &mut doc.line_index, - doc.position_encoding, - ); - *doc = test_doc(&doc.contents); - } - // NOTE: If we keep adding tests we might want to switch to snapshot tests const SPACE_CFG: IndentationConfig = IndentationConfig { @@ -292,19 +291,27 @@ mod tests { tab_width: 2, }; - fn test_doc(text: &str) -> Document { - let mut doc = Document::new(text, None); - doc.config.indent = SPACE_CFG; - doc + const ENCODING: PositionEncoding = PositionEncoding::Wide(WideEncoding::Utf16); + + fn apply_text_edits( + edits: Vec, + db: &mut OakDatabase, + file: &ArkFile, + encoding: PositionEncoding, + ) { + let mut contents = file.contents(&*db).to_string(); + let mut line_index = file.line_index(&*db).clone(); + from_proto::apply_text_edits(&mut contents, edits, &mut line_index, encoding); + file.file.set_contents(db).to(contents); } #[test] fn test_line_indent_oob() { - let doc = test_doc(""); - assert_match!(indent_edit(&doc, 1), Err(_)); + let (db, ark_file) = crate::lsp::ark_file::test_ark_file(""); + assert_match!(indent_edit(&db, &ark_file, 1), Err(_)); - let doc = test_doc("\n"); - assert_match!(indent_edit(&doc, 2), Err(_)); + let (db, ark_file) = crate::lsp::ark_file::test_ark_file("\n"); + assert_match!(indent_edit(&db, &ark_file, 2), Err(_)); } #[test] @@ -313,173 +320,182 @@ mod tests { // there is before the first newline // https://github.com/posit-dev/positron/issues/5258 let text = String::from(" \nx"); - let doc = test_doc(&text); - let edit = indent_edit(&doc, 1).unwrap(); + let (db, file) = crate::lsp::ark_file::test_ark_file(&text); + let edit = indent_edit(&db, &file, 1).unwrap(); assert!(edit.is_none()); let text = String::from("\r\nx"); - let doc = test_doc(&text); - let edit = indent_edit(&doc, 1).unwrap(); + let (db, file) = crate::lsp::ark_file::test_ark_file(&text); + let edit = indent_edit(&db, &file, 1).unwrap(); assert!(edit.is_none()); } #[test] fn test_line_indent_chains() { - let mut doc = test_doc("foo +\n bar +\n baz + qux |>\nfoofy()"); + let (mut db, file) = + crate::lsp::ark_file::test_ark_file("foo +\n bar +\n baz + qux |>\nfoofy()"); // Indenting the first two lines doesn't change the text - assert_match!(indent_edit(&doc, 0), Ok(None)); - assert_match!(indent_edit(&doc, 1), Ok(None)); + assert_match!(indent_edit(&db, &file, 0), Ok(None)); + assert_match!(indent_edit(&db, &file, 1), Ok(None)); - let edit = indent_edit(&doc, 2).unwrap().unwrap(); - apply_text_edits(edit, &mut doc); - assert_eq!(doc.contents, "foo +\n bar +\n baz + qux |>\nfoofy()"); + let edit = indent_edit(&db, &file, 2).unwrap().unwrap(); + apply_text_edits(edit, &mut db, &file, ENCODING); + assert_eq!( + file.contents(&db), + "foo +\n bar +\n baz + qux |>\nfoofy()" + ); - let edit = indent_edit(&doc, 3).unwrap().unwrap(); - apply_text_edits(edit, &mut doc); - assert_eq!(doc.contents, "foo +\n bar +\n baz + qux |>\n foofy()"); + let edit = indent_edit(&db, &file, 3).unwrap().unwrap(); + apply_text_edits(edit, &mut db, &file, ENCODING); + assert_eq!( + file.contents(&db), + "foo +\n bar +\n baz + qux |>\n foofy()" + ); } #[test] fn test_line_indent_chains_trailing_space() { - let mut doc = test_doc("foo +\n bar(\n x\n ) +\n baz\n "); + let (mut db, file) = + crate::lsp::ark_file::test_ark_file("foo +\n bar(\n x\n ) +\n baz\n "); - let edit = indent_edit(&doc, 4).unwrap().unwrap(); - apply_text_edits(edit, &mut doc); - assert_eq!(doc.contents, "foo +\n bar(\n x\n ) +\n baz\n "); + let edit = indent_edit(&db, &file, 4).unwrap().unwrap(); + apply_text_edits(edit, &mut db, &file, ENCODING); + assert_eq!(file.contents(&db), "foo +\n bar(\n x\n ) +\n baz\n "); } #[test] fn test_line_indent_chains_outdent() { let text = String::from("1 +\n 2\n"); - let doc = test_doc(&text); + let (db, file) = crate::lsp::ark_file::test_ark_file(&text); - assert_match!(indent_edit(&doc, 2), Ok(None)); + assert_match!(indent_edit(&db, &file, 2), Ok(None)); } #[test] fn test_line_indent_chains_deep() { - let mut doc = test_doc("deep()()[] +\n deep()()[]"); + let (mut db, file) = crate::lsp::ark_file::test_ark_file("deep()()[] +\n deep()()[]"); - let edit = indent_edit(&doc, 0).unwrap(); + let edit = indent_edit(&db, &file, 0).unwrap(); assert!(edit.is_none()); - let edit = indent_edit(&doc, 1).unwrap().unwrap(); - apply_text_edits(edit, &mut doc); - assert_eq!(doc.contents, "deep()()[] +\n deep()()[]"); + let edit = indent_edit(&db, &file, 1).unwrap().unwrap(); + apply_text_edits(edit, &mut db, &file, ENCODING); + assert_eq!(file.contents(&db), "deep()()[] +\n deep()()[]"); } #[test] fn test_line_indent_chains_deep_newlines() { // With newlines in the way - let mut doc = test_doc("deep(\n)()[] +\ndeep(\n)()[]"); + let (mut db, file) = crate::lsp::ark_file::test_ark_file("deep(\n)()[] +\ndeep(\n)()[]"); - let edit = indent_edit(&doc, 0).unwrap(); + let edit = indent_edit(&db, &file, 0).unwrap(); assert!(edit.is_none()); - let edit = indent_edit(&doc, 2).unwrap().unwrap(); - apply_text_edits(edit, &mut doc); - assert_eq!(doc.contents, "deep(\n)()[] +\n deep(\n)()[]"); + let edit = indent_edit(&db, &file, 2).unwrap().unwrap(); + apply_text_edits(edit, &mut db, &file, ENCODING); + assert_eq!(file.contents(&db), "deep(\n)()[] +\n deep(\n)()[]"); } #[test] fn test_line_indent_chains_calls() { - let mut doc = test_doc("foo() +\n bar() +\nbaz()"); + let (mut db, file) = crate::lsp::ark_file::test_ark_file("foo() +\n bar() +\nbaz()"); - let edit = indent_edit(&doc, 2).unwrap().unwrap(); - apply_text_edits(edit, &mut doc); - assert_eq!(doc.contents, "foo() +\n bar() +\n baz()"); + let edit = indent_edit(&db, &file, 2).unwrap().unwrap(); + apply_text_edits(edit, &mut db, &file, ENCODING); + assert_eq!(file.contents(&db), "foo() +\n bar() +\n baz()"); // Indenting the first two lines doesn't change the text - let edit = indent_edit(&doc, 0).unwrap(); + let edit = indent_edit(&db, &file, 0).unwrap(); assert!(edit.is_none()); - let edit = indent_edit(&doc, 1).unwrap(); + let edit = indent_edit(&db, &file, 1).unwrap(); assert!(edit.is_none()); - let doc = test_doc("foo(\n) +\n bar"); - let edit = indent_edit(&doc, 0).unwrap(); + let (db, file) = crate::lsp::ark_file::test_ark_file("foo(\n) +\n bar"); + let edit = indent_edit(&db, &file, 0).unwrap(); assert!(edit.is_none()); } #[test] fn test_line_indent_braced_expression() { - let mut doc = test_doc("{\nbar\n}"); + let (mut db, file) = crate::lsp::ark_file::test_ark_file("{\nbar\n}"); - let edit = indent_edit(&doc, 1).unwrap().unwrap(); - apply_text_edits(edit, &mut doc); - assert_eq!(doc.contents, "{\n bar\n}"); + let edit = indent_edit(&db, &file, 1).unwrap().unwrap(); + apply_text_edits(edit, &mut db, &file, ENCODING); + assert_eq!(file.contents(&db), "{\n bar\n}"); - let mut doc = test_doc("function() {\nbar\n}"); + let (mut db, ark_file) = crate::lsp::ark_file::test_ark_file("function() {\nbar\n}"); - let edit = indent_edit(&doc, 1).unwrap().unwrap(); - apply_text_edits(edit, &mut doc); - assert_eq!(doc.contents, "function() {\n bar\n}"); + let edit = indent_edit(&db, &ark_file, 1).unwrap().unwrap(); + apply_text_edits(edit, &mut db, &ark_file, ENCODING); + assert_eq!(ark_file.contents(&db), "function() {\n bar\n}"); } #[test] fn test_line_indent_braced_expression_closing() { - let mut doc = test_doc("{\n }"); + let (mut db, file) = crate::lsp::ark_file::test_ark_file("{\n }"); - let edit = indent_edit(&doc, 1).unwrap().unwrap(); - apply_text_edits(edit, &mut doc); - assert_eq!(doc.contents, "{\n}"); + let edit = indent_edit(&db, &file, 1).unwrap().unwrap(); + apply_text_edits(edit, &mut db, &file, ENCODING); + assert_eq!(file.contents(&db), "{\n}"); } #[test] fn test_line_indent_braced_expression_closing_multiline() { // https://github.com/posit-dev/positron/issues/3484 - let mut doc = test_doc("{\n\n }"); + let (mut db, file) = crate::lsp::ark_file::test_ark_file("{\n\n }"); - let edit = indent_edit(&doc, 1).unwrap().unwrap(); - apply_text_edits(edit, &mut doc); - assert_eq!(doc.contents, "{\n \n}"); + let edit = indent_edit(&db, &file, 1).unwrap().unwrap(); + apply_text_edits(edit, &mut db, &file, ENCODING); + assert_eq!(file.contents(&db), "{\n \n}"); } #[test] fn test_line_indent_braced_expression_multiline() { - let mut doc = test_doc("function(\n ) {\nfoo\n}"); + let (mut db, file) = crate::lsp::ark_file::test_ark_file("function(\n ) {\nfoo\n}"); - let edit = indent_edit(&doc, 2).unwrap().unwrap(); - apply_text_edits(edit, &mut doc); - assert_eq!(doc.contents, "function(\n ) {\n foo\n}"); + let edit = indent_edit(&db, &file, 2).unwrap().unwrap(); + apply_text_edits(edit, &mut db, &file, ENCODING); + assert_eq!(file.contents(&db), "function(\n ) {\n foo\n}"); } #[test] fn test_line_indent_braced_expression_multiline_empty() { - let mut doc = test_doc("function(\n ) {\n\n}"); + let (mut db, file) = crate::lsp::ark_file::test_ark_file("function(\n ) {\n\n}"); - let edit = indent_edit(&doc, 2).unwrap().unwrap(); - apply_text_edits(edit, &mut doc); - assert_eq!(doc.contents, "function(\n ) {\n \n}"); + let edit = indent_edit(&db, &file, 2).unwrap().unwrap(); + apply_text_edits(edit, &mut db, &file, ENCODING); + assert_eq!(file.contents(&db), "function(\n ) {\n \n}"); } #[test] fn test_line_indent_minimum() { // https://github.com/posit-dev/positron/issues/1683 - let mut doc = test_doc("function() {\n ({\n }\n)\n}"); + let (mut db, file) = crate::lsp::ark_file::test_ark_file("function() {\n ({\n }\n)\n}"); - let edit = indent_edit(&doc, 3).unwrap().unwrap(); - apply_text_edits(edit, &mut doc); - assert_eq!(doc.contents, "function() {\n ({\n }\n )\n}"); + let edit = indent_edit(&db, &file, 3).unwrap().unwrap(); + apply_text_edits(edit, &mut db, &file, ENCODING); + assert_eq!(file.contents(&db), "function() {\n ({\n }\n )\n}"); } #[test] fn test_line_indent_minimum_nested() { // Nested R function test with multiple levels of nesting - let mut doc = test_doc("{\n {\n ({\n }\n )\n }\n}"); + let (mut db, file) = + crate::lsp::ark_file::test_ark_file("{\n {\n ({\n }\n )\n }\n}"); - let edit = indent_edit(&doc, 4).unwrap().unwrap(); - apply_text_edits(edit, &mut doc); - assert_eq!(doc.contents, "{\n {\n ({\n }\n )\n }\n}"); + let edit = indent_edit(&db, &file, 4).unwrap().unwrap(); + apply_text_edits(edit, &mut db, &file, ENCODING); + assert_eq!(file.contents(&db), "{\n {\n ({\n }\n )\n }\n}"); } #[test] fn test_line_indent_function_opening_brace_own_line() { let text = String::from("object <- function()\n{\n body\n}"); - let doc = test_doc(&text); + let (db, file) = crate::lsp::ark_file::test_ark_file(&text); - assert_match!(indent_edit(&doc, 1).unwrap(), None); + assert_match!(indent_edit(&db, &file, 1).unwrap(), None); } #[test] @@ -529,20 +545,16 @@ mod tests { #[test] fn test_indent_snapshot() { let orig = read_text_asset("lsp/snapshots/indent.R"); - - let mut doc = test_doc(&orig); - - let n_lines = doc.contents.matches('\n').count(); - + let (mut db, file) = crate::lsp::ark_file::test_ark_file(&orig); + let n_lines = file.contents(&db).matches('\n').count(); for i in 0..n_lines { - if let Some(edit) = indent_edit(&doc, i).unwrap() { - apply_text_edits(edit, &mut doc); + if let Some(edit) = indent_edit(&db, &file, i).unwrap() { + apply_text_edits(edit, &mut db, &file, ENCODING); } } - - write_asset("lsp/snapshots/indent.R", &doc.contents); - - if orig != doc.contents { + let result = file.contents(&db).to_string(); + write_asset("lsp/snapshots/indent.R", &result); + if orig != result { panic!("Indentation snapshots have changed.\nPlease see git diff."); } } diff --git a/crates/ark/src/lsp/main_loop.rs b/crates/ark/src/lsp/main_loop.rs index d8e56b78a..7ec831cb1 100644 --- a/crates/ark/src/lsp/main_loop.rs +++ b/crates/ark/src/lsp/main_loop.rs @@ -18,7 +18,6 @@ use std::sync::RwLock; use aether_path::FilePath; use anyhow::anyhow; -use futures::stream::FuturesUnordered; use futures::StreamExt; use oak_db::OakDatabase; use oak_scan::DbScan; @@ -29,7 +28,6 @@ use oak_semantic::library::Library; use stdext::result::ResultExt; use tokio::sync::mpsc; use tokio::sync::mpsc::unbounded_channel as tokio_unbounded_channel; -use tokio::task; use tokio::task::JoinHandle; use tower_lsp::lsp_types; use tower_lsp::lsp_types::Diagnostic; @@ -40,6 +38,7 @@ use url::Url; use super::backend::RequestResponse; use crate::console::ConsoleNotification; use crate::lsp; +use crate::lsp::ark_file::ArkFile; use crate::lsp::backend::LspMessage; use crate::lsp::backend::LspNotification; use crate::lsp::backend::LspRequest; @@ -451,6 +450,22 @@ impl GlobalState { }, Event::OakScanCompleted(scan) => { + // This scan ran on a background task, but it sends its result + // back here so the write happens on the main loop. Keep it that + // way: Only the main loop should write to the oak DB (not + // enforced by types unfortunately). Consequences of infringement: + // + // - It would deadlock on writes. Salsa makes a writer block + // until every other snapshot handle has dropped (`clones == + // 1`), and the main loop holds a handle that never drops. So a + // background writer would wait forever. + // + // - It would panic on reads. A salsa write cancels every + // in-flight read. The main loop is the sole writer and runs + // serially, so it never wraps its own reads in + // `catch_cancellation()`. A background write would cancel those + // reads out from under it, causing a cancellation panic. + // Recompute editor-owned files at apply time, not at spawn // time: a buffer may have opened or closed since the scan // kicked off. The buffer-drain inside `apply_scan_completed` uses @@ -461,7 +476,16 @@ impl GlobalState { scan, &editor_owned, ); - dispatch_scan_requests(&self.events_tx, followups); + if followups.is_empty() { + // The scan settled (no more follow-ups). `apply_scan_completed()` + // was an oak write: it cancelled in-flight diagnostics and changed + // the workspace symbols diagnostics resolve against. Recompute + // diagnostics for the open files so they reflect the indexed + // workspace and any cancelled pass is refreshed. + diagnostics_refresh_all(&self.world); + } else { + dispatch_scan_requests(&self.events_tx, followups); + } }, } @@ -779,12 +803,19 @@ pub(crate) fn log(level: lsp_types::MessageType, message: String) { /// /// Can optionally return an event for the auxiliary loop (i.e. a log message or /// diagnostics publication). +/// +/// Salsa cancellation is handled here so callers don't have to. A `set_*` on +/// the main loop cancels concurrent oak queries by unwinding with `Cancelled`. +/// We swallow that into `Ok(None)`, so a cancelled task is a quiet no-op +/// instead of a logged "task panicked". The write that cancelled it enqueues +/// its own follow-up. Any other panic still surfaces on join. pub(crate) fn spawn_blocking(handler: Handler) where Handler: FnOnce() -> anyhow::Result>, Handler: Send + 'static, { - let handle = tokio::task::spawn_blocking(handler); + let handle = + tokio::task::spawn_blocking(move || catch_cancellation(handler).unwrap_or(Ok(None))); // Send the join handle to the auxiliary loop so it can log any errors // or panics @@ -839,8 +870,11 @@ pub enum IndexerTask { #[derive(Debug)] pub(crate) struct RefreshDiagnosticsTask { - uri: Url, + /// Snapshot carrying the live oak plus the session context the diagnostics + /// walk reads. See [`WorldState::diagnostics_snapshot`]. state: WorldState, + /// The file to diagnose, built against the live oak at enqueue time. + file: ArkFile, } #[derive(Debug)] @@ -933,7 +967,7 @@ async fn process_indexer_queue(mut rx: mpsc::UnboundedReceiver process_indexer_batch(std::mem::take(&mut indexer_batch)).await; } - process_diagnostics_batch(std::mem::take(&mut diagnostics_batch)).await; + process_diagnostics_batch(std::mem::take(&mut diagnostics_batch)); } } @@ -978,47 +1012,63 @@ async fn process_indexer_batch(batch: Vec) { } } -async fn process_diagnostics_batch(batch: Vec) { +fn process_diagnostics_batch(batch: Vec) { tracing::trace!("Processing {n} diagnostic tasks", n = batch.len()); // Deduplicate tasks by keeping only the last one for each URI. We use a // `HashMap` so only the last insertion is retained. This is effectively a // way of cancelling diagnostics tasks for outdated documents. - let batch: std::collections::HashMap<_, _> = batch + let batch: HashMap<_, _> = batch .into_iter() - .map(|task| (task.uri, task.state)) + .map(|task| (task.file.url.clone(), task)) .collect(); - let mut futures = FuturesUnordered::new(); - - for (uri, state) in batch { - futures.push(task::spawn_blocking(move || { - let _span = tracing::info_span!("diagnostics_refresh", uri = %uri).entered(); - - if let Some(document) = state.documents.get(&FilePath::from_url(&uri)) { - // Special case testthat-specific behaviour. This is a simple - // stopgap approach that has some false positives (e.g. when we - // work on testthat itself the flag will always be true), but - // that shouldn't have much practical impact. - let testthat = Path::new(uri.path()) - .components() - .any(|c| c.as_os_str() == "testthat"); - - let diagnostics = generate_diagnostics(document.clone(), state.clone(), testthat); - Some(RefreshDiagnosticsResult { - uri, - diagnostics, - version: document.version, - }) - } else { - None - } - })); + // Each file is its own blocking task. `spawn_blocking()` catches salsa + // cancellation, so a pass cancelled by a concurrent edit just produces no + // event. The publish happens via the returned [`AuxiliaryEvent`]. + for (_uri, task) in batch { + lsp::spawn_blocking(move || { + let result = refresh_diagnostics(task); + Ok(Some(AuxiliaryEvent::PublishDiagnostics( + result.uri, + result.diagnostics, + result.version, + ))) + }); + } +} + +fn refresh_diagnostics(task: RefreshDiagnosticsTask) -> RefreshDiagnosticsResult { + let RefreshDiagnosticsTask { file, state } = task; + let uri = file.url.clone(); + let version = file.version; + let _span = tracing::info_span!("diagnostics_refresh", uri = %uri).entered(); + + // Special case testthat-specific behaviour. This is a simple stopgap + // approach that has some false positives (e.g. when we work on testthat + // itself the flag will always be true), but that shouldn't have much + // practical impact. + let testthat = Path::new(uri.path()) + .components() + .any(|c| c.as_os_str() == "testthat"); + + let diagnostics = generate_diagnostics(file, state, testthat); + + RefreshDiagnosticsResult { + uri, + diagnostics, + version, } +} - // Publish results as they complete - while let Some(Ok(Some(result))) = futures.next().await { - publish_diagnostics(result.uri, result.diagnostics, result.version); +/// Run `f`, swallowing a salsa cancellation as `None`. Any other panic propagates. +fn catch_cancellation(f: impl FnOnce() -> T) -> Option { + match salsa::Cancelled::catch(std::panic::AssertUnwindSafe(f)) { + Ok(value) => Some(value), + Err(cancelled) => { + log::trace!("Salsa query {cancelled}"); + None + }, } } @@ -1068,7 +1118,7 @@ pub(crate) fn index_create(uris: Vec, state: WorldState) { .unwrap_or_else(|err| crate::lsp::log_error!("Failed to queue index create: {err}")); } - diagnostics_refresh_all(state); + diagnostics_refresh_all(&state); } pub(crate) fn index_update(uris: Vec, state: WorldState) { @@ -1095,7 +1145,7 @@ pub(crate) fn index_update(uris: Vec, state: WorldState) { // Refresh all diagnostics since the indexer results for one file may affect // other files - diagnostics_refresh_all(state); + diagnostics_refresh_all(&state); } pub(crate) fn index_delete(uris: Vec, state: WorldState) { @@ -1107,7 +1157,7 @@ pub(crate) fn index_delete(uris: Vec, state: WorldState) { // Refresh all diagnostics since the indexer results for one file may affect // other files - diagnostics_refresh_all(state); + diagnostics_refresh_all(&state); } pub(crate) fn index_rename(uris: Vec<(Url, Url)>, state: WorldState) { @@ -1122,10 +1172,10 @@ pub(crate) fn index_rename(uris: Vec<(Url, Url)>, state: WorldState) { // Refresh all diagnostics since the indexer results for one file may affect // other files - diagnostics_refresh_all(state); + diagnostics_refresh_all(&state); } -pub(crate) fn diagnostics_refresh_all(state: WorldState) { +pub(crate) fn diagnostics_refresh_all(state: &WorldState) { tracing::trace!( "Refreshing diagnostics for {n} documents", n = state.documents.len() @@ -1136,15 +1186,62 @@ pub(crate) fn diagnostics_refresh_all(state: WorldState) { continue; } - // The task sits in the indexer queue off the main loop. - // `legacy_snapshot()` hands it a detached oak db so it can't pin the - // live one against the main loop's next `set_*` (diagnostics read only - // non-oak state). + let file = match state.ark_file(&document.url) { + Ok(file) => file, + Err(err) => { + tracing::warn!("Can't build ArkFile for '{}': {err:?}", document.url); + continue; + }, + }; + INDEXER_QUEUE .send(IndexerQueueTask::Diagnostics(RefreshDiagnosticsTask { - uri: document.url.clone(), - state: state.legacy_snapshot(), + file, + state: state.diagnostics_snapshot(), })) .unwrap_or_else(|err| lsp::log_error!("Failed to queue diagnostics refresh: {err}")); } } + +#[cfg(test)] +mod tests { + use aether_path::FilePath; + use oak_scan::DbScan; + use salsa::Database; + use url::Url; + + use super::catch_cancellation; + use super::refresh_diagnostics; + use super::RefreshDiagnosticsTask; + use crate::lsp::document::Document; + use crate::lsp::state::WorldState; + + /// A salsa cancellation during the pass is swallowed into `None` by + /// `catch_cancellation`, the wrapper `spawn_blocking` applies to every task, + /// rather than unwinding and killing the task. + /// + /// `cancellation_token().cancel()` arms local cancellation on the snapshot's + /// oak, so the first salsa query in `generate_diagnostics` (the `tree_sitter` + /// fetch) unwinds with `salsa::Cancelled`, the same payload a concurrent + /// `set_*` produces. The unwind fires before any R, so no `r_task` here. + #[test] + fn test_cancelled_diagnostics_pass_is_caught() { + let mut state = WorldState::default(); + let uri = Url::parse("file:///test.R").unwrap(); + let code = "foo"; + state.insert_document(uri.clone(), Document::new(code, None)); + state + .db + .upsert_editor(FilePath::from_url(&uri), code.to_string()); + + let file = state.ark_file(&uri).unwrap(); + let snapshot = state.diagnostics_snapshot(); + snapshot.db.cancellation_token().cancel(); + + let task = RefreshDiagnosticsTask { + file, + state: snapshot, + }; + assert!(catch_cancellation(|| refresh_diagnostics(task)).is_none()); + } +} diff --git a/crates/ark/src/lsp/selection_range.rs b/crates/ark/src/lsp/selection_range.rs index fbc53d7d2..ec0d35da9 100644 --- a/crates/ark/src/lsp/selection_range.rs +++ b/crates/ark/src/lsp/selection_range.rs @@ -103,16 +103,17 @@ fn range_default(node: Node) -> Range { node.range() } -pub fn convert_selection_range_from_tree_sitter_to_lsp( +pub(crate) fn convert_selection_range_from_tree_sitter_to_lsp( + db: &dyn crate::lsp::db::ArkDb, + file: &crate::lsp::ark_file::ArkFile, selection: SelectionRange, - document: &crate::lsp::document::Document, ) -> anyhow::Result { - let range = document.lsp_range_from_tree_sitter_range(selection.range)?; + let range = file.lsp_range_from_tree_sitter_range(db, selection.range)?; // If there is a parent, convert it and box it let parent = match selection.parent { Some(selection) => { - let selection = convert_selection_range_from_tree_sitter_to_lsp(*selection, document)?; + let selection = convert_selection_range_from_tree_sitter_to_lsp(db, file, *selection)?; Some(Box::new(selection)) }, None => None, diff --git a/crates/ark/src/lsp/state.rs b/crates/ark/src/lsp/state.rs index 912e2c16a..82cbbf107 100644 --- a/crates/ark/src/lsp/state.rs +++ b/crates/ark/src/lsp/state.rs @@ -2,10 +2,12 @@ use std::collections::HashMap; use aether_path::FilePath; use anyhow::anyhow; +use oak_db::Db; use oak_db::OakDatabase; use oak_semantic::library::Library; use url::Url; +use crate::lsp::ark_file::ArkFile; use crate::lsp::config::LspConfig; use crate::lsp::document::Document; use crate::lsp::inputs::source_root::SourceRoot; @@ -96,28 +98,46 @@ impl WorldState { } } - /// Copy the world state for a background handler that does not query oak. - /// - /// The copy gets a fresh, empty `OakDatabase` instead of a handle to the - /// live one. A salsa db handle held off the main loop blocks the next - /// `set_*` on the owner: the setter waits for `clones == 1`, and an idle - /// handle (parked in the indexer queue, or held by a handler blocked in - /// `r_task`) never drops on its own. - /// - /// This is the snapshot for the non-salsa handlers (diagnostics, - /// indexing) that read only the plain `WorldState` fields. A salsa-based - /// handler that queries oak off the main loop needs a different snapshot, - /// one that keeps the live db handle and runs its queries under - /// cancellation (catch `Cancelled`, don't span `r_task`), so it sees real - /// oak data. That's what the `legacy_` prefix warns: don't reach for this - /// from oak-querying code. - pub(crate) fn legacy_snapshot(&self) -> WorldState { + /// Snapshot for the diagnostics worker, which runs off the main loop and + /// queries oak. + pub(crate) fn diagnostics_snapshot(&self) -> WorldState { WorldState { - db: OakDatabase::new(), - ..self.clone() + db: self.db.clone(), + console_scopes: self.console_scopes.clone(), + installed_packages: self.installed_packages.clone(), + root: self.root.clone(), + library: self.library.clone(), + config: self.config.clone(), + documents: HashMap::new(), + virtual_documents: HashMap::new(), + workspace: Workspace::default(), } } + /// Build an [`ArkFile`] for a request. + /// + /// Most fields come from the legacy `Document` struct, namely `version`, + /// `config`, and `url`. The `encoding` comes from the world config instead. + /// The analysis handle comes from the matching `oak_db::File`. + /// + /// The `Document` and the `File` are kept in sync by the editor bridge, + /// which calls `upsert_editor()` on every `did_open` and `did_change`. So a + /// `File` exists whenever a `Document` does. + pub(crate) fn ark_file(&self, uri: &Url) -> anyhow::Result { + let key = FilePath::from_url(uri); + let document = self.get_document(&key)?; + let Some(file) = self.db.file_by_path(&key) else { + return Err(anyhow!("No `oak_db` file for URI {uri}")); + }; + Ok(ArkFile { + file, + version: document.version, + config: document.config.clone(), + url: document.url.clone(), + encoding: self.config.position_encoding, + }) + } + /// Insert a document, keying on the normalised [`FilePath`] and /// stashing the verbatim editor URL on [`Document::url`] for wire /// output. @@ -135,61 +155,3 @@ pub(crate) fn workspace_uris(state: &WorldState) -> Vec { .map(|doc| doc.url.clone()) .collect() } - -#[cfg(test)] -mod tests { - use std::sync::mpsc; - use std::sync::Arc; - use std::sync::Barrier; - use std::time::Duration; - - use oak_db::OakDatabase; - use oak_scan::DbScan; - use oak_semantic::library::Library; - - use super::WorldState; - - /// A legacy background snapshot must not pin the oak db against a - /// main-loop mutation. - /// - /// salsa reclaims `&mut` access for a setter by raising the cancellation - /// flag and then blocking on `clones == 1`. That flag only frees a clone - /// whose thread is inside a running query and notices it. A snapshot that - /// sits idle (parked in the indexer queue, or held by a `spawn_blocking` - /// handler blocked in `r_task`) never notices, so the next setter on the - /// owner blocks until the snapshot drops. This test parks a snapshot with - /// no query running and asserts a setter on the owner still completes. - #[test] - fn legacy_snapshot_does_not_pin_oak_against_mutation() { - let mut state = WorldState::new(OakDatabase::new(), Library::new(vec![])); - - let snapshot = state.legacy_snapshot(); - - // Park the snapshot with no salsa query running, then hold it until - // the main thread has finished timing the mutation. - let release = Arc::new(Barrier::new(2)); - let held = { - let release = Arc::clone(&release); - std::thread::spawn(move || { - let _snapshot = snapshot; - release.wait(); - }) - }; - - let (tx, rx) = mpsc::channel(); - let mutator = std::thread::spawn(move || { - state.db.set_library_paths(&[]); - let _ = tx.send(()); - }); - - let completed = rx.recv_timeout(Duration::from_secs(2)).is_ok(); - - // Release the parked snapshot so a blocked mutator can finish and both - // threads join, regardless of the outcome. - release.wait(); - held.join().unwrap(); - mutator.join().unwrap(); - - assert!(completed); - } -} diff --git a/crates/ark/src/lsp/state_handlers.rs b/crates/ark/src/lsp/state_handlers.rs index 634211c9a..30dadf514 100644 --- a/crates/ark/src/lsp/state_handlers.rs +++ b/crates/ark/src/lsp/state_handlers.rs @@ -280,7 +280,7 @@ pub(crate) fn did_open( // NOTE: Do we need to call `update_config()` here? // update_config(vec![uri]).await; - lsp::main_loop::diagnostics_refresh_all(state.clone()); + lsp::main_loop::diagnostics_refresh_all(state); Ok(()) } @@ -342,6 +342,12 @@ pub(crate) fn did_close( let path = FilePath::from_url(&uri); state.db.close_editor(&path); + // `close_editor` is an oak write, so it cancels any diagnostics pass in + // flight for the other open files. Re-enqueue them so a cancelled pass + // isn't silently dropped. The closed file was removed above, so it isn't + // refreshed (we already cleared it with the empty publish). + lsp::main_loop::diagnostics_refresh_all(state); + lsp::log_info!("did_close(): closed document with URI: '{uri}'."); Ok(()) @@ -605,7 +611,7 @@ async fn update_config( // Refresh diagnostics if the configuration changed if state.config.diagnostics != diagnostics_config { tracing::info!("Refreshing diagnostics after configuration changed"); - lsp::main_loop::diagnostics_refresh_all(state.clone()); + lsp::main_loop::diagnostics_refresh_all(state); } Ok(()) @@ -623,7 +629,7 @@ pub(crate) fn did_change_console_inputs( // during package development in conjunction with `devtools::load_all()`. // Ideally diagnostics would not rely on these though, and we wouldn't need // to refresh from here. - lsp::diagnostics_refresh_all(state.clone()); + lsp::diagnostics_refresh_all(state); Ok(()) } diff --git a/crates/ark/src/lsp/statement_range.rs b/crates/ark/src/lsp/statement_range.rs index 86ea56406..7e541259e 100644 --- a/crates/ark/src/lsp/statement_range.rs +++ b/crates/ark/src/lsp/statement_range.rs @@ -18,8 +18,10 @@ use tower_lsp::lsp_types::VersionedTextDocumentIdentifier; use tree_sitter::Node; use tree_sitter::Point; +use crate::lsp::ark_file::ArkFile; use crate::lsp::backend::LspResult; -use crate::lsp::document::Document; +use crate::lsp::db::parse_tree_sitter; +use crate::lsp::db::ArkDb; use crate::lsp::traits::cursor::TreeCursorExt; use crate::lsp::traits::node::NodeExt; use crate::treesitter::node_has_error_or_missing; @@ -97,13 +99,17 @@ struct ArkStatementRangeSyntaxRejection { impl ArkStatementRangeResponse { // Sole conversion method between `ArkStatementRangeResponse` and `StatementRangeResponse`, // which handles all Tree-sitter to LSP conversion at the method boundary - fn into_lsp_response(self, document: &Document) -> anyhow::Result { + fn into_lsp_response( + self, + db: &dyn ArkDb, + file: &ArkFile, + ) -> anyhow::Result { match self { ArkStatementRangeResponse::Success(response) => { // Tree-sitter `Point`s to LSP `Position`s let start = - document.lsp_position_from_tree_sitter_point(response.range.start_point)?; - let end = document.lsp_position_from_tree_sitter_point(response.range.end_point)?; + file.lsp_position_from_tree_sitter_point(db, response.range.start_point)?; + let end = file.lsp_position_from_tree_sitter_point(db, response.range.end_point)?; let range = lsp_types::Range { start, end }; Ok(StatementRangeResponse::Success(StatementRangeSuccess { range, @@ -130,22 +136,23 @@ impl ArkStatementRangeResponse { static RE_ROXYGEN2_COMMENT: Lazy = Lazy::new(|| Regex::new(r"^#+'").unwrap()); pub(crate) fn statement_range( - document: &Document, + db: &dyn ArkDb, + file: &ArkFile, point: Point, ) -> LspResult> { - let root = document.ast.root_node(); - let contents = &document.contents; + let root = file.tree_sitter(db).root_node(); + let contents = file.contents(db); // Initial check to see if we are in a roxygen2 comment, in which case we parse a // subdocument containing the `@examples` or `@examplesIf` section and locate a // statement range within that to execute. The returned `code` represents the // statement range's code stripped of `#'` tokens so it is runnable. if let Some(response) = find_roxygen_statement_range(&root, contents, point)? { - return Ok(Some(response.into_lsp_response(document)?)); + return Ok(Some(response.into_lsp_response(db, file)?)); } if let Some(response) = find_statement_range(&root, point.row)? { - return Ok(Some(response.into_lsp_response(document)?)); + return Ok(Some(response.into_lsp_response(db, file)?)); } Ok(None) @@ -341,9 +348,11 @@ fn find_roxygen_examples_range( .collect(); let subcontents = subcontents.join("\n"); - // Parse the subdocument - let subdocument = Document::new(&subcontents, None); - let subdocument_root = subdocument.ast.root_node(); + // Parse the subdocument directly. The `@examples` subdocument is a + // re-indented slice of the buffer, not an editor file with its own + // `oak_db::File`. + let subdocument_tree = parse_tree_sitter(&subcontents); + let subdocument_root = subdocument_tree.root_node(); // Adjust original document row to point to the subdocument row so we know where to // start our search from within the subdocument @@ -360,7 +369,7 @@ fn find_roxygen_examples_range( ArkStatementRangeResponse::Success(subdocument_statement_range) => { adjust_roxygen_examples_success( subdocument_statement_range, - &subdocument, + &subcontents, root, row_adjustment, ) @@ -375,16 +384,14 @@ fn find_roxygen_examples_range( fn adjust_roxygen_examples_success( subdocument_statement_range: ArkStatementRangeSuccess, - subdocument: &Document, + subcontents: &str, root: &Node, row_adjustment: usize, ) -> Option { let subdocument_range = subdocument_statement_range.range; // Slice out code to execute from the subdocument - let slice = subdocument - .contents - .get(subdocument_range.start_byte..subdocument_range.end_byte)?; + let slice = subcontents.get(subdocument_range.start_byte..subdocument_range.end_byte)?; let subdocument_code = slice.to_string(); // Map the `subdocument_range` that covers the executable code back to a `range` @@ -829,9 +836,9 @@ mod tests { use tree_sitter::Parser; use tree_sitter::Point; + use super::parse_tree_sitter; use crate::fixtures::point_and_offset_from_cursor; use crate::fixtures::point_from_cursor; - use crate::lsp::document::Document; use crate::lsp::statement_range::find_roxygen_statement_range; use crate::lsp::statement_range::find_statement_range; use crate::lsp::statement_range::ArkStatementRangeRejection; @@ -2087,9 +2094,9 @@ fn <- function() { #' 1 + 1 "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2107,9 +2114,9 @@ fn <- function() { #' 1 + 1 "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2127,9 +2134,9 @@ fn <- function() { #' 1 + 1 "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2148,9 +2155,9 @@ fn <- function() { #' 2 + 2 "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2168,9 +2175,9 @@ fn <- function() { #'^ "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2188,9 +2195,9 @@ fn <- function() { #' ^@returns "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2214,9 +2221,9 @@ fn <- function() { #' 2 + 2 "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2257,9 +2264,9 @@ fn <- function() { #' 2 + 2 "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2299,9 +2306,9 @@ fn <- function() { NULL "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2340,9 +2347,9 @@ x %>% #' @returns "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2362,9 +2369,9 @@ x %>% #'2 + ^2 "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2384,9 +2391,9 @@ x %>% #' ^@returns "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2406,9 +2413,9 @@ x %>% ###' @returns "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2429,9 +2436,9 @@ x %>% ###' @returns "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2452,9 +2459,9 @@ x %>% ###' @returns "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2474,9 +2481,9 @@ x %>% 2 + 2 "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2501,9 +2508,9 @@ x %>% #' @returns "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2532,9 +2539,9 @@ x %>% #' @returns "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let rejection = find_roxygen_statement_range_rejection(&root, contents, point); assert_matches::assert_matches!( rejection, @@ -2552,9 +2559,9 @@ x %>% #' @returns "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let rejection = find_roxygen_statement_range_rejection(&root, contents, point); assert_matches::assert_matches!( rejection, @@ -2577,9 +2584,9 @@ x %>% 2 + 2 "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); assert!(find_roxygen_statement_range(&root, contents, point) .unwrap() .is_none()); @@ -2595,9 +2602,9 @@ x %>% NULL "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), @@ -2617,9 +2624,9 @@ NULL NULL "; let (text, point) = statement_range_point_from_cursor(text); - let document = Document::new(&text, None); - let root = document.ast.root_node(); - let contents = &document.contents; + let tree = parse_tree_sitter(&text); + let root = tree.root_node(); + let contents = text.as_str(); let statement_range = find_roxygen_statement_range_success(&root, contents, point); assert_eq!( get_text(statement_range.range, contents), diff --git a/crates/ark/src/lsp/symbols.rs b/crates/ark/src/lsp/symbols.rs index f7eb0a793..a539274c2 100644 --- a/crates/ark/src/lsp/symbols.rs +++ b/crates/ark/src/lsp/symbols.rs @@ -9,7 +9,6 @@ use std::result::Result::Ok; -use aether_path::FilePath; use stdext::unwrap::IntoResult; use tower_lsp::lsp_types::DocumentSymbol; use tower_lsp::lsp_types::DocumentSymbolParams; @@ -20,7 +19,8 @@ use tower_lsp::lsp_types::SymbolKind; use tower_lsp::lsp_types::WorkspaceSymbolParams; use tree_sitter::Node; -use crate::lsp::document::Document; +use crate::lsp::ark_file::ArkFile; +use crate::lsp::db::ArkDb; use crate::lsp::indexer; use crate::lsp::indexer::IndexEntryData; use crate::lsp::state::WorldState; @@ -160,11 +160,9 @@ pub(crate) fn document_symbols( params: &DocumentSymbolParams, ) -> anyhow::Result> { let uri = ¶ms.text_document.uri; - let doc = state - .documents - .get(&FilePath::from_url(uri)) - .into_result()?; - let ast = &doc.ast; + let file = state.ark_file(uri)?; + let db = &state.db; + let ast = file.tree_sitter(db); // Start walking from the root node let root_node = ast.root_node(); @@ -174,7 +172,7 @@ pub(crate) fn document_symbols( ctx.include_assignments_in_blocks = state.config.symbols.include_assignments_in_blocks; // Extract and process all symbols from the AST - if let Err(err) = collect_symbols(&mut ctx, &root_node, doc, &mut result) { + if let Err(err) = collect_symbols(&mut ctx, &root_node, &file, db, &mut result) { log::error!("Failed to collect symbols: {err:?}"); return Ok(Vec::new()); } @@ -186,16 +184,17 @@ pub(crate) fn document_symbols( fn collect_symbols( ctx: &mut CollectContext, node: &Node, - doc: &Document, + file: &ArkFile, + db: &dyn ArkDb, symbols: &mut Vec, ) -> anyhow::Result<()> { match node.node_type() { NodeType::Program => { - collect_list_sections(ctx, node, doc, symbols)?; + collect_list_sections(ctx, node, file, db, symbols)?; }, NodeType::BracedExpression => { - collect_list_sections(ctx, node, doc, symbols)?; + collect_list_sections(ctx, node, file, db, symbols)?; }, NodeType::IfStatement => { @@ -205,37 +204,37 @@ fn collect_symbols( // } else { // x <- top_level_assignment // } - collect_if_statement(ctx, node, doc, symbols)?; + collect_if_statement(ctx, node, file, db, symbols)?; }, NodeType::BinaryOperator(BinaryOperatorType::LeftAssignment) | NodeType::BinaryOperator(BinaryOperatorType::EqualsAssignment) => { - collect_assignment(ctx, node, doc, symbols)?; + collect_assignment(ctx, node, file, db, symbols)?; }, NodeType::ForStatement => { - collect_for_statement(ctx, node, doc, symbols)?; + collect_for_statement(ctx, node, file, db, symbols)?; }, NodeType::WhileStatement => { - collect_while_statement(ctx, node, doc, symbols)?; + collect_while_statement(ctx, node, file, db, symbols)?; }, NodeType::RepeatStatement => { - collect_repeat_statement(ctx, node, doc, symbols)?; + collect_repeat_statement(ctx, node, file, db, symbols)?; }, NodeType::Call => { let old = ctx.top_level; ctx.top_level = false; - collect_call(ctx, node, doc, symbols)?; + collect_call(ctx, node, file, db, symbols)?; ctx.top_level = old; }, NodeType::FunctionDefinition => { let old = ctx.top_level; ctx.top_level = false; - collect_function(ctx, node, doc, symbols)?; + collect_function(ctx, node, file, db, symbols)?; ctx.top_level = old; }, // For all other node types, no symbols need to be added @@ -248,17 +247,18 @@ fn collect_symbols( fn collect_if_statement( ctx: &mut CollectContext, node: &Node, - doc: &Document, + file: &ArkFile, + db: &dyn ArkDb, symbols: &mut Vec, ) -> anyhow::Result<()> { if let Some(condition) = node.child_by_field_name("condition") { - collect_symbols(ctx, &condition, doc, symbols)?; + collect_symbols(ctx, &condition, file, db, symbols)?; } if let Some(consequent) = node.child_by_field_name("consequence") { - collect_symbols(ctx, &consequent, doc, symbols)?; + collect_symbols(ctx, &consequent, file, db, symbols)?; } if let Some(alternative) = node.child_by_field_name("alternative") { - collect_symbols(ctx, &alternative, doc, symbols)?; + collect_symbols(ctx, &alternative, file, db, symbols)?; } Ok(()) @@ -267,17 +267,18 @@ fn collect_if_statement( fn collect_for_statement( ctx: &mut CollectContext, node: &Node, - doc: &Document, + file: &ArkFile, + db: &dyn ArkDb, symbols: &mut Vec, ) -> anyhow::Result<()> { if let Some(variable) = node.child_by_field_name("variable") { - collect_symbols(ctx, &variable, doc, symbols)?; + collect_symbols(ctx, &variable, file, db, symbols)?; } if let Some(iterator) = node.child_by_field_name("iterator") { - collect_symbols(ctx, &iterator, doc, symbols)?; + collect_symbols(ctx, &iterator, file, db, symbols)?; } if let Some(body) = node.child_by_field_name("body") { - collect_symbols(ctx, &body, doc, symbols)?; + collect_symbols(ctx, &body, file, db, symbols)?; } Ok(()) @@ -286,14 +287,15 @@ fn collect_for_statement( fn collect_while_statement( ctx: &mut CollectContext, node: &Node, - doc: &Document, + file: &ArkFile, + db: &dyn ArkDb, symbols: &mut Vec, ) -> anyhow::Result<()> { if let Some(condition) = node.child_by_field_name("condition") { - collect_symbols(ctx, &condition, doc, symbols)?; + collect_symbols(ctx, &condition, file, db, symbols)?; } if let Some(body) = node.child_by_field_name("body") { - collect_symbols(ctx, &body, doc, symbols)?; + collect_symbols(ctx, &body, file, db, symbols)?; } Ok(()) @@ -302,11 +304,12 @@ fn collect_while_statement( fn collect_repeat_statement( ctx: &mut CollectContext, node: &Node, - doc: &Document, + file: &ArkFile, + db: &dyn ArkDb, symbols: &mut Vec, ) -> anyhow::Result<()> { if let Some(body) = node.child_by_field_name("body") { - collect_symbols(ctx, &body, doc, symbols)?; + collect_symbols(ctx, &body, file, db, symbols)?; } Ok(()) @@ -315,14 +318,15 @@ fn collect_repeat_statement( fn collect_function( ctx: &mut CollectContext, node: &Node, - doc: &Document, + file: &ArkFile, + db: &dyn ArkDb, symbols: &mut Vec, ) -> anyhow::Result<()> { if let Some(parameters) = node.child_by_field_name("parameters") { - collect_function_parameters(ctx, ¶meters, doc, symbols)?; + collect_function_parameters(ctx, ¶meters, file, db, symbols)?; } if let Some(body) = node.child_by_field_name("body") { - collect_symbols(ctx, &body, doc, symbols)?; + collect_symbols(ctx, &body, file, db, symbols)?; } Ok(()) @@ -376,12 +380,19 @@ fn collect_function( fn collect_sections( ctx: &mut CollectContext, node: &Node, - doc: &Document, + file: &ArkFile, + db: &dyn ArkDb, symbols: &mut Vec, mut handle_child: F, ) -> anyhow::Result<()> where - F: FnMut(&mut CollectContext, &Node, &Document, &mut Vec) -> anyhow::Result<()>, + F: FnMut( + &mut CollectContext, + &Node, + &ArkFile, + &dyn ArkDb, + &mut Vec, + ) -> anyhow::Result<()>, { // In lists of expressions we track and collect section comments, then // collect symbols from children nodes @@ -393,7 +404,7 @@ where for child in node.children(&mut cursor) { if let NodeType::Comment = child.node_type() { - let comment_text = child.node_as_str(&doc.contents)?; + let comment_text = child.node_as_str(file.contents(db))?; // If we have a section comment, add it to our stack and close any sections if needed if let Some((level, title)) = parse_comment_as_section(comment_text) { @@ -402,10 +413,11 @@ where { // Set end position for the section being closed if let Some(section) = active_sections.last_mut() { - let pos = point_end_of_previous_row(child.start_position(), &doc.contents); + let pos = + point_end_of_previous_row(child.start_position(), file.contents(db)); section.end_position = Some(pos); } - finalize_section(&mut active_sections, symbols, doc)?; + finalize_section(&mut active_sections, symbols, file, db)?; } let section = Section { @@ -426,11 +438,11 @@ where if active_sections.is_empty() { // If no active section, extend current vector of symbols - handle_child(ctx, &child, doc, symbols)?; + handle_child(ctx, &child, file, db, symbols)?; } else { // Otherwise create new store of symbols for the current section let mut child_symbols = Vec::new(); - handle_child(ctx, &child, doc, &mut child_symbols)?; + handle_child(ctx, &child, file, db, &mut child_symbols)?; // Nest them inside last section if !child_symbols.is_empty() { @@ -449,11 +461,11 @@ where if let Some(section) = active_sections.last_mut() { let mut pos = node.end_position(); if pos.row > section.start_position.row { - pos = point_end_of_previous_row(pos, &doc.contents); + pos = point_end_of_previous_row(pos, file.contents(db)); } section.end_position = Some(pos); } - finalize_section(&mut active_sections, symbols, doc)?; + finalize_section(&mut active_sections, symbols, file, db)?; } Ok(()) @@ -462,18 +474,18 @@ where fn collect_list_sections( ctx: &mut CollectContext, node: &Node, - doc: &Document, + file: &ArkFile, + db: &dyn ArkDb, symbols: &mut Vec, ) -> anyhow::Result<()> { - collect_sections(ctx, node, doc, symbols, |ctx, child, doc, symbols| { - collect_symbols(ctx, child, doc, symbols) - }) + collect_sections(ctx, node, file, db, symbols, collect_symbols) } fn collect_call( ctx: &mut CollectContext, node: &Node, - doc: &Document, + file: &ArkFile, + db: &dyn ArkDb, symbols: &mut Vec, ) -> anyhow::Result<()> { let Some(callee) = node.child_by_field_name("function") else { @@ -481,13 +493,13 @@ fn collect_call( }; if callee.is_identifier() { - let fun_symbol = callee.node_as_str(&doc.contents)?; + let fun_symbol = callee.node_as_str(file.contents(db))?; if fun_symbol == "test_that" { - return collect_call_test_that(ctx, node, doc, symbols); + return collect_call_test_that(ctx, node, file, db, symbols); } } - collect_call_arguments(ctx, node, doc, symbols)?; + collect_call_arguments(ctx, node, file, db, symbols)?; Ok(()) } @@ -495,62 +507,79 @@ fn collect_call( fn collect_call_arguments( ctx: &mut CollectContext, node: &Node, - doc: &Document, + file: &ArkFile, + db: &dyn ArkDb, symbols: &mut Vec, ) -> anyhow::Result<()> { let Some(arguments) = node.child_by_field_name("arguments") else { return Ok(()); }; - collect_sections(ctx, &arguments, doc, symbols, |ctx, child, doc, symbols| { - let Some(arg_value) = child.child_by_field_name("value") else { - return Ok(()); - }; - - // If this is a named function, collect it as a method (new node in the tree) - if arg_value.kind() == "function_definition" { - if let Some(arg_fun) = child.child_by_field_name("name") { - collect_method(ctx, &arg_fun, &arg_value, doc, symbols)?; + collect_sections( + ctx, + &arguments, + file, + db, + symbols, + |ctx, child, file, db, symbols| { + let Some(arg_value) = child.child_by_field_name("value") else { return Ok(()); }; - // else fallthrough - } - collect_symbols(ctx, &arg_value, doc, symbols)?; + // If this is a named function, collect it as a method (new node in the tree) + if arg_value.kind() == "function_definition" { + if let Some(arg_fun) = child.child_by_field_name("name") { + collect_method(ctx, &arg_fun, &arg_value, file, db, symbols)?; + return Ok(()); + }; + // else fallthrough + } + + collect_symbols(ctx, &arg_value, file, db, symbols)?; - Ok(()) - }) + Ok(()) + }, + ) } fn collect_function_parameters( ctx: &mut CollectContext, node: &Node, - doc: &Document, + file: &ArkFile, + db: &dyn ArkDb, symbols: &mut Vec, ) -> anyhow::Result<()> { - collect_sections(ctx, node, doc, symbols, |_ctx, _child, _doc, _symbols| { - // We only collect sections and don't recurse inside parameters - Ok(()) - }) + collect_sections( + ctx, + node, + file, + db, + symbols, + |_ctx, _child, _file, _db, _symbols| { + // We only collect sections and don't recurse inside parameters + Ok(()) + }, + ) } fn collect_method( ctx: &mut CollectContext, arg_fun: &Node, arg_value: &Node, - doc: &Document, + file: &ArkFile, + db: &dyn ArkDb, symbols: &mut Vec, ) -> anyhow::Result<()> { if !arg_fun.is_identifier_or_string() { return Ok(()); } - let arg_name = arg_fun.node_to_string(&doc.contents)?; + let arg_name = arg_fun.node_to_string(file.contents(db))?; - let start = doc.lsp_position_from_tree_sitter_point(arg_value.start_position())?; - let end = doc.lsp_position_from_tree_sitter_point(arg_value.end_position())?; + let start = file.lsp_position_from_tree_sitter_point(db, arg_value.start_position())?; + let end = file.lsp_position_from_tree_sitter_point(db, arg_value.end_position())?; let mut children = vec![]; - collect_symbols(ctx, arg_value, doc, &mut children)?; + collect_symbols(ctx, arg_value, file, db, &mut children)?; let mut symbol = new_symbol_node(arg_name, SymbolKind::METHOD, Range { start, end }, children); @@ -568,7 +597,8 @@ fn collect_method( fn collect_call_test_that( ctx: &mut CollectContext, node: &Node, - doc: &Document, + file: &ArkFile, + db: &dyn ArkDb, symbols: &mut Vec, ) -> anyhow::Result<()> { let Some(arguments) = node.child_by_field_name("arguments") else { @@ -593,15 +623,15 @@ fn collect_call_test_that( let mut cursor = arguments.walk(); for child in arguments.children_by_field_name("argument", &mut cursor) { if let Some(value) = child.child_by_field_name("value") { - collect_symbols(ctx, &value, doc, &mut children)?; + collect_symbols(ctx, &value, file, db, &mut children)?; } } - let name = string.node_as_str(&doc.contents)?; + let name = string.node_as_str(file.contents(db))?; let name = name.to_string(); - let start = doc.lsp_position_from_tree_sitter_point(node.start_position())?; - let end = doc.lsp_position_from_tree_sitter_point(node.end_position())?; + let start = file.lsp_position_from_tree_sitter_point(db, node.start_position())?; + let end = file.lsp_position_from_tree_sitter_point(db, node.end_position())?; let symbol = new_symbol_node(name, SymbolKind::FUNCTION, Range { start, end }, children); symbols.push(symbol); @@ -612,7 +642,8 @@ fn collect_call_test_that( fn collect_assignment( ctx: &mut CollectContext, node: &Node, - doc: &Document, + file: &ArkFile, + db: &dyn ArkDb, symbols: &mut Vec, ) -> anyhow::Result<()> { let (NodeType::BinaryOperator(BinaryOperatorType::LeftAssignment) | @@ -631,27 +662,27 @@ fn collect_assignment( // If a function, collect symbol as function let function = lhs.is_identifier_or_string() && rhs.is_function_definition(); if function { - return collect_assignment_with_function(ctx, node, doc, symbols); + return collect_assignment_with_function(ctx, node, file, db, symbols); } if ctx.top_level || ctx.include_assignments_in_blocks { // Collect as generic object, but typically only if we're at top-level. Assigned // objects in nested functions and blocks cause the outline to become // too busy. - let name = lhs.node_to_string(&doc.contents)?; + let name = lhs.node_to_string(file.contents(db))?; - let start = doc.lsp_position_from_tree_sitter_point(node.start_position())?; - let end = doc.lsp_position_from_tree_sitter_point(node.end_position())?; + let start = file.lsp_position_from_tree_sitter_point(db, node.start_position())?; + let end = file.lsp_position_from_tree_sitter_point(db, node.end_position())?; // Now recurse into RHS let mut children = Vec::new(); - collect_symbols(ctx, &rhs, doc, &mut children)?; + collect_symbols(ctx, &rhs, file, db, &mut children)?; let symbol = new_symbol_node(name, SymbolKind::VARIABLE, Range { start, end }, children); symbols.push(symbol); } else { // Recurse into RHS - collect_symbols(ctx, &rhs, doc, symbols)?; + collect_symbols(ctx, &rhs, file, db, symbols)?; } Ok(()) @@ -660,7 +691,8 @@ fn collect_assignment( fn collect_assignment_with_function( ctx: &mut CollectContext, node: &Node, - doc: &Document, + file: &ArkFile, + db: &dyn ArkDb, symbols: &mut Vec, ) -> anyhow::Result<()> { // check for lhs, rhs @@ -674,21 +706,21 @@ fn collect_assignment_with_function( let mut cursor = parameters.walk(); for parameter in parameters.children_by_field_name("parameter", &mut cursor) { let name = parameter.child_by_field_name("name").into_result()?; - let name = name.node_to_string(&doc.contents)?; + let name = name.node_to_string(file.contents(db))?; arguments.push(name); } - let name = lhs.node_to_string(&doc.contents)?; + let name = lhs.node_to_string(file.contents(db))?; let detail = format!("function({})", arguments.join(", ")); let range = Range { - start: doc.lsp_position_from_tree_sitter_point(lhs.start_position())?, - end: doc.lsp_position_from_tree_sitter_point(rhs.end_position())?, + start: file.lsp_position_from_tree_sitter_point(db, lhs.start_position())?, + end: file.lsp_position_from_tree_sitter_point(db, rhs.end_position())?, }; // Process the function body to extract child symbols let mut children = Vec::new(); - collect_symbols(ctx, &rhs, doc, &mut children)?; + collect_symbols(ctx, &rhs, file, db, &mut children)?; let mut symbol = new_symbol_node(name, SymbolKind::FUNCTION, range, children); symbol.detail = Some(detail); @@ -701,15 +733,16 @@ fn collect_assignment_with_function( fn finalize_section( active_sections: &mut Vec
, symbols: &mut Vec, - doc: &Document, + file: &ArkFile, + db: &dyn ArkDb, ) -> anyhow::Result<()> { if let Some(section) = active_sections.pop() { let start_pos = section.start_position; let end_pos = section.end_position.unwrap_or(section.start_position); let range = Range { - start: doc.lsp_position_from_tree_sitter_point(start_pos)?, - end: doc.lsp_position_from_tree_sitter_point(end_pos)?, + start: file.lsp_position_from_tree_sitter_point(db, start_pos)?, + end: file.lsp_position_from_tree_sitter_point(db, end_pos)?, }; let symbol = new_symbol(section.title, SymbolKind::STRING, range); @@ -755,11 +788,11 @@ mod tests { use crate::lsp::util::test_path; fn test_symbol(code: &str) -> Vec { - let doc = Document::new(code, None); - let node = doc.ast.root_node(); + let (db, file) = crate::lsp::ark_file::test_ark_file(code); + let node = file.tree_sitter(&db).root_node(); let mut symbols = Vec::new(); - collect_symbols(&mut CollectContext::new(), &node, &doc, &mut symbols).unwrap(); + collect_symbols(&mut CollectContext::new(), &node, &file, &db, &mut symbols).unwrap(); symbols } @@ -1108,7 +1141,7 @@ outer <- 4 #[test] fn test_symbol_nested_assignments_enabled() { - let doc = Document::new( + let (db, file) = crate::lsp::ark_file::test_ark_file( " local({ inner1 <- 1 @@ -1119,15 +1152,14 @@ a <- function() { } outer <- 4 ", - None, ); - let node = doc.ast.root_node(); + let node = file.tree_sitter(&db).root_node(); let ctx = &mut CollectContext::new(); ctx.include_assignments_in_blocks = true; let mut symbols = Vec::new(); - collect_symbols(ctx, &node, &doc, &mut symbols).unwrap(); + collect_symbols(ctx, &node, &file, &db, &mut symbols).unwrap(); insta::assert_debug_snapshot!(symbols); } diff --git a/crates/ark/src/lsp/tests.rs b/crates/ark/src/lsp/tests.rs index 0bbcd3e82..e34c6c508 100644 --- a/crates/ark/src/lsp/tests.rs +++ b/crates/ark/src/lsp/tests.rs @@ -1,3 +1,5 @@ +mod db; +mod diagnostics; mod find_references; mod goto_definition; mod main_loop; diff --git a/crates/ark/src/lsp/tests/db.rs b/crates/ark/src/lsp/tests/db.rs new file mode 100644 index 000000000..1736048c7 --- /dev/null +++ b/crates/ark/src/lsp/tests/db.rs @@ -0,0 +1,33 @@ +use aether_path::FilePath; +use oak_db::File; +use oak_db::OakDatabase; +use url::Url; + +use crate::lsp::db::FileArkExt; + +fn file(db: &OakDatabase, contents: &str) -> File { + let url = FilePath::from_url(&Url::parse("file:///test.R").unwrap()); + File::new(db, url, contents.to_string(), None) +} + +#[test] +fn test_tree_sitter_parses_contents() { + let db = OakDatabase::new(); + let file = file(&db, "x <- 1\n"); + + let root = file.tree_sitter(&db).root_node(); + + assert!(!root.has_error()); + assert_eq!(root.end_byte(), "x <- 1\n".len()); +} + +#[test] +fn test_tree_sitter_is_cached() { + let db = OakDatabase::new(); + let file = file(&db, "x <- 1\n"); + + // `returns(ref)` hands back the same memoized tree on repeat calls. + let first = file.tree_sitter(&db); + let second = file.tree_sitter(&db); + assert!(std::ptr::eq(first, second)); +} diff --git a/crates/ark/src/lsp/tests/diagnostics.rs b/crates/ark/src/lsp/tests/diagnostics.rs new file mode 100644 index 000000000..7f773230b --- /dev/null +++ b/crates/ark/src/lsp/tests/diagnostics.rs @@ -0,0 +1,38 @@ +use aether_path::FilePath; +use oak_scan::DbScan; +use url::Url; + +use crate::lsp::diagnostics::generate_diagnostics; +use crate::lsp::document::Document; +use crate::lsp::state::WorldState; +use crate::r_task; + +#[test] +fn test_diagnostics_published_through_refresh_snapshot() { + let mut state = WorldState::default(); + + // A tighter scope for `r_task()` results in a compilation error about + // sharing Salsa ingredients across threads + let diagnostics = r_task(|| { + // Open an editor file with an undefined symbol, mirroring `did_open`: + // a `Document` plus its matching `oak_db::File`. + let uri = Url::parse("file:///test.R").unwrap(); + let code = "foo"; + state.insert_document(uri.clone(), Document::new(code, None)); + state + .db + .upsert_editor(FilePath::from_url(&uri), code.to_string()); + + // Mirror `diagnostics_refresh_all`: build the `ArkFile` from the live + // state, then hand the worker the `diagnostics_snapshot`. The snapshot's + // oak must still serve the file the `ArkFile` points at. + let ark_file = state + .ark_file(&uri) + .expect("ArkFile builds from live state"); + + let snapshot = state.diagnostics_snapshot(); + generate_diagnostics(ark_file, snapshot, false) + }); + + assert!(!diagnostics.is_empty()); +} diff --git a/crates/ark/src/r_task.rs b/crates/ark/src/r_task.rs index 0420156a6..36710c312 100644 --- a/crates/ark/src/r_task.rs +++ b/crates/ark/src/r_task.rs @@ -191,6 +191,10 @@ impl RTaskStartInfo { // running, so borrowing is allowed even though we send it to another // thread. See also `Crossbeam::thread::ScopedThreadBuilder` (from which // `r_task()` is adapted) for a similar approach. +// +// Don't run oak (salsa) queries inside `f`. `f` executes on the R thread, and a +// `salsa::Cancelled` unwind there would cross R's C frames, which is UB. Pull +// whatever you need out of oak before the `r_task()`, on the calling thread. pub fn r_task<'env, F, T>(f: F) -> T where