Skip to content

Commit 59d0f7d

Browse files
authored
Finish migrating off Document to ArkFile (#1267)
Branched from #1266. Progress towards #1212. With this PR, everything now goes through Oak's DB. - Remove Ark's `Document` structure, along with its copy of the tree-sitter tree, source code, and line index. These are now all accessed via Oak queries on `File`. - `WorldState`'s `documents` field becomes `open_files: HashMap<FilePath, ArkFile>`. (Maybe we should rename to `ArkOpenFile`?)
2 parents c9d1c7d + bcaec36 commit 59d0f7d

14 files changed

Lines changed: 388 additions & 636 deletions

crates/ark/src/lsp.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ pub mod code_action;
1212
pub mod comm;
1313
pub mod completions;
1414
mod config;
15+
mod content_changes;
1516
pub(crate) mod db;
1617
mod declarations;
1718
pub mod diagnostics;
1819
pub mod diagnostics_syntax;
19-
pub mod document;
2020
pub mod document_context;
2121
pub mod events;
2222
pub mod folding_range;

crates/ark/src/lsp/ark_file.rs

Lines changed: 76 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,29 @@ use crate::lsp::db::FileArkExt;
1818

1919
/// Editor-managed buffer state, paired with its `oak_db::File`.
2020
///
21-
/// `ArkFile` and `OakDatabase` are sibling fields on `WorldState`, so an
22-
/// `ArkFile` cannot hold a reference to the database. That's why the methods
23-
/// below take `db` as an argument instead of storing a reference, which is the
24-
/// Salsa convention anyway.
25-
#[derive(Debug)]
21+
/// This is a temporary structure during the transition to pure Oak handlers.
22+
///
23+
/// The methods take `db` as a parameter rather than holding it. `ArkFile` lives
24+
/// in `WorldState`, and the db is a sibling field there, so a stored borrow of
25+
/// it would be self-referential, which safe Rust forbids. Passing `db` per call
26+
/// is the salsa idiom anyway (`file.parse(db)`).
27+
#[derive(Clone, Debug)]
2628
pub(crate) struct ArkFile {
2729
pub(crate) file: File,
2830
pub(crate) version: Option<i32>,
2931
pub(crate) config: DocumentConfig,
32+
// The editor's verbatim URL. We store it rather than recompute it from
33+
// `file`'s path so the bytes the frontend sent round-trip exactly. It lives
34+
// on `ArkFile` so it travels with owned values for callers that can't
35+
// easily access `WorldState::open_files`: the diagnostics task on a worker
36+
// thread (`RefreshDiagnosticsTask`) and `code_action/roxygen.rs`, which
37+
// builds a `WorkspaceEdit` keyed by URL.
38+
//
39+
// TODO: this is a stopgap that goes away with `ArkFile`. Once handlers are
40+
// pure Oak, they return `File`-keyed results (diagnostics, the edit targets
41+
// in a `WorkspaceEdit`) and the wire URL gets attached at the transport
42+
// boundary from a map of open editor URLs owned by the LSP layer. In that
43+
// design the verbatim URL never travels through the analysis layer.
3044
pub(crate) url: Url,
3145
pub(crate) encoding: PositionEncoding,
3246
}
@@ -161,3 +175,60 @@ pub(crate) fn test_ark_file(code: &str) -> (oak_db::OakDatabase, ArkFile) {
161175
};
162176
(db, file)
163177
}
178+
179+
#[cfg(test)]
180+
mod tests {
181+
use tree_sitter::Point;
182+
183+
use super::*;
184+
185+
#[test]
186+
fn test_tree_sitter_point_from_lsp_position_wide_encoding() {
187+
// The emoji is 4 UTF-8 bytes and 2 UTF-16 code units (a surrogate pair).
188+
// `test_ark_file` defaults to UTF-16, the encoding under test here.
189+
let (db, ark_file) = test_ark_file("😃a");
190+
191+
let point = ark_file
192+
.tree_sitter_point_from_lsp_position(&db, lsp_types::Position::new(0, 2))
193+
.unwrap();
194+
assert_eq!(point, Point::new(0, 4));
195+
196+
let point = ark_file
197+
.tree_sitter_point_from_lsp_position(&db, lsp_types::Position::new(0, 3))
198+
.unwrap();
199+
assert_eq!(point, Point::new(0, 5));
200+
}
201+
202+
#[test]
203+
fn test_lsp_position_from_tree_sitter_point_wide_encoding() {
204+
let (db, ark_file) = test_ark_file("😃a");
205+
206+
let position = ark_file
207+
.lsp_position_from_tree_sitter_point(&db, Point::new(0, 4))
208+
.unwrap();
209+
assert_eq!(position, lsp_types::Position::new(0, 2));
210+
211+
let position = ark_file
212+
.lsp_position_from_tree_sitter_point(&db, Point::new(0, 5))
213+
.unwrap();
214+
assert_eq!(position, lsp_types::Position::new(0, 3));
215+
}
216+
217+
#[test]
218+
fn test_utf8_position_roundtrip_multibyte() {
219+
// `é` is 2 bytes
220+
let (db, mut ark_file) = test_ark_file("é\n");
221+
ark_file.encoding = PositionEncoding::Utf8;
222+
223+
let lsp_position = lsp_types::Position::new(0, 2);
224+
let point = ark_file
225+
.tree_sitter_point_from_lsp_position(&db, lsp_position)
226+
.unwrap();
227+
assert_eq!(point, Point::new(0, 2));
228+
229+
let roundtrip_position = ark_file
230+
.lsp_position_from_tree_sitter_point(&db, point)
231+
.unwrap();
232+
assert_eq!(roundtrip_position, lsp_position);
233+
}
234+
}
crates/ark/src/lsp/content_changes.rs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
use aether_lsp_utils::proto::from_proto;
2+
use aether_lsp_utils::proto::PositionEncoding;
3+
use tower_lsp::lsp_types;
4+
5+
// --- source
6+
// authors = ["rust-analyzer team"]
7+
// license = "MIT OR Apache-2.0"
8+
// origin = "https://github.com/rust-lang/rust-analyzer/blob/master/crates/rust-analyzer/src/lsp/utils.rs"
9+
// ---
10+
/// Apply a batch of LSP content changes to `contents`, returning the new text.
11+
pub(crate) fn apply_content_changes(
12+
contents: &str,
13+
content_changes: &[lsp_types::TextDocumentContentChangeEvent],
14+
encoding: PositionEncoding,
15+
) -> String {
16+
let mut contents = contents.to_string();
17+
let mut changes = content_changes.to_vec();
18+
19+
// If at least one of the changes is a full document change, use the last of them
20+
// as the starting point and ignore all previous changes. We then know that all
21+
// changes after this (if any!) are incremental changes.
22+
//
23+
// If we do have a full document change, that implies the `last_start_line`
24+
// corresponding to that change is line 0, which will correctly force a rebuild
25+
// of the line index before applying any incremental changes.
26+
let (changes, mut last_start_line) =
27+
match changes.iter().rposition(|change| change.range.is_none()) {
28+
Some(idx) => {
29+
let incremental = changes.split_off(idx + 1);
30+
// Unwrap: `rposition()` confirmed this index contains a full document change
31+
let change = changes.pop().unwrap();
32+
contents = change.text;
33+
(incremental, 0)
34+
},
35+
None => (changes, u32::MAX),
36+
};
37+
38+
let mut line_index = biome_line_index::LineIndex::new(&contents);
39+
40+
// Handle all incremental changes after the last full document change. We don't
41+
// typically get >1 incremental change as the user types, but we do get them in a
42+
// batch after a find-and-replace, or after a format-on-save request.
43+
//
44+
// Some editors like VS Code send the edits in reverse order (from the bottom of
45+
// file -> top of file). We can take advantage of this, because applying an edit
46+
// on, say, line 10, doesn't invalidate the `line_index` if we then need to apply
47+
// an additional edit on line 5. That said, we may still have edits that cross
48+
// lines, so rebuilding the `line_index` is not always avoidable.
49+
for change in changes {
50+
let range = change
51+
.range
52+
.expect("`None` case already handled by finding the last full document change.");
53+
54+
// If the end of this change is at or past the start of the last change, then
55+
// the `line_index` needed to apply this change is now invalid, so we have to
56+
// rebuild it.
57+
if range.end.line >= last_start_line {
58+
line_index = biome_line_index::LineIndex::new(&contents);
59+
}
60+
last_start_line = range.start.line;
61+
62+
// This is a panic if we can't convert. It means we can't keep the document up
63+
// to date and something is very wrong.
64+
let range: std::ops::Range<usize> = from_proto::text_range(range, &line_index, encoding)
65+
.expect("Can convert `range` from `Position` to `TextRange`.")
66+
.into();
67+
68+
contents.replace_range(range, &change.text);
69+
}
70+
71+
contents
72+
}
73+
74+
#[cfg(test)]
75+
mod tests {
76+
use biome_line_index::WideEncoding;
77+
78+
use super::*;
79+
80+
const ENCODING: PositionEncoding = PositionEncoding::Wide(WideEncoding::Utf16);
81+
82+
fn insert(text: &str, line: u32, character: u32) -> lsp_types::TextDocumentContentChangeEvent {
83+
let position = lsp_types::Position::new(line, character);
84+
lsp_types::TextDocumentContentChangeEvent {
85+
range: Some(lsp_types::Range::new(position, position)),
86+
range_length: None,
87+
text: text.to_string(),
88+
}
89+
}
90+
91+
#[test]
92+
fn test_apply_content_changes_incremental_inserts() {
93+
// Type "lib" one character at a time, the way an editor streams it.
94+
let after_l = apply_content_changes("", &[insert("l", 0, 0)], ENCODING);
95+
assert_eq!(after_l, "l");
96+
97+
let after_i = apply_content_changes(&after_l, &[insert("i", 0, 1)], ENCODING);
98+
assert_eq!(after_i, "li");
99+
100+
let after_b = apply_content_changes(&after_i, &[insert("b", 0, 2)], ENCODING);
101+
assert_eq!(after_b, "lib");
102+
}
103+
104+
#[test]
105+
fn test_apply_content_changes_full_replacement_wins() {
106+
// A range-less change replaces the whole buffer; earlier changes in the
107+
// batch are discarded, later incremental ones apply on top of it.
108+
let changes = vec![
109+
insert("ignored", 0, 0),
110+
lsp_types::TextDocumentContentChangeEvent {
111+
range: None,
112+
range_length: None,
113+
text: "abc\n".to_string(),
114+
},
115+
insert("X", 0, 3),
116+
];
117+
assert_eq!(apply_content_changes("old", &changes, ENCODING), "abcX\n");
118+
}
119+
}

crates/ark/src/lsp/declarations.rs

Lines changed: 51 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -89,68 +89,83 @@ fn ark_diagnostics_args<'tree>(ark_args: Node<'tree>, contents: &str) -> Option<
8989
mod test {
9090
use stdext::assert_match;
9191

92+
use crate::lsp::ark_file::test_ark_file;
9293
use crate::lsp::declarations::declare_ark_args;
9394
use crate::lsp::declarations::top_level_declare;
9495
use crate::lsp::declarations::top_level_declare_args;
95-
use crate::lsp::document::Document;
9696

9797
#[test]
9898
fn test_declare_args() {
99-
let doc = Document::new("", None);
100-
assert_match!(top_level_declare_args(&doc.ast, &doc.contents), None);
101-
102-
let doc = Document::new("declare()", None);
103-
assert_match!(top_level_declare_args(&doc.ast, &doc.contents), Some(_));
104-
105-
let doc = Document::new("~declare()", None);
106-
assert_match!(top_level_declare_args(&doc.ast, &doc.contents), Some(_));
107-
108-
let doc = Document::new("# foo\n#bar\n\ndeclare()", None);
109-
assert_match!(top_level_declare_args(&doc.ast, &doc.contents), Some(_));
110-
111-
let doc = Document::new("# foo\nbar\n\ndeclare()", None);
112-
assert_match!(top_level_declare_args(&doc.ast, &doc.contents), None);
99+
let (db, file) = test_ark_file("");
100+
assert_match!(
101+
top_level_declare_args(file.tree_sitter(&db), file.contents(&db)),
102+
None
103+
);
104+
105+
let (db, file) = test_ark_file("declare()");
106+
assert_match!(
107+
top_level_declare_args(file.tree_sitter(&db), file.contents(&db)),
108+
Some(_)
109+
);
110+
111+
let (db, file) = test_ark_file("~declare()");
112+
assert_match!(
113+
top_level_declare_args(file.tree_sitter(&db), file.contents(&db)),
114+
Some(_)
115+
);
116+
117+
let (db, file) = test_ark_file("# foo\n#bar\n\ndeclare()");
118+
assert_match!(
119+
top_level_declare_args(file.tree_sitter(&db), file.contents(&db)),
120+
Some(_)
121+
);
122+
123+
let (db, file) = test_ark_file("# foo\nbar\n\ndeclare()");
124+
assert_match!(
125+
top_level_declare_args(file.tree_sitter(&db), file.contents(&db)),
126+
None
127+
);
113128
}
114129

115130
#[test]
116131
fn test_declare_ark_args() {
117-
let doc = Document::new("declare()", None);
118-
let decls = top_level_declare_args(&doc.ast, &doc.contents).unwrap();
119-
assert_match!(declare_ark_args(decls, &doc.contents), None);
132+
let (db, file) = test_ark_file("declare()");
133+
let decls = top_level_declare_args(file.tree_sitter(&db), file.contents(&db)).unwrap();
134+
assert_match!(declare_ark_args(decls, file.contents(&db)), None);
120135

121-
let doc = Document::new("declare(ark())", None);
122-
let decls = top_level_declare_args(&doc.ast, &doc.contents).unwrap();
123-
assert_match!(declare_ark_args(decls, &doc.contents), Some(_));
136+
let (db, file) = test_ark_file("declare(ark())");
137+
let decls = top_level_declare_args(file.tree_sitter(&db), file.contents(&db)).unwrap();
138+
assert_match!(declare_ark_args(decls, file.contents(&db)), Some(_));
124139

125-
let doc = Document::new("declare(foo, ark())", None);
126-
let decls = top_level_declare_args(&doc.ast, &doc.contents).unwrap();
127-
assert_match!(declare_ark_args(decls, &doc.contents), Some(_));
140+
let (db, file) = test_ark_file("declare(foo, ark())");
141+
let decls = top_level_declare_args(file.tree_sitter(&db), file.contents(&db)).unwrap();
142+
assert_match!(declare_ark_args(decls, file.contents(&db)), Some(_));
128143
}
129144

130145
#[test]
131146
fn test_declare_diagnostics() {
132-
let doc = Document::new("", None);
133-
let decls = top_level_declare(&doc.ast, &doc.contents);
147+
let (db, file) = test_ark_file("");
148+
let decls = top_level_declare(file.tree_sitter(&db), file.contents(&db));
134149
assert!(decls.diagnostics);
135150

136-
let doc = Document::new("declare(ark(diagnostics(enable = TRUE)))", None);
137-
let decls = top_level_declare(&doc.ast, &doc.contents);
151+
let (db, file) = test_ark_file("declare(ark(diagnostics(enable = TRUE)))");
152+
let decls = top_level_declare(file.tree_sitter(&db), file.contents(&db));
138153
assert!(decls.diagnostics);
139154

140-
let doc = Document::new("declare(ark(diagnostics(enable = NULL)))", None);
141-
let decls = top_level_declare(&doc.ast, &doc.contents);
155+
let (db, file) = test_ark_file("declare(ark(diagnostics(enable = NULL)))");
156+
let decls = top_level_declare(file.tree_sitter(&db), file.contents(&db));
142157
assert!(decls.diagnostics);
143158

144-
let doc = Document::new("declare(ark(diagnostics(enable = invalid())))", None);
145-
let decls = top_level_declare(&doc.ast, &doc.contents);
159+
let (db, file) = test_ark_file("declare(ark(diagnostics(enable = invalid())))");
160+
let decls = top_level_declare(file.tree_sitter(&db), file.contents(&db));
146161
assert!(decls.diagnostics);
147162

148-
let doc = Document::new("~declare()", None);
149-
let decls = top_level_declare(&doc.ast, &doc.contents);
163+
let (db, file) = test_ark_file("~declare()");
164+
let decls = top_level_declare(file.tree_sitter(&db), file.contents(&db));
150165
assert!(decls.diagnostics);
151166

152-
let doc = Document::new("declare(ark(diagnostics(enable = FALSE)))", None);
153-
let decls = top_level_declare(&doc.ast, &doc.contents);
167+
let (db, file) = test_ark_file("declare(ark(diagnostics(enable = FALSE)))");
168+
let decls = top_level_declare(file.tree_sitter(&db), file.contents(&db));
154169
assert!(!decls.diagnostics);
155170
}
156171
}

0 commit comments

Comments
 (0)