Skip to content

Commit 330c0e9

Browse files
committed
Implement the production SourceHandler for the LSP
1 parent 01a6930 commit 330c0e9

5 files changed

Lines changed: 226 additions & 13 deletions

File tree

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/ark/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,9 @@ oak_ide.workspace = true
5555
oak_package_metadata.workspace = true
5656
oak_scan.workspace = true
5757
oak_semantic.workspace = true
58+
oak_source.workspace = true
5859
oak_sources.workspace = true
60+
oak_srcref.workspace = true
5961
once_cell.workspace = true
6062
regex.workspace = true
6163
reqwest.workspace = true
@@ -90,6 +92,7 @@ ark = { workspace = true, features = ["testing"] }
9092
ark_test.workspace = true
9193
assert_matches.workspace = true
9294
insta.workspace = true
95+
oak_r_process.workspace = true
9396
oak_semantic = { workspace = true, features = ["testing"] }
9497
oak_package_metadata.workspace = true
9598
stdext = { workspace = true, features = ["testing"] }

crates/ark/src/lsp/main_loop.rs

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use std::path::PathBuf;
1313
use std::pin::Pin;
1414
use std::sync::atomic::AtomicBool;
1515
use std::sync::atomic::Ordering;
16+
use std::sync::Arc;
1617
use std::sync::LazyLock;
1718
use std::sync::RwLock;
1819

@@ -53,7 +54,9 @@ use crate::lsp::diagnostics::generate_diagnostics;
5354
use crate::lsp::handlers;
5455
use crate::lsp::indexer;
5556
use crate::lsp::open_file::OpenFile;
57+
use crate::lsp::sources::OakSourceHandler;
5658
use crate::lsp::sources::SourceCompleted;
59+
use crate::lsp::sources::SourceHandler;
5760
use crate::lsp::sources::SourceScheduler;
5861
use crate::lsp::state::WorldState;
5962
use crate::lsp::state_handlers;
@@ -240,7 +243,7 @@ impl GlobalState {
240243
/// and auxiliary loop.
241244
pub(crate) fn new(
242245
client: Client,
243-
_r_home: PathBuf,
246+
r_home: PathBuf,
244247
console_notification_tx: TokioUnboundedSender<ConsoleNotification>,
245248
) -> Self {
246249
// FIXME: We shouldn't call R code in the kernel to figure this out
@@ -268,7 +271,10 @@ impl GlobalState {
268271
Self::from_parts(
269272
client,
270273
WorldState::new(db, library),
271-
LspState::new(console_notification_tx, SourceScheduler::new(None)),
274+
LspState::new(
275+
console_notification_tx,
276+
SourceScheduler::new(source_handler(&r_home)),
277+
),
272278
)
273279
}
274280

@@ -603,6 +609,33 @@ impl GlobalState {
603609
}
604610
}
605611

612+
/// Build the LSP's [`SourceHandler`], or `None` to disable source fetching
613+
fn source_handler(r_home: &Path) -> Option<Arc<dyn SourceHandler>> {
614+
if !cfg!(debug_assertions) {
615+
// TODO!: Remove this to activate in release builds as well.
616+
// Currently only active in debug builds (including unit and integration tests).
617+
return None;
618+
}
619+
620+
let Some(r) = harp::command::r_executable(r_home) else {
621+
log::warn!(
622+
"Can't locate an R executable under '{}', package source fetching is disabled",
623+
r_home.display()
624+
);
625+
return None;
626+
};
627+
628+
match OakSourceHandler::new(r) {
629+
Ok(handler) => Some(Arc::new(handler)),
630+
Err(err) => {
631+
log::error!(
632+
"Can't create package source handler, source fetching is disabled: {err:?}"
633+
);
634+
None
635+
},
636+
}
637+
}
638+
606639
/// Test-only methods for driving the main loop without R or a live LSP
607640
/// connection. Kept here, next to the loop they exercise, so the pump uses the
608641
/// real `handle_event()` and the private channels rather than a reconstruction.

crates/ark/src/lsp/sources.rs

Lines changed: 93 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ use std::sync::Arc;
66
use aether_path::FilePath;
77
use oak_db::Db;
88
use oak_db::Package;
9+
use oak_db::Priority;
10+
use oak_source::SourceCache;
11+
use oak_srcref::SrcrefCache;
912
use stdext::result::ResultExt;
1013

1114
use crate::lsp::main_loop::Event;
@@ -19,16 +22,97 @@ pub(crate) trait SourceHandler: Send + Sync {
1922
fn handle(&self, request: &SourceRequest) -> SourceResponse;
2023
}
2124

25+
/// The production [`SourceHandler`]
26+
///
27+
/// Recovers a package's R sources in the following order:
28+
/// - Base packages come from a downloaded CRAN's R source tarball
29+
/// - CRAN packages come from:
30+
/// - A local `srcref`, if available
31+
/// - A downloaded CRAN package tarball
32+
pub(crate) struct OakSourceHandler {
33+
srcref: SrcrefCache,
34+
source: SourceCache,
35+
}
36+
37+
impl OakSourceHandler {
38+
/// Build the handler, opening both caches against the shared on disk cache
39+
pub(crate) fn new(r: PathBuf) -> anyhow::Result<Self> {
40+
Ok(Self {
41+
srcref: SrcrefCache::new(r)?,
42+
source: SourceCache::new()?,
43+
})
44+
}
45+
46+
/// Build the handler with both caches rooted under `root`, so tests don't touch the
47+
/// real on disk cache
48+
#[cfg(test)]
49+
pub(crate) fn new_in(root: &Path, r: PathBuf) -> anyhow::Result<Self> {
50+
Ok(Self {
51+
srcref: SrcrefCache::new_in(root.join("srcref"), r)?,
52+
source: SourceCache::new_in(root.join("source"))?,
53+
})
54+
}
55+
}
56+
57+
impl SourceHandler for OakSourceHandler {
58+
fn handle(&self, request: &SourceRequest) -> SourceResponse {
59+
let name = request.name();
60+
let version = request.version();
61+
62+
// Base packages only exist in the R version source tarball, served from one
63+
// download shared across all of them
64+
if matches!(request.priority(), Some(Priority::Base)) {
65+
return self
66+
.source
67+
.get_r(version)
68+
.or_else(|| self.source.insert_r(version))
69+
.map(|root| SourceResponse::Success(r_dir_for_base(&root, name)))
70+
.unwrap_or(SourceResponse::Failure);
71+
}
72+
73+
// Try to "get" from all sources before doing an expensive "insert"
74+
if let Some(dir) = self.srcref.get(name, version, request.built()) {
75+
return SourceResponse::Success(dir);
76+
}
77+
if let Some(root) = self.source.get_cran(name, version) {
78+
return SourceResponse::Success(r_dir_for_cran(&root));
79+
}
80+
81+
// Prefer `srcref` to CRAN download since it doesn't require internet and would
82+
// be an exact match to the installed package
83+
if let Some(dir) =
84+
self.srcref
85+
.insert(name, version, request.built(), request.library_path())
86+
{
87+
return SourceResponse::Success(dir);
88+
}
89+
if let Some(root) = self.source.insert_cran(name, version) {
90+
return SourceResponse::Success(r_dir_for_cran(&root));
91+
}
92+
93+
SourceResponse::Failure
94+
}
95+
}
96+
97+
/// Find `R/` from a CRAN package tarball
98+
fn r_dir_for_cran(root: &Path) -> PathBuf {
99+
root.join("R")
100+
}
101+
102+
/// Find `R/` from a R source tarball for a base package
103+
fn r_dir_for_base(root: &Path, name: &str) -> PathBuf {
104+
root.join("src").join("library").join(name).join("R")
105+
}
106+
22107
#[derive(Debug, Clone)]
23108
pub(crate) struct SourceRequest {
24109
name: String,
25110
version: String,
26111
built: String,
112+
priority: Option<Priority>,
27113
library_path: PathBuf,
28114
}
29115

30-
// TODO!: Remove when we have a production `SourceHandler`
31-
#[cfg_attr(not(test), expect(dead_code))]
32116
#[derive(Debug)]
33117
pub(crate) enum SourceResponse {
34118
Success(PathBuf),
@@ -56,7 +140,6 @@ enum SourceState {
56140
}
57141

58142
pub(crate) struct SourceScheduler {
59-
// TODO!: Remove the `Option<>` when we implement a production `SourceHandler`
60143
handler: Option<Arc<dyn SourceHandler>>,
61144
state: HashMap<Package, SourceState>,
62145
}
@@ -147,6 +230,8 @@ impl SourceRequest {
147230
));
148231
};
149232

233+
let priority = package.priority(db).clone();
234+
150235
let library_path = match package.description_path(db) {
151236
FilePath::File(path) => {
152237
match path.as_path().as_std_path().parent().and_then(Path::parent) {
@@ -169,30 +254,27 @@ impl SourceRequest {
169254
name,
170255
version,
171256
built,
257+
priority,
172258
library_path,
173259
})
174260
}
175261

176-
// TODO!: Remove when we have a production `SourceHandler`
177-
#[cfg_attr(not(test), expect(dead_code))]
178262
pub(crate) fn name(&self) -> &str {
179263
&self.name
180264
}
181265

182-
// TODO!: Remove when we have a production `SourceHandler`
183-
#[cfg_attr(not(test), expect(dead_code))]
184266
pub(crate) fn version(&self) -> &str {
185267
&self.version
186268
}
187269

188-
// TODO!: Remove when we have a production `SourceHandler`
189-
#[cfg_attr(not(test), expect(dead_code))]
190270
pub(crate) fn built(&self) -> &str {
191271
&self.built
192272
}
193273

194-
// TODO!: Remove when we have a production `SourceHandler`
195-
#[cfg_attr(not(test), expect(dead_code))]
274+
pub(crate) fn priority(&self) -> Option<&Priority> {
275+
self.priority.as_ref()
276+
}
277+
196278
pub(crate) fn library_path(&self) -> &Path {
197279
&self.library_path
198280
}

crates/ark/src/lsp/tests/sources.rs

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
44
use std::collections::HashMap;
55
use std::path::Path;
6+
use std::path::PathBuf;
67
use std::sync::Arc;
78
use std::sync::Mutex;
89

@@ -26,6 +27,7 @@ use crate::lsp::main_loop::init_aux_for_test;
2627
use crate::lsp::main_loop::Event;
2728
use crate::lsp::main_loop::GlobalState;
2829
use crate::lsp::main_loop::LspState;
30+
use crate::lsp::sources::OakSourceHandler;
2931
use crate::lsp::sources::SourceHandler;
3032
use crate::lsp::sources::SourceRequest;
3133
use crate::lsp::sources::SourceResponse;
@@ -121,6 +123,28 @@ fn dispatched_names(calls: &Mutex<Vec<SourceRequest>>) -> Vec<String> {
121123
.collect()
122124
}
123125

126+
/// Find R on the `PATH`
127+
///
128+
/// On Windows, `which` (from Git) returns POSIX paths that `Command::new()` can't resolve.
129+
/// Use `where` which returns native paths.
130+
fn find_r() -> PathBuf {
131+
let output = std::process::Command::new(if cfg!(windows) { "where" } else { "which" })
132+
.arg("R")
133+
.output()
134+
.unwrap_or_else(|err| panic!("Failed to find R: {err}"));
135+
assert!(output.status.success());
136+
137+
// `where` on Windows can return multiple matches, take the first
138+
PathBuf::from(
139+
String::from_utf8(output.stdout)
140+
.expect("Non-UTF8 R path")
141+
.trim()
142+
.lines()
143+
.next()
144+
.expect("R should exist"),
145+
)
146+
}
147+
124148
/// The happy path end to end: a workspace uses an installed library package via
125149
/// `::`, so the revision-advance check dispatches a source request, the handler
126150
/// returns a directory, and the main loop ingests it into the library package.
@@ -226,3 +250,71 @@ async fn test_failed_source_is_not_retried() {
226250
"donor"
227251
)]);
228252
}
253+
254+
/// End to end against real `srcref` recovery: install {generics} from source into a
255+
/// temporary library, point a workspace at it via `::`, inject the real
256+
/// [`OakSourceHandler`], and assert the recovered sources are ingested.
257+
///
258+
/// Requires R on the `PATH` and internet access. We use {generics} because it is small and
259+
/// easy to install from source, the same package `oak_srcref`'s own extraction test uses.
260+
#[tokio::test]
261+
async fn test_source_pipeline_ingests_real_srcref_sources() {
262+
let _aux = init_aux_for_test();
263+
264+
let r = find_r();
265+
266+
// Temporary library, with {generics} installed from source so srcrefs are preserved
267+
let library = tempfile::tempdir().unwrap();
268+
269+
// Use forward slashes so the path is safe inside R string literals on Windows
270+
let library_literal = library.path().display().to_string().replace('\\', "/");
271+
272+
let output = oak_r_process::run_text(
273+
&r,
274+
&format!(
275+
r#"install.packages("generics", lib = "{library_literal}", repos = "https://cran.r-project.org", type = "source", INSTALL_opts = "--with-keep.source")"#,
276+
),
277+
&[],
278+
&[],
279+
)
280+
.expect("Failed to run install.packages()");
281+
assert!(output.status.success());
282+
283+
// The real handler, with both caches rooted in a temp dir so the test doesn't touch
284+
// the shared on disk cache
285+
let cache = tempfile::tempdir().unwrap();
286+
let handler: Arc<dyn SourceHandler> =
287+
Arc::new(OakSourceHandler::new_in(cache.path(), r).unwrap());
288+
289+
let mut db = OakDatabase::new();
290+
db.set_library_paths(&[library.path().to_path_buf()]);
291+
292+
let mut state = GlobalState::from_parts(
293+
test_client(),
294+
WorldState::new(db, Library::new(vec![])),
295+
LspState::new(
296+
tokio::sync::mpsc::unbounded_channel().0,
297+
SourceScheduler::new(Some(handler)),
298+
),
299+
);
300+
301+
// A workspace package that uses {generics} via `::`
302+
let workspace = tempfile::tempdir().unwrap();
303+
let myproj = workspace.path().join("myproj");
304+
write_description(&myproj, "myproj");
305+
write_sources(&myproj.join("R"), &[("use.R", "generics::as.factor()\n")]);
306+
307+
state
308+
.handle_event_to_quiescence(did_change_workspace_folders(workspace.path()))
309+
.await;
310+
311+
// {generics} now carries its recovered sources, readable from disk. {generics} is a
312+
// package of S3 generics, so every recovered file is full of `UseMethod()` calls.
313+
let db = &state.world().db;
314+
let generics = db.package_by_name("generics").unwrap();
315+
let files = generics.files(db).clone();
316+
assert!(!files.is_empty());
317+
assert!(files
318+
.iter()
319+
.any(|file| file.source_text(db).contains("UseMethod")));
320+
}

0 commit comments

Comments
 (0)