diff --git a/Cargo.lock b/Cargo.lock index 6509245cd..417277aa2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2622,6 +2622,21 @@ dependencies = [ "tempfile", ] +[[package]] +name = "oak_scan" +version = "0.1.0" +dependencies = [ + "aether_url", + "log", + "oak_db", + "oak_package_metadata", + "salsa", + "stdext", + "tempfile", + "url", + "walkdir", +] + [[package]] name = "oak_semantic" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 761521c78..29f9e0c14 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,6 +79,7 @@ oak_ide = { path = "crates/oak_ide" } oak_index_vec = { path = "crates/oak_index_vec" } oak_package_metadata = { path = "crates/oak_package_metadata" } oak_r_process = { path = "crates/oak_r_process" } +oak_scan = { path = "crates/oak_scan" } oak_semantic = { path = "crates/oak_semantic" } oak_sources = { path = "crates/oak_sources" } once_cell = "1.21.4" diff --git a/crates/oak_db/src/db.rs b/crates/oak_db/src/db.rs index cc83a7c79..eb41cef52 100644 --- a/crates/oak_db/src/db.rs +++ b/crates/oak_db/src/db.rs @@ -3,9 +3,11 @@ use rustc_hash::FxHashMap; use crate::File; use crate::LibraryRoots; +use crate::LiveRoot; use crate::OrphanRoot; use crate::Package; use crate::Root; +use crate::StaleRoot; use crate::WorkspaceRoots; /// Concrete-input surface of the salsa database. Each impl @@ -26,6 +28,10 @@ pub trait DbInputs: salsa::Database { /// Files not yet anchored to any workspace or library root. fn orphan_root(&self) -> OrphanRoot; + + /// Files and packages from roots that have been removed. Holding + /// pen for entity reuse on re-add (see [`StaleRoot`]). + fn stale_root(&self) -> StaleRoot; } /// Salsa database trait used throughout `oak_db`. Tracked queries take `&dyn @@ -53,6 +59,57 @@ pub trait Db: DbInputs { /// - Installed packages in an earlier root shadow later ones /// (mirroring `.libPaths()`). fn package_by_name(&self, name: &str) -> Option; + + /// Look up a `Package` by its `DESCRIPTION` URL. + /// + /// Walks workspace packages, then library packages, then falls back + /// to [`StaleRoot`]. Stale matches are intentional: scanner upserts + /// use this to find a `Package` entity whose live container was + /// dropped on a previous `set_*_paths` call, so the entity gets + /// reused on re-add. Analysis paths should not call this — they use + /// [`Db::package_by_name`] which is stale-blind. + fn package_by_url(&self, url: &UrlId) -> Option; + + /// Resolve the live `Root` that contains `pkg`, if any. + /// + /// Returns `None` when the package is only in [`StaleRoot`] (its live + /// container was previously evicted). + /// + /// **Nested roots.** Two roots can claim the same package when one is + /// nested inside the other, e.g. the frontend opens both `/proj` and + /// `/proj/sub-pkg` as workspace folders and both scans walk into + /// `sub-pkg/DESCRIPTION`. Both scans hand the same `Package` entity to + /// their respective root's `packages` vec; the longest-path root wins + /// the ownership query here. The shorter root's vec still transiently + /// lists the package, but it self-heals on its next scan since + /// `set_packages` replaces the vec wholesale. + fn root_by_package(&self, pkg: Package) -> Option; + + /// All live roots in lookup-precedence order: workspace folders first, then + /// library paths (mirroring R's `.libPaths()`), then the orphan bucket. + /// Stale roots are not included. Salsa-cached and invalidates when one of + /// `workspace_roots` / `library_roots` / `orphan_root` changes. + fn live_roots(&self) -> &[LiveRoot]; +} + +#[salsa::tracked(returns(ref))] +pub fn live_roots_query(db: &dyn Db) -> Vec { + let mut roots: Vec = db + .workspace_roots() + .roots(db) + .iter() + .map(|&r| LiveRoot::Workspace(r)) + .collect(); + + roots.extend( + db.library_roots() + .roots(db) + .iter() + .map(|&r| LiveRoot::Library(r)), + ); + + roots.push(LiveRoot::Orphan(db.orphan_root())); + roots } /// Implementation of [`Db::file_by_url`]. Walks the per-root indices. @@ -62,35 +119,104 @@ pub trait Db: DbInputs { /// cached map, so adding a file to one root invalidates only that /// root's index. pub fn file_by_url_query(db: &dyn Db, url: &UrlId) -> Option { - for root in db.workspace_roots().roots(db) { - if let Some(&file) = root_url_index(db, *root).get(url) { - return Some(file); + for &root in db.live_roots() { + let hit = match root { + LiveRoot::Workspace(r) | LiveRoot::Library(r) => { + root_url_index(db, r).get(url).copied() + }, + LiveRoot::Orphan(_) => orphan_url_index(db).get(url).copied(), + }; + if hit.is_some() { + return hit; } } - for root in db.library_roots().roots(db) { - if let Some(&file) = root_url_index(db, *root).get(url) { - return Some(file); - } - } - orphan_url_index(db).get(url).copied() + None } /// Implementation of [`Db::package_by_name`]. Same shape as -/// [`file_by_url_query`]. +/// [`file_by_url_query`]; orphan has no packages, so it contributes +/// nothing to the walk. pub fn package_by_name_query(db: &dyn Db, name: &str) -> Option { - for root in db.workspace_roots().roots(db) { - if let Some(&pkg) = root_package_index(db, *root).get(name) { - return Some(pkg); + for &root in db.live_roots() { + if let LiveRoot::Workspace(r) | LiveRoot::Library(r) = root { + if let Some(&pkg) = root_package_index(db, r).get(name) { + return Some(pkg); + } } } - for root in db.library_roots().roots(db) { - if let Some(&pkg) = root_package_index(db, *root).get(name) { - return Some(pkg); + None +} + +/// Implementation of [`Db::package_by_url`]. Walks live roots' packages +/// by `description_url`, then falls back to the stale bucket. +pub fn package_by_url_query(db: &dyn Db, url: &UrlId) -> Option { + for &root in db.live_roots() { + if let LiveRoot::Workspace(r) | LiveRoot::Library(r) = root { + if let Some(&pkg) = root_package_url_index(db, r).get(url) { + return Some(pkg); + } + } + } + stale_package_url_index(db).get(url).copied() +} + +/// Implementation of [`Db::root_by_package`]. Walks all live roots looking for +/// `pkg` in their `packages` vec, picking the longest-path root on ties. +pub fn root_by_package_query(db: &dyn Db, pkg: Package) -> Option { + let mut best: Option<(Root, usize)> = None; + for &root in db.live_roots() { + let (LiveRoot::Workspace(r) | LiveRoot::Library(r)) = root else { + continue; + }; + if r.packages(db).contains(&pkg) { + let depth = root_depth(db, r); + if best.is_none_or(|(_, d)| depth > d) { + best = Some((r, depth)); + } + } + } + best.map(|(root, _)| root) +} + +/// Resolve the [`Package`] that owns `file`, if any. +/// +/// Walks live roots' `File -> Package` indexes in workspace-then-library +/// order and returns the first hit. A file belongs to at most one package +/// *entity* (in the nested-root case both roots' `packages` hold the same +/// entity), so first-hit is unambiguous. The orphan bucket has no packages +/// and contributes nothing; stale entities are invisible by design, which +/// is what makes an evicted file's package association clear to `None`. +/// +/// Backs [`crate::File::package`], which is the derived replacement for the +/// old `File.package` back-pointer field: the container vecs are now the +/// single source of truth for file ownership. +pub fn package_by_file_query(db: &dyn Db, file: File) -> Option { + for &root in db.live_roots() { + if let LiveRoot::Workspace(r) | LiveRoot::Library(r) = root { + if let Some(&pkg) = root_file_package_index(db, r).get(&file) { + return Some(pkg); + } } } None } +/// Number of path segments in a root's URL. Used as the tiebreaker by +/// [`root_by_package_query`] when nested roots both claim the same package. +/// +/// Counts URL segments directly rather than going through `to_file_path()`. +/// `to_file_path()` errors on Windows for non-OS-style URLs (no drive +/// letter), which would silently collapse all depths to zero and degrade +/// the tiebreaker into "first found wins". Depth is a structural property +/// of the URL hierarchy, so the URL itself is the right source. +fn root_depth(db: &dyn Db, root: Root) -> usize { + root.path(db) + .as_url() + .path_segments() + .map(|s| s.filter(|seg| !seg.is_empty()).count()) + .unwrap_or(0) +} + /// Per-root URL -> File index. Salsa caches one map per `Root`; /// reads only `root.scripts`, `root.packages`, and each /// `pkg.files` reachable from this root. Adding or removing a file @@ -109,6 +235,21 @@ fn root_url_index(db: &dyn Db, root: Root) -> FxHashMap { map } +/// Per-root File -> owning Package index. Built from `root.packages` and +/// each package's `files`. Same per-root granularity as [`root_url_index`]: +/// adding or removing a file in this root invalidates only this entry. +/// Backs [`package_by_file_query`]. +#[salsa::tracked(returns(ref))] +fn root_file_package_index(db: &dyn Db, root: Root) -> FxHashMap { + let mut map = FxHashMap::default(); + for &pkg in root.packages(db) { + for &file in pkg.files(db) { + map.insert(file, pkg); + } + } + map +} + /// Orphan URL -> File index. Reads only `orphan_root().files`. #[salsa::tracked(returns(ref))] fn orphan_url_index(db: &dyn Db) -> FxHashMap { @@ -129,3 +270,45 @@ fn root_package_index(db: &dyn Db, root: Root) -> FxHashMap { } map } + +/// Per-root DESCRIPTION URL -> Package index. Used by +/// [`package_by_url_query`] for entity-reuse lookups across rescans; +/// salsa cache invalidates only when this root's packages change. +#[salsa::tracked(returns(ref))] +fn root_package_url_index(db: &dyn Db, root: Root) -> FxHashMap { + let mut map = FxHashMap::default(); + for &pkg in root.packages(db) { + map.insert(pkg.description_url(db).clone(), pkg); + } + map +} + +/// Stale file URL -> File index. Reads only `stale_root().files`. Not +/// consulted by [`file_by_url_query`] — analysis is stale-blind by +/// design. Scanner upserts use [`stale_file_by_url`] when re-adding a +/// path. +#[salsa::tracked(returns(ref))] +fn stale_url_index(db: &dyn Db) -> FxHashMap { + let mut map = FxHashMap::default(); + for &file in db.stale_root().files(db) { + map.insert(file.url(db).clone(), file); + } + map +} + +/// Look up a stale `File` by URL. Public so scanner upsert helpers in +/// `oak_scan` can fall back to stale after [`Db::file_by_url`] misses. +pub fn stale_file_by_url(db: &dyn Db, url: &UrlId) -> Option { + stale_url_index(db).get(url).copied() +} + +/// Stale DESCRIPTION URL -> Package index. Same role as +/// [`stale_url_index`] for packages. +#[salsa::tracked(returns(ref))] +fn stale_package_url_index(db: &dyn Db) -> FxHashMap { + let mut map = FxHashMap::default(); + for &pkg in db.stale_root().packages(db) { + map.insert(pkg.description_url(db).clone(), pkg); + } + map +} diff --git a/crates/oak_db/src/file.rs b/crates/oak_db/src/file.rs index 8ae55703a..5eac38ea7 100644 --- a/crates/oak_db/src/file.rs +++ b/crates/oak_db/src/file.rs @@ -23,34 +23,18 @@ use crate::Root; /// The `url` field is a [`UrlId`], so the type system enforces "everything /// inside Salsa is a canonical URL". /// -/// `package` is a back-pointer to the [`Package`] this file belongs to, or -/// `None` for standalone scripts. Inverse of `Package.files`, so queries -/// answering "what package owns this file?" don't walk the forward edge. -/// Files with `package == None` are either standalone scripts under a -/// workspace root or orphan files not registered anywhere. -/// -/// # Placement invariant -/// -/// `File.package` and the file's physical location in a `Vec` are -/// expected to agree. A file with `package == Some(pkg)` should live in -/// `pkg.files`. A file with `package == None` should live in either some -/// `root.scripts` or `orphan_root().files`. The salsa setters (`set_url`, -/// `set_contents`, `set_package`) are `pub` because field visibility couples to -/// setter visibility in salsa but calling `set_package` directly leaves the -/// file in its old bucket and silently breaks this invariant. -/// -/// The scanner crate (`oak_scan`) wraps these setters in helpers that -/// maintain placement (move the file between `pkg.files`, -/// `root.scripts`, and `orphan_root().files` as `package` changes). -/// Callers that go around the helpers and use the salsa setters -/// directly must maintain placement themselves. +/// File ownership has a single source of truth: the forward edge. A file +/// belongs to whichever container currently holds it -- `pkg.files`, +/// `root.scripts`, or `orphan_root().files`. There is no stored +/// back-pointer to keep in sync. "What package owns this file?" is +/// answered by the derived [`File::package`] query, which walks the +/// per-root `File -> Package` indexes; "what root?" by [`File::root`]. #[salsa::input(debug)] pub struct File { #[returns(ref)] pub url: UrlId, #[returns(ref)] pub contents: String, - pub package: Option, } #[salsa::tracked] @@ -142,19 +126,38 @@ impl File { .collect() } + /// The [`Package`] that owns this file, or `None` for standalone + /// scripts and orphan / stale files. + /// + /// Derived from live-graph containment via + /// [`package_by_file_query`](crate::db::package_by_file_query): the + /// file belongs to whichever live `pkg.files` currently holds it. + /// This replaces the old `File.package` back-pointer field, so file + /// ownership has a single source of truth (the forward edge) and no + /// invariant to maintain by hand. + #[salsa::tracked] + pub fn package(self, db: &dyn Db) -> Option { + crate::db::package_by_file_query(db, self) + } + /// The root containing this file, if any. /// - /// If the file has a registered [`Package`], dispatches through - /// `Package.root`. Otherwise falls back to a URL-prefix lookup - /// against [`WorkspaceRoots`] (orphan files live under a workspace - /// root or nowhere; library files always have a package). + /// If the file has an owning [`Package`], asks the db which live + /// root holds it via [`Db::root_by_package`]. Otherwise falls back to a + /// URL-prefix lookup against [`WorkspaceRoots`] (orphan files live + /// under a workspace root or nowhere). Library files normally have + /// a package; the `root_by_package` branch covers them too. + /// + /// Returns `None` if the file's package was evicted to + /// [`StaleRoot`] (no live root contains it), or if the file is in + /// orphan and the URL falls outside every workspace folder. /// /// Callers that need to distinguish workspace from library roots /// inspect `root.kind(db)`. #[salsa::tracked] pub fn root(self, db: &dyn Db) -> Option { if let Some(pkg) = self.package(db) { - return Some(pkg.root(db)); + return db.root_by_package(pkg); } root_by_url(db, self.url(db)) } diff --git a/crates/oak_db/src/file_imports.rs b/crates/oak_db/src/file_imports.rs index 4fea76d12..f57ab22f3 100644 --- a/crates/oak_db/src/file_imports.rs +++ b/crates/oak_db/src/file_imports.rs @@ -128,10 +128,11 @@ fn narrow_script_top_level(file: File, db: &dyn Db, offset: TextSize) -> Vec Vec { let files = package.files(db); let Some(file_pos) = files.iter().position(|f| *f == file) else { - // File claims membership but isn't in the package's `files`. + // `package` was derived from `package.files` containing this file + // (via `File::package`), so the position is always found. // Shouldn't happen. log::warn!( - "File {file} has package back-pointer to {package} but is not in its files", + "File {file} resolved to package {package} but is not in its files", file = file.url(db), package = package.name(db), ); diff --git a/crates/oak_db/src/inputs.rs b/crates/oak_db/src/inputs.rs index 2647bbbb9..7735c2b25 100644 --- a/crates/oak_db/src/inputs.rs +++ b/crates/oak_db/src/inputs.rs @@ -23,8 +23,14 @@ pub struct Root { pub path: UrlId, pub kind: RootKind, /// Top-level R scripts directly under this root. Each entry is a - /// `File` with `package(db) == None`. Always empty for `Library` - /// roots. + /// `File` not owned by any package (`package(db) == None`). Always + /// empty for `Library` roots. + /// + /// A file should live in exactly one container (`Root.scripts`, + /// `Package.files`, or `OrphanRoot.files`); `File::package` derives + /// ownership from that. Push files here only through `oak_scan`'s + /// helpers, which move a file out of its old container as it changes + /// homes. #[returns(ref)] pub scripts: Vec, /// Packages discovered under this root (workspace packages for @@ -39,6 +45,31 @@ pub enum RootKind { Library, } +/// A live root container that participates in analysis lookups. +/// +/// Bundles the three salsa inputs that hold files / packages the user is +/// actively working with: workspace [`Root`]s, library [`Root`]s, and the +/// [`OrphanRoot`] that catches unanchored buffers. Stale entities in +/// [`StaleRoot`] aren't included -- they have separate access patterns +/// (scanner upsert only, never analysis), so they stay as their own input. +/// +/// `Db::live_roots()` yields these in lookup precedence (workspace first, then +/// library, then orphan). +/// +/// TODO(salsa): this enum carries the workspace-vs-library distinction in its +/// variant tag, which makes the `Root.kind` field redundant. Drop the field +/// once callers route through `LiveRoot` everywhere instead of reading +/// `root.kind(db)` directly. Further out, splitting `Root` into separate +/// `WorkspaceRoot` and `LibraryRoot` salsa inputs (each with the fields that +/// actually apply to its kind: scripts only on workspace, etc.) frees up +/// the name `Root` to be this enum. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum LiveRoot { + Workspace(Root), + Library(Root), + Orphan(OrphanRoot), +} + /// The set of workspace folders the user has open. /// /// Populated by the LSP layer from `initialize.workspaceFolders` and @@ -88,8 +119,12 @@ impl LibraryRoots { /// Singleton: there is one `OrphanRoot` per concrete database, lazily /// initialised by the implementation. The `files` field is what /// [`crate::Db::file_by_url`] consults to find unanchored files. -#[salsa::input] +#[salsa::input(debug)] pub struct OrphanRoot { + /// Files here are owned by no package: since the orphan bucket has no + /// `packages`, `File::package` derives `None` for them. Push files + /// here only through `oak_scan`'s helpers, which move a file out of + /// its old container as it changes homes. #[returns(ref)] pub files: Vec, } @@ -100,12 +135,63 @@ impl OrphanRoot { } } +/// Files and packages from workspace or library roots that were removed +/// during a `set_*_paths` call. +/// +/// Salsa doesn't garbage-collect entities, so dropping a `Root` doesn't +/// free its `File` and `Package` entities. They'd just leak. Instead we +/// move them here and consult this bucket on the next `set_*_paths`, +/// reusing entities by URL when their paths come back. This matters for +/// agent / multi-repo workflows where the same workspace folder gets +/// added and removed repeatedly across a session. +/// +/// **Not consulted by analysis.** `Db::file_by_url` and +/// `Db::package_by_name` walk workspace / library roots and (for files) +/// `OrphanRoot` only. Entities in `StaleRoot` are invisible to +/// completions, goto-def, etc. — they correspond to folders the user +/// has explicitly removed. +/// +/// **Consulted by scanners.** `Db::package_by_url` walks live roots +/// then falls back to stale. Scanner upsert helpers do the same for +/// files. On reuse, the entity is moved out of stale back into a live +/// container. +/// +/// Singleton like `OrphanRoot`. The `files` and `packages` fields are +/// independent: a stale file's `package` may still point at a stale +/// package, and that's fine. Both are invisible to analysis until one +/// of them gets pulled back into a live container. +#[salsa::input] +pub struct StaleRoot { + #[returns(ref)] + pub files: Vec, + #[returns(ref)] + pub packages: Vec, +} + +impl StaleRoot { + pub fn empty(db: &dyn Db) -> Self { + Self::new(db, vec![], vec![]) + } +} + #[salsa::input(debug)] pub struct Package { - /// The `Root` this package belongs to. Workspace packages live under - /// a [`RootKind::Workspace`] root, installed packages live under a - /// [`RootKind::Library`] root. Read `root.kind(db)` to distinguish. - pub root: Root, + /// URL of the package's `DESCRIPTION` file. Stable identity across + /// rescans and workspace / library churn: scanners look up an + /// existing `Package` by this URL before creating a new one (see + /// [`Db::package_by_url`]). Two packages with the same `Package:` + /// name can coexist on disk and the URL distinguishes them. + /// + /// The package's owning [`Root`] is not stored as a field. It is + /// derived from live-graph containment via [`Db::root_by_package`]: a + /// package belongs to whichever `Root.packages` currently holds it. + /// Workspace-vs-library is then `root.kind(db)`. + #[returns(ref)] + pub description_url: UrlId, + // TODO(salsa): Expose a tracked `name_interned(db) -> Name<'db>` + // method so `db.package_by_name()` and other lookups key on the + // interned id rather than the string. Can't store `Name<'db>` on + // `Package` directly because salsa inputs are lifetime-free. #[returns(ref)] pub name: String, /// Installed-package version (from `DESCRIPTION`). `None` for @@ -118,6 +204,12 @@ pub struct Package { /// Per-package granularity: adding or removing a file in one /// package doesn't invalidate tracked queries reading another /// package's files. + /// + /// This vec is the source of truth for package membership: + /// `File::package` derives ownership by finding the file here. A file + /// should live in exactly one container, so push files here only + /// through `oak_scan`'s helpers, which move a file out of its old + /// container as it changes homes. #[returns(ref)] pub files: Vec, /// The basename ordering from `DESCRIPTION`'s `Collate` field, if diff --git a/crates/oak_db/src/lib.rs b/crates/oak_db/src/lib.rs index b8879a9f4..372ed8c9f 100644 --- a/crates/oak_db/src/lib.rs +++ b/crates/oak_db/src/lib.rs @@ -13,6 +13,7 @@ mod storage; #[cfg(test)] mod tests; +pub use db::stale_file_by_url; pub use db::Db; pub use db::DbInputs; pub use definition::Definition; @@ -21,10 +22,12 @@ pub use file_exports::ExportEntry; pub use file_exports::FileExports; pub use file_imports::ImportLayer; pub use inputs::LibraryRoots; +pub use inputs::LiveRoot; pub use inputs::OrphanRoot; pub use inputs::Package; pub use inputs::Root; pub use inputs::RootKind; +pub use inputs::StaleRoot; pub use inputs::WorkspaceRoots; pub use name::Name; pub use storage::OakDatabase; diff --git a/crates/oak_db/src/storage.rs b/crates/oak_db/src/storage.rs index bbf291663..ef837ca63 100644 --- a/crates/oak_db/src/storage.rs +++ b/crates/oak_db/src/storage.rs @@ -5,12 +5,13 @@ use crate::Db; use crate::DbInputs; use crate::LibraryRoots; use crate::OrphanRoot; +use crate::StaleRoot; use crate::WorkspaceRoots; /// Concrete Salsa database. /// -/// Holds singleton `WorkspaceRoots` / `LibraryRoots` / `OrphanRoot` -/// inputs and lazy-initialises them on first access. +/// Holds singleton `WorkspaceRoots` / `LibraryRoots` / `OrphanRoot` / +/// `StaleRoot` inputs and lazy-initialises them on first access. #[salsa::db] #[derive(Clone, Default)] pub struct OakDatabase { @@ -18,6 +19,7 @@ pub struct OakDatabase { workspace_roots: Arc>, library_roots: Arc>, orphan_root: Arc>, + stale_root: Arc>, } impl OakDatabase { @@ -44,6 +46,10 @@ impl DbInputs for OakDatabase { fn orphan_root(&self) -> OrphanRoot { *self.orphan_root.get_or_init(|| OrphanRoot::empty(self)) } + + fn stale_root(&self) -> StaleRoot { + *self.stale_root.get_or_init(|| StaleRoot::empty(self)) + } } #[salsa::db] @@ -55,4 +61,16 @@ impl Db for OakDatabase { fn package_by_name(&self, name: &str) -> Option { crate::db::package_by_name_query(self, name) } + + fn package_by_url(&self, url: &aether_url::UrlId) -> Option { + crate::db::package_by_url_query(self, url) + } + + fn root_by_package(&self, pkg: crate::Package) -> Option { + crate::db::root_by_package_query(self, pkg) + } + + fn live_roots(&self) -> &[crate::LiveRoot] { + crate::db::live_roots_query(self) + } } diff --git a/crates/oak_db/src/tests/db.rs b/crates/oak_db/src/tests/db.rs index 6c3c44fcd..8fa2540cc 100644 --- a/crates/oak_db/src/tests/db.rs +++ b/crates/oak_db/src/tests/db.rs @@ -14,7 +14,7 @@ use crate::Package; fn test_file_by_url_finds_workspace_script() { let mut db = TestDb::new(); let root = workspace_root(&db, "proj"); - let file = File::new(&db, file_url("proj/a.R"), String::new(), None); + let file = File::new(&db, file_url("proj/a.R"), String::new()); root.set_scripts(&mut db).to(vec![file]); db.workspace_roots().set_roots(&mut db).to(vec![root]); @@ -25,20 +25,19 @@ fn test_file_by_url_finds_workspace_script() { fn test_file_by_url_finds_workspace_package_file() { let mut db = TestDb::new(); let root = workspace_root(&db, "proj"); - // Construct the back-pointer + forward-edge pair: create the - // `Package` with an empty `files` list, then the `File` with the - // `package` set, then attach the file via `set_files`. Matches the - // shape `oak_scan`'s placement-preserving helpers will produce. + // Attach the file to the package via the forward edge (`set_files`), + // the single source of truth for membership. Matches the shape + // `oak_scan`'s placement-preserving helpers produce. let pkg = Package::new( &db, - root, + file_url("proj/DESCRIPTION"), "mypkg".to_string(), None, Namespace::default(), vec![], None, ); - let file = File::new(&db, file_url("proj/R/foo.R"), String::new(), Some(pkg)); + let file = File::new(&db, file_url("proj/R/foo.R"), String::new()); pkg.set_files(&mut db).to(vec![file]); root.set_packages(&mut db).to(vec![pkg]); db.workspace_roots().set_roots(&mut db).to(vec![root]); @@ -52,14 +51,14 @@ fn test_file_by_url_finds_library_package_file() { let lib = library_root(&db, "libs"); let pkg = Package::new( &db, - lib, + file_url("libs/foo/DESCRIPTION"), "foo".to_string(), Some("1.0.0".to_string()), Namespace::default(), vec![], None, ); - let file = File::new(&db, file_url("libs/foo/R/a.R"), String::new(), Some(pkg)); + let file = File::new(&db, file_url("libs/foo/R/a.R"), String::new()); pkg.set_files(&mut db).to(vec![file]); lib.set_packages(&mut db).to(vec![pkg]); db.library_roots().set_roots(&mut db).to(vec![lib]); @@ -70,7 +69,7 @@ fn test_file_by_url_finds_library_package_file() { #[test] fn test_file_by_url_finds_orphan_file() { let mut db = TestDb::new(); - let file = File::new(&db, file_url("untitled.R"), String::new(), None); + let file = File::new(&db, file_url("untitled.R"), String::new()); db.orphan_root().set_files(&mut db).to(vec![file]); assert_eq!(db.file_by_url(&file_url("untitled.R")), Some(file)); @@ -90,8 +89,8 @@ fn test_root_url_index_invalidates_per_root() { let mut db = TestDb::new(); let root_a = workspace_root(&db, "a"); let root_b = workspace_root(&db, "b"); - let file_a = File::new(&db, file_url("a/file.R"), String::new(), None); - let file_b = File::new(&db, file_url("b/file.R"), String::new(), None); + let file_a = File::new(&db, file_url("a/file.R"), String::new()); + let file_b = File::new(&db, file_url("b/file.R"), String::new()); root_a.set_scripts(&mut db).to(vec![file_a]); root_b.set_scripts(&mut db).to(vec![file_b]); db.workspace_roots() @@ -106,7 +105,7 @@ fn test_root_url_index_invalidates_per_root() { assert_eq!(db.executions("root_url_index"), 2); // Add a script to root B. B's index invalidates; A's stays cached. - let file_b2 = File::new(&db, file_url("b/other.R"), String::new(), None); + let file_b2 = File::new(&db, file_url("b/other.R"), String::new()); root_b.set_scripts(&mut db).to(vec![file_b, file_b2]); // Look up the file in A. A's index is still cached, no re-exec. diff --git a/crates/oak_db/src/tests/file.rs b/crates/oak_db/src/tests/file.rs index f2cf9c461..a0d60d21d 100644 --- a/crates/oak_db/src/tests/file.rs +++ b/crates/oak_db/src/tests/file.rs @@ -9,7 +9,7 @@ use crate::File; /// touching the orphan/workspace bucketing logic that's exercised in /// `oak_storage/tests/`. fn new_file(db: &mut TestDb, name: &str, contents: &str) -> File { - File::new(db, file_url(name), contents.to_string(), None) + File::new(db, file_url(name), contents.to_string()) } #[test] @@ -56,7 +56,7 @@ fn test_semantic_index_matches_oak_semantic() { let db = TestDb::new(); let source = "x <- 1\nx\n"; let url = file_url("a.R"); - let file = File::new(&db, url.clone(), source.to_string(), None); + let file = File::new(&db, url.clone(), source.to_string()); let via_salsa = file.semantic_index(&db); diff --git a/crates/oak_db/src/tests/file_exports.rs b/crates/oak_db/src/tests/file_exports.rs index e9d4a2571..1705e6a20 100644 --- a/crates/oak_db/src/tests/file_exports.rs +++ b/crates/oak_db/src/tests/file_exports.rs @@ -16,7 +16,7 @@ fn setup_workspace(db: &mut TestDb, scripts: &[(&str, &str)]) -> Vec { let root = workspace_root(db, "w"); let files: Vec = scripts .iter() - .map(|(name, contents)| File::new(db, file_url(name), contents.to_string(), None)) + .map(|(name, contents)| File::new(db, file_url(name), contents.to_string())) .collect(); root.set_scripts(db).to(files.clone()); db.workspace_roots().set_roots(db).to(vec![root]); diff --git a/crates/oak_db/src/tests/file_imports.rs b/crates/oak_db/src/tests/file_imports.rs index dd9df4ef2..a7e783c8d 100644 --- a/crates/oak_db/src/tests/file_imports.rs +++ b/crates/oak_db/src/tests/file_imports.rs @@ -19,7 +19,7 @@ fn make_installed(db: &mut TestDb, name: &str) -> (crate::Root, Package) { let root = library_root(db, &format!("libs/{name}")); let pkg = Package::new( db, - root, + file_url(&format!("libs/{name}/DESCRIPTION")), name.to_string(), Some("1.0.0".to_string()), Namespace::default(), @@ -51,7 +51,7 @@ fn test_script_with_no_attaches_returns_only_default_search_path() { let base = packages[0]; let stats = packages[1]; - let file = File::new(&db, file_url("a.R"), "x <- 1\n".to_string(), None); + let file = File::new(&db, file_url("a.R"), "x <- 1\n".to_string()); let layers = file.imports(&db); // Only `stats` and `base` are registered in this test; the other @@ -77,7 +77,6 @@ fn test_script_attach_produces_package_exports_layer_in_lifo_order() { &db, file_url("a.R"), "library(dplyr)\nlibrary(ggplot2)\n".to_string(), - None, ); let layers = file.imports(&db); @@ -98,7 +97,7 @@ fn test_script_attach_produces_package_exports_layer_in_lifo_order() { fn test_script_attach_to_unregistered_package_drops_layer() { let db = TestDb::new(); // No `dplyr` in any library root. - let file = File::new(&db, file_url("a.R"), "library(dplyr)\n".to_string(), None); + let file = File::new(&db, file_url("a.R"), "library(dplyr)\n".to_string()); let layers = file.imports(&db); assert!(layers.is_empty()); @@ -125,25 +124,15 @@ fn test_package_file_emits_namespace_and_collation_layers() { let workspace = workspace_root(&db, "w"); let pkg = Package::new( &db, - workspace, + file_url("w/pkg/DESCRIPTION"), "pkg".to_string(), None, namespace, Vec::new(), None, ); - let first = File::new( - &db, - file_url("w/pkg/R/_a.R"), - "first <- 1\n".to_string(), - Some(pkg), - ); - let second = File::new( - &db, - file_url("w/pkg/R/b.R"), - "second <- 2\n".to_string(), - Some(pkg), - ); + let first = File::new(&db, file_url("w/pkg/R/_a.R"), "first <- 1\n".to_string()); + let second = File::new(&db, file_url("w/pkg/R/b.R"), "second <- 2\n".to_string()); pkg.set_files(&mut db).to(vec![first, second]); workspace.set_packages(&mut db).to(vec![pkg]); db.workspace_roots().set_roots(&mut db).to(vec![workspace]); @@ -189,7 +178,7 @@ fn test_imports_is_cached_per_file() { let mut db = TestDb::new(); let _ = install_packages(&mut db, &["dplyr"]); - let file = File::new(&db, file_url("a.R"), "library(dplyr)\n".to_string(), None); + let file = File::new(&db, file_url("a.R"), "library(dplyr)\n".to_string()); let _ = file.imports(&db); let _ = file.imports(&db); diff --git a/crates/oak_db/src/tests/file_imports_at.rs b/crates/oak_db/src/tests/file_imports_at.rs index 85ba11cc7..63416f6f6 100644 --- a/crates/oak_db/src/tests/file_imports_at.rs +++ b/crates/oak_db/src/tests/file_imports_at.rs @@ -12,11 +12,7 @@ use crate::ImportLayer; use crate::Package; fn make_file(db: &mut TestDb, path: &str, contents: &str) -> File { - File::new(db, file_url(path), contents.to_string(), None) -} - -fn make_package_file(db: &mut TestDb, path: &str, contents: &str, package: Package) -> File { - File::new(db, file_url(path), contents.to_string(), Some(package)) + File::new(db, file_url(path), contents.to_string()) } /// Register a set of installed packages on `LibraryRoots`, one library @@ -28,7 +24,7 @@ fn install_packages(db: &mut TestDb, names: &[&str]) -> Vec { let root = library_root(db, &format!("libs/{name}")); let pkg = Package::new( db, - root, + file_url(&format!("libs/{name}/DESCRIPTION")), name.to_string(), Some("1.0.0".to_string()), Namespace::default(), @@ -49,7 +45,7 @@ fn install_workspace_package(db: &mut TestDb, name: &str) -> Package { let root = workspace_root(db, &format!("workspace/{name}")); let pkg = Package::new( db, - root, + file_url(&format!("workspace/{name}/DESCRIPTION")), name.to_string(), None, Namespace::default(), @@ -148,10 +144,10 @@ fn test_package_top_level_sees_predecessor_files_only() { let _ = install_packages(&mut db, &["base"]); let pkg = install_workspace_package(&mut db, "pkg"); - let a = make_package_file(&mut db, "/w/pkg/R/a.R", "first <- 1\n", pkg); + let a = make_file(&mut db, "/w/pkg/R/a.R", "first <- 1\n"); let b_source = "x <- 1\n"; - let b = make_package_file(&mut db, "/w/pkg/R/b.R", b_source, pkg); - let c = make_package_file(&mut db, "/w/pkg/R/c.R", "second <- 2\n", pkg); + let b = make_file(&mut db, "/w/pkg/R/b.R", b_source); + let c = make_file(&mut db, "/w/pkg/R/c.R", "second <- 2\n"); pkg.set_files(&mut db).to(vec![a, b, c]); // Cursor at top-level in b. Only a (the predecessor in `Package.files`) @@ -167,10 +163,10 @@ fn test_package_function_body_sees_other_package_files_in_lifo_order() { let _ = install_packages(&mut db, &["base"]); let pkg = install_workspace_package(&mut db, "pkg"); - let a = make_package_file(&mut db, "/w/pkg/R/a.R", "first <- 1\n", pkg); + let a = make_file(&mut db, "/w/pkg/R/a.R", "first <- 1\n"); let b_source = "f <- function() {\n x\n}\n"; - let b = make_package_file(&mut db, "/w/pkg/R/b.R", b_source, pkg); - let c = make_package_file(&mut db, "/w/pkg/R/c.R", "second <- 2\n", pkg); + let b = make_file(&mut db, "/w/pkg/R/b.R", b_source); + let c = make_file(&mut db, "/w/pkg/R/c.R", "second <- 2\n"); pkg.set_files(&mut db).to(vec![a, b, c]); // Cursor inside f's body. Full lazy view (same as `imports()`). @@ -190,10 +186,10 @@ fn test_package_top_level_predecessors_appear_in_lifo_order() { let _ = install_packages(&mut db, &["base"]); let pkg = install_workspace_package(&mut db, "pkg"); - let a = make_package_file(&mut db, "/w/pkg/R/a.R", "first <- 1\n", pkg); - let b = make_package_file(&mut db, "/w/pkg/R/b.R", "second <- 2\n", pkg); + let a = make_file(&mut db, "/w/pkg/R/a.R", "first <- 1\n"); + let b = make_file(&mut db, "/w/pkg/R/b.R", "second <- 2\n"); let c_source = "x <- 1\n"; - let c = make_package_file(&mut db, "/w/pkg/R/c.R", c_source, pkg); + let c = make_file(&mut db, "/w/pkg/R/c.R", c_source); pkg.set_files(&mut db).to(vec![a, b, c]); let offset = TextSize::from(c_source.find('x').unwrap() as u32); @@ -209,7 +205,7 @@ fn test_package_namespace_and_base_layers_always_visible() { let base = packages[0]; let pkg = install_workspace_package(&mut db, "pkg"); - let file = make_package_file(&mut db, "/w/pkg/R/a.R", "x <- 1\n", pkg); + let file = make_file(&mut db, "/w/pkg/R/a.R", "x <- 1\n"); pkg.set_files(&mut db).to(vec![file]); let layers = file.imports_at(&db, TextSize::from(0)); diff --git a/crates/oak_db/src/tests/file_resolve.rs b/crates/oak_db/src/tests/file_resolve.rs index d0927bd1b..af842f973 100644 --- a/crates/oak_db/src/tests/file_resolve.rs +++ b/crates/oak_db/src/tests/file_resolve.rs @@ -15,7 +15,7 @@ fn setup_workspace(db: &mut TestDb, scripts: &[(&str, &str)]) -> Vec { let root = workspace_root(db, "w"); let files: Vec = scripts .iter() - .map(|(name, contents)| File::new(db, file_url(name), contents.to_string(), None)) + .map(|(name, contents)| File::new(db, file_url(name), contents.to_string())) .collect(); root.set_scripts(db).to(files.clone()); db.workspace_roots().set_roots(db).to(vec![root]); @@ -281,7 +281,7 @@ fn test_resolve_unbound_name_in_package_does_not_cycle() { let workspace = workspace_root(&db, "w/pkg"); let pkg = crate::Package::new( &db, - workspace, + file_url("/w/pkg/DESCRIPTION"), "pkg".to_string(), None, oak_package_metadata::namespace::Namespace::default(), @@ -289,18 +289,8 @@ fn test_resolve_unbound_name_in_package_does_not_cycle() { None, ); - let a = File::new( - &db, - file_url("/w/pkg/R/a.R"), - "x <- 1\n".to_string(), - Some(pkg), - ); - let b = File::new( - &db, - file_url("/w/pkg/R/b.R"), - "y <- 2\n".to_string(), - Some(pkg), - ); + let a = File::new(&db, file_url("/w/pkg/R/a.R"), "x <- 1\n".to_string()); + let b = File::new(&db, file_url("/w/pkg/R/b.R"), "y <- 2\n".to_string()); pkg.set_files(&mut db).to(vec![a, b]); workspace.set_packages(&mut db).to(vec![pkg]); db.workspace_roots().set_roots(&mut db).to(vec![workspace]); @@ -320,7 +310,7 @@ fn test_resolve_walks_package_files_for_lazy_lookups() { let workspace = workspace_root(&db, "w/pkg"); let pkg = crate::Package::new( &db, - workspace, + file_url("/w/pkg/DESCRIPTION"), "pkg".to_string(), None, oak_package_metadata::namespace::Namespace::default(), @@ -328,17 +318,11 @@ fn test_resolve_walks_package_files_for_lazy_lookups() { None, ); - let a = File::new( - &db, - file_url("/w/pkg/R/a.R"), - "shared <- 1\n".to_string(), - Some(pkg), - ); + let a = File::new(&db, file_url("/w/pkg/R/a.R"), "shared <- 1\n".to_string()); let b = File::new( &db, file_url("/w/pkg/R/b.R"), "use_shared <- function() shared\n".to_string(), - Some(pkg), ); pkg.set_files(&mut db).to(vec![a, b]); workspace.set_packages(&mut db).to(vec![pkg]); diff --git a/crates/oak_db/src/tests/file_resolve_at.rs b/crates/oak_db/src/tests/file_resolve_at.rs index 32d6f7af5..01559e390 100644 --- a/crates/oak_db/src/tests/file_resolve_at.rs +++ b/crates/oak_db/src/tests/file_resolve_at.rs @@ -11,11 +11,7 @@ use crate::Package; use crate::Root; fn make_file(db: &mut TestDb, path: &str, contents: &str) -> File { - File::new(db, file_url(path), contents.to_string(), None) -} - -fn make_package_file(db: &mut TestDb, path: &str, contents: &str, package: Package) -> File { - File::new(db, file_url(path), contents.to_string(), Some(package)) + File::new(db, file_url(path), contents.to_string()) } /// Set up a workspace root with the given scripts (top-level files with @@ -40,7 +36,7 @@ fn install_workspace_package(db: &mut TestDb, name: &str) -> (Root, Package) { let root = workspace_root(db, &format!("workspace/{name}")); let pkg = Package::new( db, - root, + file_url(&format!("workspace/{name}/DESCRIPTION")), name.to_string(), None, Namespace::default(), @@ -146,9 +142,9 @@ fn test_resolves_package_sibling_predecessor() { let mut db = TestDb::new(); let (_root, pkg) = install_workspace_package(&mut db, "pkg"); - let a = make_package_file(&mut db, "workspace/pkg/R/a.R", "shared <- 1\n", pkg); + let a = make_file(&mut db, "workspace/pkg/R/a.R", "shared <- 1\n"); let b_source = "use_shared <- function() shared\n"; - let b = make_package_file(&mut db, "workspace/pkg/R/b.R", b_source, pkg); + let b = make_file(&mut db, "workspace/pkg/R/b.R", b_source); pkg.set_files(&mut db).to(vec![a, b]); // Cursor on `shared` inside `b`'s function body. Lexical walk finds diff --git a/crates/oak_db/src/tests/file_root.rs b/crates/oak_db/src/tests/file_root.rs index 9aca49c2b..7faa65ab3 100644 --- a/crates/oak_db/src/tests/file_root.rs +++ b/crates/oak_db/src/tests/file_root.rs @@ -12,7 +12,7 @@ use crate::Package; #[test] fn test_root_returns_none_for_orphan_file_outside_workspace() { let db = OakDatabase::new(); - let file = File::new(&db, file_url("orphan.R"), String::new(), None); + let file = File::new(&db, file_url("orphan.R"), String::new()); assert_eq!(file.root(&db), None); } @@ -23,7 +23,7 @@ fn test_root_finds_containing_workspace_for_orphan_file() { let workspace = workspace_root(&db, "proj"); db.workspace_roots().set_roots(&mut db).to(vec![workspace]); - let file = File::new(&db, file_url("proj/scripts/foo.R"), String::new(), None); + let file = File::new(&db, file_url("proj/scripts/foo.R"), String::new()); assert_eq!(file.root(&db), Some(workspace)); } @@ -36,56 +36,57 @@ fn test_root_returns_longest_prefix_for_orphan_file() { .set_roots(&mut db) .to(vec![outer, inner]); - let inner_file = File::new(&db, file_url("proj/inner/foo.R"), String::new(), None); + let inner_file = File::new(&db, file_url("proj/inner/foo.R"), String::new()); assert_eq!(inner_file.root(&db), Some(inner)); - let outer_file = File::new(&db, file_url("proj/foo.R"), String::new(), None); + let outer_file = File::new(&db, file_url("proj/foo.R"), String::new()); assert_eq!(outer_file.root(&db), Some(outer)); } #[test] fn test_root_dispatches_through_library_package_when_set() { - let db = OakDatabase::new(); + let mut db = OakDatabase::new(); let pkg_root = library_root(&db, "libs/mypkg"); let pkg = Package::new( &db, - pkg_root, + file_url("libs/mypkg/DESCRIPTION"), "mypkg".to_string(), Some("1.0.0".to_string()), Namespace::default(), Vec::new(), None, ); + // File placed in `pkg.files`. `root()` derives the package from that + // containment and dispatches through `Db::root_by_package` rather than + // falling back to the URL-prefix walk against workspace roots. + let file = File::new(&db, file_url("libs/mypkg/R/foo.R"), String::new()); + pkg.set_files(&mut db).to(vec![file]); + pkg_root.set_packages(&mut db).to(vec![pkg]); + db.library_roots().set_roots(&mut db).to(vec![pkg_root]); - // File created with package back-pointer set. `root()` dispatches - // through `Package.root` rather than falling back to the URL-prefix - // walk against workspace roots. - let file = File::new( - &db, - file_url("libs/mypkg/R/foo.R"), - String::new(), - Some(pkg), - ); assert_eq!(file.root(&db), Some(pkg_root)); } #[test] fn test_root_dispatches_through_workspace_package_when_set() { - // Same dispatch as the library case, but the package's `root` is a - // `Workspace` kind. The url-prefix fallback is *not* consulted here - // because `package` is set. - let db = OakDatabase::new(); + // Same dispatch as the library case, but the owning root is a + // `Workspace` kind. The URL-prefix fallback is *not* consulted here + // because the file belongs to a package. + let mut db = OakDatabase::new(); let pkg_root = workspace_root(&db, "proj"); let pkg = Package::new( &db, - pkg_root, + file_url("proj/DESCRIPTION"), "mypkg".to_string(), None, Namespace::default(), Vec::new(), None, ); + let file = File::new(&db, file_url("proj/R/foo.R"), String::new()); + pkg.set_files(&mut db).to(vec![file]); + pkg_root.set_packages(&mut db).to(vec![pkg]); + db.workspace_roots().set_roots(&mut db).to(vec![pkg_root]); - let file = File::new(&db, file_url("proj/R/foo.R"), String::new(), Some(pkg)); assert_eq!(file.root(&db), Some(pkg_root)); } diff --git a/crates/oak_db/src/tests/inputs.rs b/crates/oak_db/src/tests/inputs.rs index 448bcf2af..bbc394ae2 100644 --- a/crates/oak_db/src/tests/inputs.rs +++ b/crates/oak_db/src/tests/inputs.rs @@ -15,7 +15,7 @@ fn make_workspace_package(db: &mut OakDatabase, name: &str) -> (Root, Package) { let root = workspace_root(db, &format!("workspace/{name}")); let pkg = Package::new( db, - root, + file_url(&format!("workspace/{name}/DESCRIPTION")), name.to_string(), None, Namespace::default(), @@ -30,7 +30,7 @@ fn make_installed_package(db: &mut OakDatabase, name: &str) -> (Root, Package) { let root = library_root(db, &format!("libs/{name}")); let pkg = Package::new( db, - root, + file_url(&format!("libs/{name}/DESCRIPTION")), name.to_string(), Some("1.0.0".to_string()), Namespace::default(), @@ -42,7 +42,7 @@ fn make_installed_package(db: &mut OakDatabase, name: &str) -> (Root, Package) { } fn make_script(db: &mut OakDatabase, name: &str) -> File { - File::new(db, file_url(name), String::new(), None) + File::new(db, file_url(name), String::new()) } #[test] diff --git a/crates/oak_db/src/tests/resolver.rs b/crates/oak_db/src/tests/resolver.rs index 4f4f6370b..72c5207e2 100644 --- a/crates/oak_db/src/tests/resolver.rs +++ b/crates/oak_db/src/tests/resolver.rs @@ -11,7 +11,7 @@ use crate::File; use crate::Root; fn make_script(db: &mut TestDb, name: &str, contents: &str) -> File { - File::new(db, file_url(name), contents.to_string(), None) + File::new(db, file_url(name), contents.to_string()) } /// Build a fresh workspace root, attach the given scripts, register @@ -328,9 +328,8 @@ fn test_source_anchors_relative_to_workspace_root() { &db, file_url("proj/sub/a.R"), "source(\"b.R\")\n".to_string(), - None, ); - let b = File::new(&db, file_url("proj/b.R"), "x <- 1\n".to_string(), None); + let b = File::new(&db, file_url("proj/b.R"), "x <- 1\n".to_string()); root.set_scripts(&mut db).to(vec![a, b]); db.workspace_roots().set_roots(&mut db).to(vec![root]); @@ -343,13 +342,8 @@ fn test_source_anchors_to_parent_dir_when_no_workspace() { // Calling file isn't under any workspace root, so the anchor // falls back to the file's own parent directory. let mut db = TestDb::new(); - let a = File::new( - &db, - file_url("dir/a.R"), - "source(\"b.R\")\n".to_string(), - None, - ); - let b = File::new(&db, file_url("dir/b.R"), "x <- 1\n".to_string(), None); + let a = File::new(&db, file_url("dir/a.R"), "source(\"b.R\")\n".to_string()); + let b = File::new(&db, file_url("dir/b.R"), "x <- 1\n".to_string()); db.orphan_root().set_files(&mut db).to(vec![a, b]); let index = a.semantic_index(&db); @@ -365,9 +359,8 @@ fn test_source_path_with_parent_dir_segments() { &db, file_url("dir/sub/a.R"), "source(\"../b.R\")\n".to_string(), - None, ); - let b = File::new(&db, file_url("dir/b.R"), "x <- 1\n".to_string(), None); + let b = File::new(&db, file_url("dir/b.R"), "x <- 1\n".to_string()); db.orphan_root().set_files(&mut db).to(vec![a, b]); let index = a.semantic_index(&db); diff --git a/crates/oak_db/src/tests/test_db.rs b/crates/oak_db/src/tests/test_db.rs index c98d5b93b..df940712e 100644 --- a/crates/oak_db/src/tests/test_db.rs +++ b/crates/oak_db/src/tests/test_db.rs @@ -18,6 +18,7 @@ use crate::LibraryRoots; use crate::OrphanRoot; use crate::Root; use crate::RootKind; +use crate::StaleRoot; use crate::WorkspaceRoots; type Events = Arc>>; @@ -30,6 +31,7 @@ pub(super) struct TestDb { workspace_roots: Arc>, library_roots: Arc>, orphan_root: Arc>, + stale_root: Arc>, } impl TestDb { @@ -47,6 +49,7 @@ impl TestDb { workspace_roots: Arc::new(OnceLock::new()), library_roots: Arc::new(OnceLock::new()), orphan_root: Arc::new(OnceLock::new()), + stale_root: Arc::new(OnceLock::new()), } } @@ -89,6 +92,10 @@ impl DbInputs for TestDb { fn orphan_root(&self) -> OrphanRoot { *self.orphan_root.get_or_init(|| OrphanRoot::empty(self)) } + + fn stale_root(&self) -> StaleRoot { + *self.stale_root.get_or_init(|| StaleRoot::empty(self)) + } } #[salsa::db] @@ -100,6 +107,18 @@ impl Db for TestDb { fn package_by_name(&self, name: &str) -> Option { crate::db::package_by_name_query(self, name) } + + fn package_by_url(&self, url: &UrlId) -> Option { + crate::db::package_by_url_query(self, url) + } + + fn root_by_package(&self, pkg: crate::Package) -> Option { + crate::db::root_by_package_query(self, pkg) + } + + fn live_roots(&self) -> &[crate::LiveRoot] { + crate::db::live_roots_query(self) + } } pub(super) fn file_url(name: &str) -> UrlId { diff --git a/crates/oak_scan/Cargo.toml b/crates/oak_scan/Cargo.toml new file mode 100644 index 000000000..f53a05dfa --- /dev/null +++ b/crates/oak_scan/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "oak_scan" +version = "0.1.0" +description = """ +Scanners and update helpers for `oak_db`. Owns every input mutation that +crosses a single salsa setter (placement of files in buckets, atomic +package replacement, etc.). Read-only callers stay in `oak_db`. +""" + +authors.workspace = true +edition.workspace = true +license.workspace = true +rust-version.workspace = true + +[lints] +workspace = true + +[dependencies] +aether_url.workspace = true +log.workspace = true +oak_db.workspace = true +oak_package_metadata.workspace = true +salsa.workspace = true +stdext.workspace = true +url.workspace = true +walkdir.workspace = true + +[dev-dependencies] +tempfile.workspace = true diff --git a/crates/oak_scan/src/inputs.rs b/crates/oak_scan/src/inputs.rs new file mode 100644 index 000000000..88df692b2 --- /dev/null +++ b/crates/oak_scan/src/inputs.rs @@ -0,0 +1,241 @@ +//! Update helpers for `oak_db`. Each helper drives one update pattern +//! that a single salsa setter can't safely express on its own. +//! +//! The helpers preserve entity identity across rescans: a `File` is +//! keyed by URL, a `Package` by its `DESCRIPTION` name within its root, +//! and a `Root` by its path. Repeat scans reuse the existing entities +//! and update their fields in place rather than minting new ones. This +//! keeps downstream salsa caches (parse, semantic_index, `Definition` +//! entities for goto-def) stable across changes that don't actually +//! touch a given file's content. +//! +//! Placement is single-source-of-truth: a file belongs to whichever +//! container Vec holds it (`pkg.files`, `root.scripts`, or +//! `orphan_root().files`), and `File::package` derives ownership from +//! that. These helpers keep a file in exactly one container as it moves, +//! so the derived lookup stays unambiguous. + +use std::collections::HashSet; +use std::path::PathBuf; + +use aether_url::UrlId; +use oak_db::stale_file_by_url; +use oak_db::Db; +use oak_db::DbInputs; +use oak_db::File; +use oak_db::Package; +use oak_db::Root; +use oak_package_metadata::namespace::Namespace; +use salsa::Setter; + +/// Description of one R file the scanner wants to register. +/// +/// `contents` is the on-disk snapshot at scan time. It's used as the +/// initial content whenever the helper mints a new `File` entity, i.e. +/// the first time a URL is seen, whether at the initial scan or on a +/// later rescan that discovers a newly-created file. +/// +/// If a `File` already exists at this URL (scanner-created from an +/// earlier scan, or VFS-created via `didOpen`), the helpers reuse that +/// entity and leave its content alone. `set_contents` (driven by the +/// VFS) is the authoritative way to update content. +#[derive(Clone, Debug)] +pub struct FileEntry { + pub url: UrlId, + pub contents: String, +} + +/// Extension methods on the database for scanner orchestration and +/// placement-aware updates that don't have a natural `Root` receiver. +pub trait DbExt: Db + DbInputs { + /// Reconcile `LibraryRoots` to exactly `paths`. + /// + /// - Paths already present as a `Root`: untouched. No fs walk, no + /// salsa churn. + /// - New paths: scanned and added. + /// - Removed paths: their `Root` is dropped and the contained `File` + /// and `Package` entities move to [`oak_db::StaleRoot`] so that + /// a later call that brings the same path back reuses the same + /// entities (Salsa never GCs them since they are inputs). + /// + /// Order in `LibraryRoots.roots` follows `paths`, matching R's + /// `.libPaths()` precedence. + fn set_library_paths(&mut self, paths: &[PathBuf]); +} + +impl DbExt for DB { + fn set_library_paths(&mut self, paths: &[PathBuf]) { + crate::library::set_library_paths(self, paths); + } +} + +/// Extension methods on [`Root`] for placement-aware updates. +/// +/// These are the public surface for scanners and the LSP to push their +/// findings into the salsa input graph. Implementations live in +/// `oak_scan` because they coordinate across multiple input fields +/// (`Root.scripts`, `Package.files`, `File.package`) in ways the raw +/// salsa setters can't express on their own. +pub trait RootExt { + /// Create or update a package under this root. Atomic + /// full-replacement of the package's file set. + /// + /// Identity is keyed on `description_url`: if `self.packages` + /// already contains a `Package` with this URL, that entity is + /// reused and its `name` / `version` / `namespace` / `collation` + /// fields are updated in place. Salsa backdates each setter call + /// when the value doesn't actually change. + /// + /// Files are reused by URL via [`Db::file_by_url`]; see + /// [`FileEntry`] for the content-preservation semantics. Wiring + /// the returned `Package` into `self.packages` is the caller's + /// job. + fn set_package( + self, + db: &mut DB, + description_url: UrlId, + name: String, + version: Option, + namespace: Namespace, + files: Vec, + collation: Option>, + ) -> Package; + + /// Drop this root from its live container, rehoming its files and packages + /// so they survive the eviction for later reuse. + /// + /// `editor_owned` is `None` for callers without an editor concept (the + /// library scanner) and `Some(&set)` for the workspace scanner. Files in + /// `editor_owned` go to `OrphanRoot` (still analysis-visible since the + /// buffer is open). Everything else goes to `StaleRoot` + /// (analysis-invisible, available for entity reuse on the next + /// `set_*_paths` call). `Package` entities always go to stale. + /// + /// Doesn't touch `LibraryRoots` / `WorkspaceRoots`. The caller is + /// responsible for rebuilding those Vec inputs with `self` excluded. + fn set_stale(self, db: &mut DB, editor_owned: Option<&HashSet>); +} + +impl RootExt for Root { + fn set_package( + self, + db: &mut DB, + description_url: UrlId, + name: String, + version: Option, + namespace: Namespace, + files: Vec, + collation: Option>, + ) -> Package { + // `package_by_url()` finds the existing entity whether it's already + // in `self.packages` (rescan, common path) or in + // `stale_root.packages` (resurrection after a previous eviction). + // Either way we reuse the entity and refresh its metadata fields. + let pkg = match db.package_by_url(&description_url) { + Some(p) => { + p.set_name(db).to(name); + p.set_version(db).to(version); + p.set_namespace(db).to(namespace); + p.set_collation(db).to(collation); + remove_from_stale_packages(db, p); + p + }, + None => Package::new( + db, + description_url, + name, + version, + namespace, + Vec::new(), + collation, + ), + }; + + let file_entities: Vec = files + .into_iter() + .map(|entry| upsert_file(db, Some(pkg), entry)) + .collect(); + + pkg.set_files(db).to(file_entities); + pkg + } + + fn set_stale(self, db: &mut DB, editor_owned: Option<&HashSet>) { + crate::stale::set_root_stale(db, self, editor_owned); + } +} + +fn upsert_file(db: &mut DB, package: Option, entry: FileEntry) -> File { + if let Some(old) = db.file_by_url(&entry.url) { + // The caller will place this file in `package`'s `files` vec. Two + // cleanups keep the file in exactly one live container, so the + // derived `File::package` stays unambiguous: + // + // - If the file currently belongs to a *different* package, drop it + // from that package's `files` vec; otherwise both would list it. + // Normally a no-op, since a file's package is fixed by its path: + // this only fires in the pathological nested-root case where two + // packages claim the same URL. + // + // - If the file was in `OrphanRoot.files` (typically because the + // editor had it open before a scan classified it), remove it. + let old_package = old.package(db); + if old_package != package { + if let Some(old_pkg) = old_package { + remove_from_pkg_files(db, old_pkg, old); + } + } + remove_from_orphan(db, old); + return old; + } + + if let Some(stale) = stale_file_by_url(db, &entry.url) { + // Resurrecting an evicted File. Restore disk contents (the editor-owned + // variant lives in `orphan_root` instead; this branch only sees + // scanner-discovered files). The caller places it into `package.files`. + stale.set_contents(db).to(entry.contents); + remove_from_stale_files(db, stale); + return stale; + } + + File::new(db, entry.url, entry.contents) +} + +fn remove_from_pkg_files(db: &mut DB, pkg: Package, file: File) { + if !pkg.files(db).contains(&file) { + return; + } + let mut files = pkg.files(db).clone(); + files.retain(|f| *f != file); + pkg.set_files(db).to(files); +} + +fn remove_from_orphan(db: &mut DB, file: File) { + let orphan = db.orphan_root(); + if !orphan.files(db).contains(&file) { + return; + } + let mut files = orphan.files(db).clone(); + files.retain(|f| *f != file); + orphan.set_files(db).to(files); +} + +fn remove_from_stale_files(db: &mut DB, file: File) { + let stale = db.stale_root(); + if !stale.files(db).contains(&file) { + return; + } + let mut files = stale.files(db).clone(); + files.retain(|f| *f != file); + stale.set_files(db).to(files); +} + +fn remove_from_stale_packages(db: &mut DB, pkg: Package) { + let stale = db.stale_root(); + if !stale.packages(db).contains(&pkg) { + return; + } + let mut packages = stale.packages(db).clone(); + packages.retain(|p| *p != pkg); + stale.set_packages(db).to(packages); +} diff --git a/crates/oak_scan/src/lib.rs b/crates/oak_scan/src/lib.rs new file mode 100644 index 000000000..caf8f43ae --- /dev/null +++ b/crates/oak_scan/src/lib.rs @@ -0,0 +1,35 @@ +//! Scanners and update helpers for [`oak_db`]. +//! +//! `oak_db` is read-only: queries that walk the input graph. `oak_scan` +//! is the write side: it walks the filesystem, parses package metadata, +//! and pushes the result into salsa inputs through a small set of named +//! helpers. +//! +//! Identity is preserved across rescans. A `File` is keyed by URL, a +//! `Package` by its `DESCRIPTION` name within its root, and a `Root` by +//! its path. Repeat scans reuse existing entities and update their +//! fields in place, so downstream salsa caches (parse, semantic_index, +//! `Definition` entities for goto-def) stay valid across edits that +//! don't actually touch a given file's content. This matters for +//! workflows like git branch switching, where files routinely +//! disappear and reappear: we keep the same `File` entity instead of +//! minting fresh ones each time. Since Salsa doesn't do garbage collection, +//! recreating files would grow the cache unboundedly. +//! +//! The trade-off is a small placement invariant: `file.package` must +//! agree with which container Vec holds the file (`pkg.files`, +//! `root.scripts`, or `orphan_root().files`). The helpers in this +//! crate are the only intended callers of the placement-affecting +//! setters on `oak_db`'s input structs. + +mod inputs; +mod library; +mod packages; +mod stale; + +#[cfg(test)] +mod tests; + +pub use inputs::DbExt; +pub use inputs::FileEntry; +pub use inputs::RootExt; diff --git a/crates/oak_scan/src/library.rs b/crates/oak_scan/src/library.rs new file mode 100644 index 000000000..fc31a8ef6 --- /dev/null +++ b/crates/oak_scan/src/library.rs @@ -0,0 +1,100 @@ +//! Library scanner. Drives `LibraryRoots` from `.libPaths()` entries. +//! +//! [`set_library_paths`] is declarative: it reconciles the live set of +//! library roots to exactly the paths it's given. Unchanged paths are +//! left alone (no fs walk, no salsa churn). Removed paths' entities go +//! to [`oak_db::StaleRoot`] so re-adding the same path later reuses the +//! same `File` and `Package` entities (Salsa doesn't GC inputs). +//! +//! Installed packages don't have a watcher today: the LSP currently +//! calls this once at init. The diff path is here ahead of need so +//! libraries and workspaces share the same eviction story. + +use std::collections::HashMap; +use std::collections::HashSet; +use std::path::Path; +use std::path::PathBuf; + +use aether_url::UrlId; +use oak_db::Db; +use oak_db::DbInputs; +use oak_db::Package; +use oak_db::Root; +use oak_db::RootKind; +use salsa::Setter; +use walkdir::WalkDir; + +use crate::inputs::RootExt; +use crate::packages::read_package; + +/// Reconcile `LibraryRoots` to exactly `paths`. Called through +/// [`crate::DbExt::set_library_paths`]. Order in `LibraryRoots.roots` +/// follows `paths`, matching R's `.libPaths()` precedence. +pub(crate) fn set_library_paths(db: &mut DB, paths: &[PathBuf]) { + let new: Vec<(PathBuf, UrlId)> = paths + .iter() + .filter_map(|p| { + let url = UrlId::from_file_path(p).ok()?; + Some((p.clone(), url)) + }) + .collect(); + let new_urls: HashSet = new.iter().map(|(_, u)| u.clone()).collect(); + + let old: HashMap = db + .library_roots() + .roots(db) + .iter() + .map(|r| (r.path(db).clone(), *r)) + .collect(); + + // Evict roots not in the new set. Since library files aren't editor-owned, + // we pass `None` so everything routes to stale. + for (old_url, &old_root) in &old { + if !new_urls.contains(old_url) { + old_root.set_stale(db, None); + } + } + + // Build the new roots list in order: reuse the existing `Root` for + // unchanged paths (no rescan, that's handled by the watcher), scan the rest. + let mut new_roots = Vec::with_capacity(new.len()); + for (path, url) in new { + let root = match old.get(&url) { + Some(&r) => r, + None => scan_new_library_path(db, &path, url), + }; + new_roots.push(root); + } + db.library_roots().set_roots(db).to(new_roots); +} + +/// Initial scan of a path that wasn't previously a library root. Walks depth-1, +/// calls `set_package` per package directory, returns the freshly-built `Root`. +fn scan_new_library_path(db: &mut DB, path: &Path, url: UrlId) -> Root { + let root = Root::new(db, url, RootKind::Library, Vec::new(), Vec::new()); + + let mut packages: Vec = Vec::new(); + for entry in WalkDir::new(path).max_depth(1).min_depth(1) { + let Ok(entry) = entry else { + continue; + }; + if !entry.file_type().is_dir() { + continue; + } + let Some(pkg) = read_package(entry.path()) else { + continue; + }; + packages.push(root.set_package( + db, + pkg.description_url, + pkg.name, + pkg.version, + pkg.namespace, + pkg.files, + pkg.collation, + )); + } + + root.set_packages(db).to(packages); + root +} diff --git a/crates/oak_scan/src/packages.rs b/crates/oak_scan/src/packages.rs new file mode 100644 index 000000000..af89a1afa --- /dev/null +++ b/crates/oak_scan/src/packages.rs @@ -0,0 +1,110 @@ +//! Filesystem-level R package discovery. Pure I/O, no salsa access. +//! Reused by both the library scanner (which walks depth-1 over a +//! library directory) and the workspace scanner (which walks the +//! workspace tree looking for `DESCRIPTION` files at any depth). + +use std::fs; +use std::path::Path; +use std::path::PathBuf; + +use aether_url::UrlId; +use oak_package_metadata::description::Description; +use oak_package_metadata::namespace::Namespace; +use stdext::result::ResultExt; + +use crate::inputs::FileEntry; + +/// One package discovered on disk: its `DESCRIPTION`-derived metadata +/// plus the R files under `R/`. +#[derive(Debug)] +pub(crate) struct PackageDescriptor { + /// URL of the `DESCRIPTION` file. Stable identity for the `Package` + /// entity across rescans (see `Package::description_url`). The `name` + /// is *not* stable identity because two packages can declare the + /// same `Package:` field, and dedup picks one of them per root. + pub description_url: UrlId, + pub name: String, + pub version: Option, + pub namespace: Namespace, + pub files: Vec, + pub collation: Option>, +} + +/// Read a candidate package directory. Returns `None` if `DESCRIPTION` +/// is missing or malformed. +pub(crate) fn read_package(dir: &Path) -> Option { + let description_path = dir.join("DESCRIPTION"); + let description_text = fs::read_to_string(&description_path).ok()?; + let description = Description::parse(&description_text).log_err()?; + let description_url = UrlId::from_file_path(&description_path).ok()?; + + let namespace = fs::read_to_string(dir.join("NAMESPACE")) + .ok() + .and_then(|text| Namespace::parse(&text).log_err()) + .unwrap_or_default(); + + let files = scan_r_files(&dir.join("R")); + let collation = description.collate(); + + Some(PackageDescriptor { + description_url, + name: description.name, + version: Some(description.version), + namespace, + files, + collation, + }) +} + +/// Read every `*.R` / `*.r` file directly under `r_dir`, in alphabetical +/// order by basename. Subdirectories are skipped (the standard R package +/// layout is flat). R files that fail to read are logged at warn level +/// and skipped. Symlinks resolving to non-files (the `is_r_file` check) +/// are skipped quietly. +fn scan_r_files(r_dir: &Path) -> Vec { + let mut entries: Vec<(PathBuf, String)> = Vec::new(); + let Ok(read_dir) = fs::read_dir(r_dir) else { + // Normal for packages without an `R/` directory (data-only, + // header-only). Don't log. + return Vec::new(); + }; + + for entry in read_dir.flatten() { + let path = entry.path(); + if !is_r_file(&path) { + continue; + } + let contents = match fs::read_to_string(&path) { + Ok(c) => c, + Err(err) => { + log::warn!("Failed to read R file {}: {err}", path.display()); + continue; + }, + }; + entries.push((path, contents)); + } + + entries.sort_by(|a, b| { + let a_name = a.0.file_name().map(|n| n.to_ascii_lowercase()); + let b_name = b.0.file_name().map(|n| n.to_ascii_lowercase()); + a_name.cmp(&b_name) + }); + + entries + .into_iter() + .filter_map(|(path, contents)| { + let url = UrlId::from_file_path(&path).ok()?; + Some(FileEntry { url, contents }) + }) + .collect() +} + +fn is_r_file(path: &Path) -> bool { + if !path.is_file() { + return false; + } + let Some(ext) = path.extension() else { + return false; + }; + ext.eq_ignore_ascii_case("r") +} diff --git a/crates/oak_scan/src/stale.rs b/crates/oak_scan/src/stale.rs new file mode 100644 index 000000000..f6ff49332 --- /dev/null +++ b/crates/oak_scan/src/stale.rs @@ -0,0 +1,109 @@ +//! Eviction logic shared by the library and workspace `set_*_paths` helpers. +//! +//! When a `set_*_paths` call drops a `Root`, its `File` and `Package` entities +//! are still alive in salsa storage (no GC). We rehome them based on whether +//! they're editor-owned: +//! +//! - **Editor-owned files** -> `OrphanRoot`. The user has a buffer open; +//! the file should keep showing up in analysis even though the +//! workspace folder went away. The buffer is the source of truth for +//! its contents until `didClose`. +//! +//! `OrphanRoot` has no `packages` field, so an evicted package file +//! loses its package association: with the file in orphan and its old +//! package no longer in a live root, `File::package` derives `None` and +//! analysis treats it as a standalone script for as long as the +//! workspace is removed. If the workspace comes back, `upsert_file` +//! finds the same `File` via `OrphanRoot` and re-promotes it into +//! `pkg.files`, restoring the package context. +//! +//! - **Everything else** -> `StaleRoot`. Invisible to analysis, available +//! for entity reuse on the next `set_*_paths` that brings the path +//! back (can happen e.g. in multi-repo workflows where a workspace path is +//! added and removed). +//! +//! `Package` entities always go to stale: there's no editor-owned analogue for +//! packages. + +use std::collections::HashSet; + +use aether_url::UrlId; +use oak_db::Db; +use oak_db::DbInputs; +use oak_db::File; +use oak_db::Package; +use oak_db::Root; +use salsa::Setter; + +/// Drop `root` from its live container, rehoming files and packages to +/// `OrphanRoot` / `StaleRoot` as described in the module doc. +/// +/// `editor_owned` is `None` for callers without an editor concept (the library +/// scanner) and `Some(&set)` for the workspace scanner. Files in `editor_owned` +/// go to `OrphanRoot`. The rest go to `StaleRoot`. +/// +/// Doesn't touch `LibraryRoots` / `WorkspaceRoots`. The caller is responsible +/// for rebuilding those Vec inputs with `root` excluded. +/// +/// Internal implementation: the public API is [`crate::RootExt::set_stale`]. +pub(crate) fn set_root_stale( + db: &mut DB, + root: Root, + editor_owned: Option<&HashSet>, +) { + let packages: Vec = root.packages(db).clone(); + + let mut all_files: Vec = root.scripts(db).to_vec(); + for &pkg in &packages { + all_files.extend(pkg.files(db).iter().copied()); + } + + // Ownership is derived from live containment, so there's no back-pointer + // to clear: once these files leave the root (to orphan / stale) and the + // root's packages are emptied below, `File::package` derives `None`. + let (to_orphan, to_stale): (Vec, Vec) = all_files + .into_iter() + .partition(|f| editor_owned.is_some_and(|owned| owned.contains(f.url(db)))); + + if !to_orphan.is_empty() { + let orphan = db.orphan_root(); + let mut files = orphan.files(db).clone(); + for file in to_orphan { + if !files.contains(&file) { + files.push(file); + } + } + orphan.set_files(db).to(files); + } + + let stale = db.stale_root(); + if !to_stale.is_empty() { + let mut files = stale.files(db).clone(); + for file in to_stale { + if !files.contains(&file) { + files.push(file); + } + } + stale.set_files(db).to(files); + } + + if !packages.is_empty() { + let mut stale_packages = stale.packages(db).clone(); + for pkg in &packages { + if !stale_packages.contains(pkg) { + stale_packages.push(*pkg); + } + } + stale.set_packages(db).to(stale_packages); + } + + // Clear the dropped root's containers and each package's files vec. + // The packages themselves now live in `stale_root.packages`; keeping + // their `files` populated would leave stale references that + // `package_by_url` can resurrect with inconsistent contents. + root.set_scripts(db).to(Vec::new()); + for &pkg in &packages { + pkg.set_files(db).to(Vec::new()); + } + root.set_packages(db).to(Vec::new()); +} diff --git a/crates/oak_scan/src/tests.rs b/crates/oak_scan/src/tests.rs new file mode 100644 index 000000000..52df9fa02 --- /dev/null +++ b/crates/oak_scan/src/tests.rs @@ -0,0 +1 @@ +mod stale; diff --git a/crates/oak_scan/src/tests/stale.rs b/crates/oak_scan/src/tests/stale.rs new file mode 100644 index 000000000..c9147578c --- /dev/null +++ b/crates/oak_scan/src/tests/stale.rs @@ -0,0 +1,83 @@ +//! Unit tests for [`crate::stale`] eviction routing, written against +//! the public API ([`RootExt::set_stale`]) rather than the internal +//! free function so they double as call-pattern documentation. + +use std::collections::HashSet; + +use aether_url::UrlId; +use oak_db::DbInputs; +use oak_db::File; +use oak_db::OakDatabase; +use oak_db::Package; +use oak_db::Root; +use oak_db::RootKind; +use oak_package_metadata::namespace::Namespace; +use salsa::Setter; +use url::Url; + +use crate::inputs::RootExt; + +fn file_url(s: &str) -> UrlId { + UrlId::from_canonical(Url::parse(&format!("file://{s}")).unwrap()) +} + +#[test] +fn test_set_stale_routes_editor_owned_to_orphan() { + let mut db = OakDatabase::new(); + let root = Root::new(&db, file_url("/proj"), RootKind::Workspace, vec![], vec![]); + let file = File::new(&db, file_url("/proj/foo.R"), "x".to_string()); + root.set_scripts(&mut db).to(vec![file]); + db.workspace_roots().set_roots(&mut db).to(vec![root]); + + let mut owned = HashSet::new(); + owned.insert(file_url("/proj/foo.R")); + root.set_stale(&mut db, Some(&owned)); + + assert!(db.orphan_root().files(&db).contains(&file)); + assert!(!db.stale_root().files(&db).contains(&file)); + assert_eq!(file.package(&db), None); +} + +#[test] +fn test_set_stale_routes_non_editor_owned_to_stale() { + let mut db = OakDatabase::new(); + let root = Root::new(&db, file_url("/proj"), RootKind::Workspace, vec![], vec![]); + let file = File::new(&db, file_url("/proj/foo.R"), "x".to_string()); + root.set_scripts(&mut db).to(vec![file]); + db.workspace_roots().set_roots(&mut db).to(vec![root]); + + root.set_stale(&mut db, None); + + assert!(!db.orphan_root().files(&db).contains(&file)); + assert!(db.stale_root().files(&db).contains(&file)); +} + +#[test] +fn test_set_stale_clears_package_on_editor_owned_package_file() { + // The doc claim being tested: an evicted editor-owned package + // file loses its package association when it lands in orphan. + // The package itself goes to stale. + let mut db = OakDatabase::new(); + let root = Root::new(&db, file_url("/proj"), RootKind::Workspace, vec![], vec![]); + let pkg = Package::new( + &db, + file_url("/proj/DESCRIPTION"), + "p".to_string(), + None, + Namespace::default(), + vec![], + None, + ); + let file = File::new(&db, file_url("/proj/R/a.R"), "x".to_string()); + pkg.set_files(&mut db).to(vec![file]); + root.set_packages(&mut db).to(vec![pkg]); + db.workspace_roots().set_roots(&mut db).to(vec![root]); + + let mut owned = HashSet::new(); + owned.insert(file_url("/proj/R/a.R")); + root.set_stale(&mut db, Some(&owned)); + + assert!(db.orphan_root().files(&db).contains(&file)); + assert_eq!(file.package(&db), None); + assert!(db.stale_root().packages(&db).contains(&pkg)); +} diff --git a/crates/oak_scan/tests/library.rs b/crates/oak_scan/tests/library.rs new file mode 100644 index 000000000..2042417d6 --- /dev/null +++ b/crates/oak_scan/tests/library.rs @@ -0,0 +1,550 @@ +use std::fs; +use std::path::Path; +use std::path::PathBuf; + +use aether_url::UrlId; +use oak_db::Db; +use oak_db::DbInputs; +use oak_db::OakDatabase; +use oak_db::Package; +use oak_db::Root; +use oak_db::RootKind; +use oak_package_metadata::namespace::Namespace; +use oak_scan::DbExt; +use oak_scan::RootExt; +use salsa::Setter; +use url::Url; + +/// Write a minimal R package layout under `dir`: a `DESCRIPTION` file +/// with `Package: {name}, Version: 1.0.0`, plus the `R/` files in +/// `r_files` (basename -> contents). +fn write_package(dir: &Path, name: &str, r_files: &[(&str, &str)]) { + fs::create_dir_all(dir.join("R")).unwrap(); + fs::write( + dir.join("DESCRIPTION"), + format!("Package: {name}\nVersion: 1.0.0\n"), + ) + .unwrap(); + for (basename, contents) in r_files { + fs::write(dir.join("R").join(basename), contents).unwrap(); + } +} + +#[test] +fn test_scan_empty_library_path_registers_empty_root() { + let tmp = tempfile::tempdir().unwrap(); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + + let roots = db.library_roots().roots(&db).clone(); + assert_eq!(roots.len(), 1); + let root = roots[0]; + assert_eq!(root.kind(&db), RootKind::Library); + assert!(root.packages(&db).is_empty()); +} + +#[test] +fn test_scan_library_discovers_package() { + let tmp = tempfile::tempdir().unwrap(); + write_package(&tmp.path().join("dplyr"), "dplyr", &[ + ("mutate.R", "mutate <- function(x) x\n"), + ("select.R", "select <- function(x) x\n"), + ]); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + + let roots = db.library_roots().roots(&db).clone(); + let packages = roots[0].packages(&db).clone(); + assert_eq!(packages.len(), 1); + assert_eq!(packages[0].name(&db), "dplyr"); + assert_eq!(packages[0].version(&db), &Some("1.0.0".to_string()),); + + let files = packages[0].files(&db).clone(); + assert_eq!(files.len(), 2); + // Alphabetical by basename: mutate.R, select.R. + assert!(files[0].url(&db).as_url().path().ends_with("mutate.R")); + assert!(files[1].url(&db).as_url().path().ends_with("select.R")); +} + +#[test] +fn test_scan_library_files_are_findable_by_url() { + let tmp = tempfile::tempdir().unwrap(); + write_package(&tmp.path().join("pkg"), "pkg", &[("a.R", "x <- 1\n")]); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + + let r_path = tmp.path().join("pkg").join("R").join("a.R"); + let url = UrlId::from_file_path(&r_path).unwrap(); + let file = db + .file_by_url(&url) + .expect("scanned file should be findable"); + assert_eq!(file.contents(&db), "x <- 1\n"); +} + +#[test] +fn test_scan_multiple_library_paths_preserve_order() { + let tmp1 = tempfile::tempdir().unwrap(); + let tmp2 = tempfile::tempdir().unwrap(); + write_package(&tmp1.path().join("pkg1"), "pkg1", &[]); + write_package(&tmp2.path().join("pkg2"), "pkg2", &[]); + let mut db = OakDatabase::new(); + + let paths: Vec = vec![tmp1.path().to_path_buf(), tmp2.path().to_path_buf()]; + db.set_library_paths(&paths); + + let roots = db.library_roots().roots(&db).clone(); + assert_eq!(roots.len(), 2); + assert_eq!(roots[0].packages(&db)[0].name(&db), "pkg1"); + assert_eq!(roots[1].packages(&db)[0].name(&db), "pkg2"); +} + +#[test] +fn test_scan_skips_directory_without_description() { + let tmp = tempfile::tempdir().unwrap(); + // Looks like a package dir but missing DESCRIPTION. + fs::create_dir_all(tmp.path().join("not-a-pkg").join("R")).unwrap(); + fs::write( + tmp.path().join("not-a-pkg").join("R").join("a.R"), + "x <- 1\n", + ) + .unwrap(); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + + let roots = db.library_roots().roots(&db).clone(); + assert!(roots[0].packages(&db).is_empty()); +} + +#[test] +fn test_rescan_preserves_root_identity() { + use salsa::plumbing::AsId; + + let tmp = tempfile::tempdir().unwrap(); + write_package(&tmp.path().join("pkg"), "pkg", &[("a.R", "x <- 1\n")]); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + let root_id_1 = db.library_roots().roots(&db)[0].as_id(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + let root_id_2 = db.library_roots().roots(&db)[0].as_id(); + + assert_eq!(root_id_1, root_id_2); +} + +#[test] +fn test_rescan_preserves_package_identity_by_description_name() { + use salsa::plumbing::AsId; + + let tmp = tempfile::tempdir().unwrap(); + write_package(&tmp.path().join("pkg"), "dplyr", &[("a.R", "x <- 1\n")]); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + let pkg_id_1 = db.library_roots().roots(&db)[0].packages(&db)[0].as_id(); + + // Rescan with no changes on disk. + db.set_library_paths(&[tmp.path().to_path_buf()]); + let pkg_id_2 = db.library_roots().roots(&db)[0].packages(&db)[0].as_id(); + + assert_eq!(pkg_id_1, pkg_id_2); +} + +#[test] +fn test_rescan_preserves_file_identity_by_url() { + use salsa::plumbing::AsId; + + let tmp = tempfile::tempdir().unwrap(); + write_package(&tmp.path().join("pkg"), "pkg", &[("a.R", "x <- 1\n")]); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + let file_id_1 = db.library_roots().roots(&db)[0].packages(&db)[0].files(&db)[0].as_id(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + let file_id_2 = db.library_roots().roots(&db)[0].packages(&db)[0].files(&db)[0].as_id(); + + assert_eq!(file_id_1, file_id_2); +} + +#[test] +fn test_rescan_renamed_package_dir_keeps_package_identity() { + // Identity is `(root, DESCRIPTION name)`, not the directory path. + // Renaming the package directory but keeping the same DESCRIPTION + // name (and same library root) preserves the salsa `Package` id. + use salsa::plumbing::AsId; + + let tmp = tempfile::tempdir().unwrap(); + write_package(&tmp.path().join("v1"), "mypkg", &[("a.R", "x <- 1\n")]); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + let pkg_id_1 = db.library_roots().roots(&db)[0].packages(&db)[0].as_id(); + + // Rename the package directory. DESCRIPTION still says `mypkg`. + fs::rename(tmp.path().join("v1"), tmp.path().join("v2")).unwrap(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + let pkg_id_2 = db.library_roots().roots(&db)[0].packages(&db)[0].as_id(); + + assert_eq!(pkg_id_1, pkg_id_2); +} + +#[test] +fn test_scan_picks_up_collation_field() { + let tmp = tempfile::tempdir().unwrap(); + let pkg_dir = tmp.path().join("pkg"); + fs::create_dir_all(pkg_dir.join("R")).unwrap(); + fs::write( + pkg_dir.join("DESCRIPTION"), + "Package: pkg\nVersion: 1.0.0\nCollate: b.R a.R\n", + ) + .unwrap(); + fs::write(pkg_dir.join("R").join("a.R"), "x <- 1\n").unwrap(); + fs::write(pkg_dir.join("R").join("b.R"), "y <- 2\n").unwrap(); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + + let pkg = db.library_roots().roots(&db)[0].packages(&db)[0]; + assert_eq!( + pkg.collation(&db), + &Some(vec!["b.R".to_string(), "a.R".to_string()]), + ); +} + +#[test] +fn test_set_library_paths_removed_path_evicts_root() { + let tmp1 = tempfile::tempdir().unwrap(); + let tmp2 = tempfile::tempdir().unwrap(); + write_package(&tmp1.path().join("pkg1"), "pkg1", &[]); + write_package(&tmp2.path().join("pkg2"), "pkg2", &[]); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp1.path().to_path_buf(), tmp2.path().to_path_buf()]); + assert_eq!(db.library_roots().roots(&db).len(), 2); + + // Drop the second library. The remaining root keeps its identity + // and its package. + db.set_library_paths(&[tmp1.path().to_path_buf()]); + + let roots = db.library_roots().roots(&db).clone(); + assert_eq!(roots.len(), 1); + assert_eq!(roots[0].packages(&db)[0].name(&db), "pkg1"); +} + +#[test] +fn test_set_library_paths_re_add_preserves_file_identity() { + // The motivating case for `StaleRoot`. Adding, removing, then re-adding + // a library path returns the same `File` entity for files at the same + // URL, so downstream salsa caches stay warm and don't bloat on every + // workspace folder toggle. + use salsa::plumbing::AsId; + + let tmp = tempfile::tempdir().unwrap(); + write_package(&tmp.path().join("pkg"), "pkg", &[("a.R", "x <- 1\n")]); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + let file_id_before = db.library_roots().roots(&db)[0].packages(&db)[0].files(&db)[0].as_id(); + + db.set_library_paths(&[]); + // After removal, the file is not reachable through analysis. + let r_path = tmp.path().join("pkg").join("R").join("a.R"); + let url = UrlId::from_file_path(&r_path).unwrap(); + assert!(db.file_by_url(&url).is_none()); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + let file_id_after = db.library_roots().roots(&db)[0].packages(&db)[0].files(&db)[0].as_id(); + + assert_eq!(file_id_before, file_id_after); + // And it's findable again. + assert!(db.file_by_url(&url).is_some()); +} + +#[test] +fn test_set_library_paths_re_add_preserves_package_identity() { + use salsa::plumbing::AsId; + + let tmp = tempfile::tempdir().unwrap(); + write_package(&tmp.path().join("pkg"), "dplyr", &[("a.R", "x <- 1\n")]); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + let pkg_id_before = db.library_roots().roots(&db)[0].packages(&db)[0].as_id(); + + db.set_library_paths(&[]); + // Package is not reachable via name lookup while the library is removed. + assert!(db.package_by_name("dplyr").is_none()); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + let pkg_id_after = db.library_roots().roots(&db)[0].packages(&db)[0].as_id(); + + assert_eq!(pkg_id_before, pkg_id_after); + assert!(db.package_by_name("dplyr").is_some()); +} + +#[test] +fn test_set_library_paths_stale_invisible_to_analysis() { + // Stale files/packages must not show up in `file_by_url` / + // `package_by_name`. They're entity-reuse storage, not part of the + // analysis universe. + let tmp = tempfile::tempdir().unwrap(); + write_package(&tmp.path().join("dplyr"), "dplyr", &[("a.R", "x <- 1\n")]); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + let r_path = tmp.path().join("dplyr").join("R").join("a.R"); + let url = UrlId::from_file_path(&r_path).unwrap(); + assert!(db.file_by_url(&url).is_some()); + assert!(db.package_by_name("dplyr").is_some()); + + db.set_library_paths(&[]); + + // Both lookups miss; the entities are in stale, not in the live universe. + assert!(db.file_by_url(&url).is_none()); + assert!(db.package_by_name("dplyr").is_none()); + // But the stale buckets do hold them. + assert_eq!(db.stale_root().files(&db).len(), 1); + assert_eq!(db.stale_root().packages(&db).len(), 1); +} + +#[test] +fn test_set_library_paths_stale_no_duplicates_across_cycles() { + // Repeated add/remove/add must not duplicate entities in stale: on + // re-add the entity comes back out of stale, so by the time we + // remove it again there's only one copy to push back in. + let tmp = tempfile::tempdir().unwrap(); + write_package(&tmp.path().join("pkg"), "pkg", &[("a.R", "x <- 1\n")]); + let mut db = OakDatabase::new(); + + for _ in 0..3 { + db.set_library_paths(&[tmp.path().to_path_buf()]); + db.set_library_paths(&[]); + } + + let stale = db.stale_root(); + assert_eq!(stale.files(&db).len(), 1); + assert_eq!(stale.packages(&db).len(), 1); +} + +#[test] +fn test_set_library_paths_resurrected_file_picks_up_disk_contents() { + // Eviction doesn't snapshot disk contents. When a file is + // resurrected from stale, it should reflect the current disk state, + // not whatever it had when evicted. + let tmp = tempfile::tempdir().unwrap(); + write_package(&tmp.path().join("pkg"), "pkg", &[("a.R", "v1\n")]); + let mut db = OakDatabase::new(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + db.set_library_paths(&[]); + + let r_path = tmp.path().join("pkg").join("R").join("a.R"); + fs::write(&r_path, "v2\n").unwrap(); + + db.set_library_paths(&[tmp.path().to_path_buf()]); + let url = UrlId::from_file_path(&r_path).unwrap(); + let file = db.file_by_url(&url).unwrap(); + assert_eq!(file.contents(&db), "v2\n"); +} + +// --- nested-root resolution (longest-path-wins) --- +// +// The library scanner walks depth-1 so it can't naturally produce two +// library roots that both claim the same DESCRIPTION (a nested library +// root at `/lib/foo` would walk children of `/lib/foo`, not +// `/lib/foo/DESCRIPTION` itself). These tests exercise `RootExt::set_package` +// directly with hand-built `Root` entities so they cover the shared +// resolution logic that PR 12's workspace scanner -- which walks any depth +// and is the realistic trigger -- depends on. + +fn file_url(s: &str) -> UrlId { + // Bypass `UrlId::from_file_path`'s canonicalization (these paths + // don't exist on disk). + UrlId::from_canonical(Url::parse(&format!("file://{s}")).unwrap()) +} + +fn empty_library_root(db: &OakDatabase, path: &str) -> Root { + Root::new(db, file_url(path), RootKind::Library, vec![], vec![]) +} + +/// Stash `pkg` in `root.packages` and register `root` on +/// `library_roots`, mirroring what the library scanner does after +/// `set_package` returns. Without this step `package_by_url` can't see +/// the package on subsequent `set_package` calls. +fn register_package(db: &mut OakDatabase, root: Root, pkg: Package) { + root.set_packages(db).to(vec![pkg]); + let mut roots = db.library_roots().roots(db).clone(); + if !roots.contains(&root) { + roots.push(root); + } + db.library_roots().set_roots(db).to(roots); +} + +#[test] +fn test_set_package_longer_root_wins_after_shorter_claims_first() { + let mut db = OakDatabase::new(); + let short = empty_library_root(&db, "/lib"); + let long = empty_library_root(&db, "/lib/sub"); + let desc_url = file_url("/lib/sub/DESCRIPTION"); + + let p1 = short.set_package( + &mut db, + desc_url.clone(), + "pkg".to_string(), + None, + Namespace::default(), + Vec::new(), + None, + ); + register_package(&mut db, short, p1); + assert_eq!(db.root_by_package(p1), Some(short)); + + let p2 = long.set_package( + &mut db, + desc_url, + "pkg".to_string(), + None, + Namespace::default(), + Vec::new(), + None, + ); + register_package(&mut db, long, p2); + // Same entity; now in both roots' `packages`. `root_by_package` prefers + // the longer (more specific) root. + assert_eq!(p1, p2); + assert_eq!(db.root_by_package(p1), Some(long)); +} + +#[test] +fn test_set_package_shorter_root_does_not_steal_from_longer() { + let mut db = OakDatabase::new(); + let short = empty_library_root(&db, "/lib"); + let long = empty_library_root(&db, "/lib/sub"); + let desc_url = file_url("/lib/sub/DESCRIPTION"); + + let p1 = long.set_package( + &mut db, + desc_url.clone(), + "pkg".to_string(), + None, + Namespace::default(), + Vec::new(), + None, + ); + register_package(&mut db, long, p1); + assert_eq!(db.root_by_package(p1), Some(long)); + + let p2 = short.set_package( + &mut db, + desc_url, + "pkg".to_string(), + None, + Namespace::default(), + Vec::new(), + None, + ); + register_package(&mut db, short, p2); + // Same entity; now in both roots' `packages`. `root_by_package` keeps the + // longer root as the owner. + assert_eq!(p1, p2); + assert_eq!(db.root_by_package(p1), Some(long)); +} + +#[test] +fn test_upsert_re_promotes_editor_owned_file_from_orphan() { + // Mirrors the doc claim in `eviction.rs`: an editor-open file that + // was evicted to `OrphanRoot` should come back into `pkg.files` + // when the package's root is re-added. Same `File` entity, editor + // contents preserved (the scan's disk snapshot doesn't overwrite), + // and the orphan reference is cleaned up so the orphan-placement + // invariant (`package == None`) stays honest. + use oak_db::File; + use oak_scan::FileEntry; + + let mut db = OakDatabase::new(); + + // Editor opens the file before any scan -> orphan. + let r_url = file_url("/lib/pkg/R/a.R"); + let file = File::new(&db, r_url.clone(), "editor content".to_string()); + db.orphan_root().set_files(&mut db).to(vec![file]); + assert_eq!(file.package(&db), None); + + // Now a library scan picks up the same URL as part of a package. + let lib = empty_library_root(&db, "/lib"); + let pkg = lib.set_package( + &mut db, + file_url("/lib/pkg/DESCRIPTION"), + "pkg".to_string(), + None, + Namespace::default(), + vec![FileEntry { + url: r_url.clone(), + contents: "disk content".to_string(), + }], + None, + ); + // Wire the package into a live root so `File::package` (derived from + // live containment) can see it, mirroring the real scanner. + register_package(&mut db, lib, pkg); + + // Same `File` entity, editor content preserved, package ownership + // derived from `pkg.files`. + let pkg_file = pkg.files(&db)[0]; + assert_eq!(pkg_file, file); + assert_eq!(file.contents(&db), "editor content"); + assert_eq!(file.package(&db), Some(pkg)); + + // Orphan reference cleaned up. + assert!(!db.orphan_root().files(&db).contains(&file)); +} + +#[test] +fn test_set_package_stale_resurrection_changes_owning_root() { + // Stale resurrection: the previous `Root` entity at this path was + // evicted (so it's no longer in `library_roots`), and a fresh `Root` + // is created at the same path. The resurrected `Package` ends up in + // the new root's `packages` vec, and `root_by_package` reports the new + // root accordingly. + let mut db = OakDatabase::new(); + let old = empty_library_root(&db, "/lib"); + let new = empty_library_root(&db, "/lib"); + assert_ne!(old, new); + let desc_url = file_url("/lib/pkg/DESCRIPTION"); + + let p1 = old.set_package( + &mut db, + desc_url.clone(), + "pkg".to_string(), + None, + Namespace::default(), + Vec::new(), + None, + ); + register_package(&mut db, old, p1); + assert_eq!(db.root_by_package(p1), Some(old)); + + // Simulate eviction: drop `old` from `library_roots` and move the + // package into `stale_root.packages`. + db.stale_root().set_packages(&mut db).to(vec![p1]); + db.library_roots().set_roots(&mut db).to(vec![]); + assert_eq!(db.root_by_package(p1), None); + + let p2 = new.set_package( + &mut db, + desc_url, + "pkg".to_string(), + None, + Namespace::default(), + Vec::new(), + None, + ); + register_package(&mut db, new, p2); + assert_eq!(p1, p2); + assert_eq!(db.root_by_package(p1), Some(new)); +}