diff --git a/src/cache.rs b/src/cache.rs index 8d54f362..71afa3cd 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -16,6 +16,7 @@ use tokio::sync::OnceCell as OnceLock; use crate::{ context::ResolveContext as Ctx, + hashing::{hash_path, IdentityHasher}, package_json::{off_to_location, PackageJson}, path::PathUtil, FileMetadata, FileSystem, JSONError, ResolveError, ResolveOptions, TsConfig, @@ -43,11 +44,7 @@ impl Cache { } pub fn value(&self, path: &Path) -> CachedPath { - let hash = { - let mut hasher = FxHasher::default(); - path.hash(&mut hasher); - hasher.finish() - }; + let hash = hash_path(path); if let Some(cache_entry) = self.paths.get((hash, path).borrow() as &dyn CacheKey) { return cache_entry.clone(); } @@ -116,8 +113,9 @@ impl Hash for CachedPath { } impl PartialEq for CachedPath { + #[inline] fn eq(&self, other: &Self) -> bool { - self.0.path == other.0.path + self.0.hash == other.0.hash && (Arc::ptr_eq(&self.0, &other.0) || self.0.path == other.0.path) } } impl Eq for CachedPath {} @@ -156,6 +154,7 @@ pub struct CachedPathImpl { canonicalized: OnceLock>, node_modules: OnceLock>, package_json: OnceLock>>, + package_json_path: OnceLock, } impl CachedPathImpl { @@ -168,6 +167,7 @@ impl CachedPathImpl { canonicalized: OnceLock::new(), node_modules: OnceLock::new(), package_json: OnceLock::new(), + package_json_path: OnceLock::new(), } } @@ -192,10 +192,10 @@ impl CachedPathImpl { pub async fn is_file(&self, fs: &Fs, ctx: &mut Ctx) -> bool { if let Some(meta) = self.meta(fs).await { - ctx.add_file_dependency(self.path()); + ctx.add_file_dependency_with_hash(self.to_path_buf(), self.hash); meta.is_file } else { - ctx.add_missing_dependency(self.path()); + ctx.add_missing_dependency_with_hash(self.to_path_buf(), self.hash); false } } @@ -203,7 +203,7 @@ impl CachedPathImpl { pub async fn is_dir(&self, fs: &Fs, ctx: &mut Ctx) -> bool { self.meta(fs).await.map_or_else( || { - ctx.add_missing_dependency(self.path()); + 
ctx.add_missing_dependency_with_hash(self.to_path_buf(), self.hash); false }, |meta| meta.is_dir, @@ -308,11 +308,14 @@ impl CachedPathImpl { options: &ResolveOptions, ctx: &mut Ctx, ) -> Result>, ResolveError> { + let package_json_path = self + .package_json_path + .get_or_init(|| async { self.path.join("package.json") }) + .await; // Change to `std::sync::OnceLock::get_or_try_init` when it is stable. let result = self .package_json .get_or_try_init(|| async { - let package_json_path = self.path.join("package.json"); let Ok(package_json_string) = fs.read(&package_json_path).await else { return Ok(None); }; @@ -324,7 +327,6 @@ impl CachedPathImpl { match PackageJson::parse(package_json_path.clone(), real_path, package_json_string) { Ok(v) => Ok(Some(Arc::new(v))), Err(parse_err) => { - let package_json_path = self.path.join("package.json"); let package_json_string = match fs.read_to_string(&package_json_path).await { Ok(c) => c, Err(io_err) => { @@ -335,7 +337,7 @@ impl CachedPathImpl { if let Some(err) = serde_err { Err(ResolveError::from_serde_json_error( - package_json_path, + package_json_path.clone(), &err, Some(package_json_string), )) @@ -343,7 +345,7 @@ impl CachedPathImpl { let (line, column) = off_to_location(&package_json_string, parse_err.index()); Err(ResolveError::JSON(JSONError { - path: package_json_path, + path: package_json_path.clone(), message: parse_err.error().to_string(), line, column, @@ -363,14 +365,10 @@ impl CachedPathImpl { } Ok(None) => { // Avoid an allocation by making this lazy - if let Some(deps) = &mut ctx.missing_dependencies { - deps.push(self.path.join("package.json")); - } + ctx.add_missing_dependency(package_json_path); } Err(_) => { - if let Some(deps) = &mut ctx.file_dependencies { - deps.push(self.path.join("package.json")); - } + ctx.add_file_dependency(package_json_path); } } result @@ -390,7 +388,9 @@ impl Hash for dyn CacheKey + '_ { impl PartialEq for dyn CacheKey + '_ { fn eq(&self, other: &Self) -> bool { - 
self.tuple().1 == other.tuple().1 + let self_tuple = self.tuple(); + let other_tuple = other.tuple(); + self_tuple.0 == other_tuple.0 && self_tuple.1 == other_tuple.1 } } @@ -407,20 +407,3 @@ impl<'a> Borrow for (u64, &'a Path) { self } } - -/// Since the cache key is memoized, use an identity hasher -/// to avoid double cache. -#[derive(Default)] -struct IdentityHasher(u64); - -impl Hasher for IdentityHasher { - fn write(&mut self, _: &[u8]) { - unreachable!("Invalid use of IdentityHasher") - } - fn write_u64(&mut self, n: u64) { - self.0 = n; - } - fn finish(&self) -> u64 { - self.0 - } -} diff --git a/src/context.rs b/src/context.rs index d03ce8c0..6bad43c2 100644 --- a/src/context.rs +++ b/src/context.rs @@ -3,7 +3,7 @@ use std::{ path::{Path, PathBuf}, }; -use crate::error::ResolveError; +use crate::{error::ResolveError, PathDependency}; #[derive(Debug, Default, Clone)] pub struct ResolveContext(ResolveContextImpl); @@ -22,6 +22,12 @@ pub struct ResolveContextImpl { /// Files that was found on file system pub missing_dependencies: Option>, + /// Files that were found on file system, with precomputed hashes. + pub prehashed_file_dependencies: Option>, + + /// Files that were not found on file system, with precomputed hashes. + pub prehashed_missing_dependencies: Option>, + /// The current resolving alias for bailing recursion alias. 
pub resolving_alias: Option, @@ -62,18 +68,43 @@ impl ResolveContext { self.missing_dependencies.replace(vec![]); } + pub fn init_prehashed_dependencies(&mut self) { + self.prehashed_file_dependencies.replace(vec![]); + self.prehashed_missing_dependencies.replace(vec![]); + } + pub fn add_file_dependency(&mut self, dep: &Path) { - if let Some(deps) = &mut self.file_dependencies { + if let Some(deps) = &mut self.prehashed_file_dependencies { + deps.push(PathDependency::new(dep.to_path_buf())); + } else if let Some(deps) = &mut self.file_dependencies { deps.push(dep.to_path_buf()); } } + pub fn add_file_dependency_with_hash(&mut self, dep: PathBuf, hash: u64) { + if let Some(deps) = &mut self.prehashed_file_dependencies { + deps.push(PathDependency::with_hash(dep, hash)); + } else if let Some(deps) = &mut self.file_dependencies { + deps.push(dep); + } + } + pub fn add_missing_dependency(&mut self, dep: &Path) { - if let Some(deps) = &mut self.missing_dependencies { + if let Some(deps) = &mut self.prehashed_missing_dependencies { + deps.push(PathDependency::new(dep.to_path_buf())); + } else if let Some(deps) = &mut self.missing_dependencies { deps.push(dep.to_path_buf()); } } + pub fn add_missing_dependency_with_hash(&mut self, dep: PathBuf, hash: u64) { + if let Some(deps) = &mut self.prehashed_missing_dependencies { + deps.push(PathDependency::with_hash(dep, hash)); + } else if let Some(deps) = &mut self.missing_dependencies { + deps.push(dep); + } + } + pub fn with_resolving_alias(&mut self, alias: String) { self.resolving_alias = Some(alias); } diff --git a/src/hashing.rs b/src/hashing.rs new file mode 100644 index 00000000..1e7cb7d0 --- /dev/null +++ b/src/hashing.rs @@ -0,0 +1,30 @@ +use std::{ + hash::{Hash, Hasher}, + path::Path, +}; + +use rustc_hash::FxHasher; + +#[derive(Default)] +pub struct IdentityHasher(u64); + +impl Hasher for IdentityHasher { + fn write(&mut self, _: &[u8]) { + unreachable!("Invalid use of IdentityHasher") + } + + fn write_u64(&mut 
self, n: u64) { + self.0 = n; + } + + fn finish(&self) -> u64 { + self.0 + } +} + +#[inline] +pub fn hash_path(path: &Path) -> u64 { + let mut hasher = FxHasher::default(); + path.hash(&mut hasher); + hasher.finish() +} diff --git a/src/lib.rs b/src/lib.rs index 8b1a13b1..8b8fd58d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -52,6 +52,7 @@ mod cache; mod context; mod error; mod file_system; +mod hashing; mod options; mod package_json; mod path; @@ -65,8 +66,10 @@ mod tests; use std::{ borrow::Cow, cmp::Ordering, + collections::HashSet, ffi::OsStr, fmt, + hash::{BuildHasherDefault, Hash, Hasher}, path::{Component, Path, PathBuf}, sync::{Arc, OnceLock}, }; @@ -79,6 +82,7 @@ pub use crate::{ builtins::NODEJS_BUILTINS, error::{JSONError, ResolveError, SpecifierError}, file_system::{FileMetadata, FileSystem, FileSystemOptions, FileSystemOs}, + hashing::IdentityHasher, options::{ Alias, AliasValue, EnforceExtension, ResolveOptions, Restriction, TsconfigOptions, TsconfigReferences, @@ -89,6 +93,7 @@ pub use crate::{ use crate::{ cache::{Cache, CachedPath}, context::ResolveContext as Ctx, + hashing::hash_path, package_json::JSONMap, path::{PathUtil, SLASH_START}, specifier::Specifier, @@ -107,6 +112,90 @@ pub struct ResolveContext { pub missing_dependencies: FxHashSet, } +pub type PathDependencySet = HashSet>; + +#[derive(Debug, Clone)] +pub struct PathDependency { + path: PathBuf, + hash: u64, +} + +impl PathDependency { + #[inline] + pub fn new(path: PathBuf) -> Self { + let hash = hash_path(&path); + Self { path, hash } + } + + #[inline] + pub fn with_hash(path: PathBuf, hash: u64) -> Self { + debug_assert_eq!(hash, hash_path(&path)); + Self { path, hash } + } + + #[inline] + pub fn path(&self) -> &Path { + &self.path + } + + #[inline] + pub fn precomputed_hash(&self) -> u64 { + self.hash + } + + #[inline] + pub fn into_path_buf(self) -> PathBuf { + self.path + } +} + +impl From for PathDependency { + #[inline] + fn from(value: PathBuf) -> Self { + Self::new(value) + } 
+} + +impl From for PathBuf { + #[inline] + fn from(value: PathDependency) -> Self { + value.path + } +} + +impl AsRef for PathDependency { + #[inline] + fn as_ref(&self) -> &Path { + self.path() + } +} + +impl Hash for PathDependency { + #[inline] + fn hash(&self, state: &mut H) { + state.write_u64(self.hash); + } +} + +impl PartialEq for PathDependency { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.hash == other.hash && self.path == other.path + } +} + +impl Eq for PathDependency {} + +/// Context returned from the [Resolver::resolve_with_prehashed_context] API. +#[derive(Debug, Default, Clone)] +pub struct ResolvePreHashedContext { + /// Files that were found on file system + pub file_dependencies: PathDependencySet, + + /// Dependencies that were not found on file system + pub missing_dependencies: PathDependencySet, +} + /// Resolver with the current operating system as the file system pub type Resolver = ResolverGeneric; @@ -238,6 +327,32 @@ impl ResolverGeneric { result } + /// Resolve `specifier` at absolute `directory` and return dependencies with + /// their precomputed path hash. 
+ /// + /// # Errors + /// + /// * See [ResolveError] + pub async fn resolve_with_prehashed_context>( + &self, + directory: P, + specifier: &str, + resolve_context: &mut ResolvePreHashedContext, + ) -> Result { + let mut ctx = Ctx::default(); + ctx.init_prehashed_dependencies(); + let result = self + .resolve_tracing(directory.as_ref(), specifier, &mut ctx) + .await; + if let Some(deps) = &mut ctx.prehashed_file_dependencies { + resolve_context.file_dependencies.extend(deps.drain(..)); + } + if let Some(deps) = &mut ctx.prehashed_missing_dependencies { + resolve_context.missing_dependencies.extend(deps.drain(..)); + } + result + } + /// Wrap `resolve_impl` with `tracing` information #[cfg_attr(feature="enable_instrument", tracing::instrument(level=tracing::Level::DEBUG, skip_all, fields(path = %directory.to_string_lossy(), specifier = specifier)))] async fn resolve_tracing( diff --git a/src/tests/dependencies.rs b/src/tests/dependencies.rs index cf0e3657..36a6332c 100644 --- a/src/tests/dependencies.rs +++ b/src/tests/dependencies.rs @@ -2,12 +2,21 @@ #[cfg(not(target_os = "windows"))] // MemoryFS's path separator is always `/` so the test will not pass in windows. 
mod windows { - use std::path::PathBuf; + use std::{ + hash::{Hash, Hasher}, + path::{Path, PathBuf}, + }; - use rustc_hash::FxHashSet; + use rustc_hash::{FxHashSet, FxHasher}; use super::super::memory_fs::MemoryFS; - use crate::{ResolveContext, ResolveOptions, ResolverGeneric}; + use crate::{ResolveContext, ResolveOptions, ResolvePreHashedContext, ResolverGeneric}; + + fn path_hash(path: &Path) -> u64 { + let mut hasher = FxHasher::default(); + path.hash(&mut hasher); + hasher.finish() + } fn file_system() -> MemoryFS { MemoryFS::new(&[ @@ -99,16 +108,57 @@ mod windows { for (name, context, request, result, file_dependencies, missing_dependencies) in data { let mut ctx = ResolveContext::default(); let path = PathBuf::from(context); - let resolved = resolver - .resolve_with_context(path, request, &mut ctx) + let resolved_path = resolver + .resolve_with_context(&path, request, &mut ctx) + .await + .map(|r| r.full_path()); + assert_eq!(resolved_path, Ok(PathBuf::from(result))); + let expected_file_dependencies = file_dependencies + .iter() + .map(PathBuf::from) + .collect::>(); + let expected_missing_dependencies = missing_dependencies + .iter() + .map(PathBuf::from) + .collect::>(); + assert_eq!(ctx.file_dependencies, expected_file_dependencies, "{name}"); + assert_eq!( + ctx.missing_dependencies, expected_missing_dependencies, + "{name}" + ); + + let mut prehashed_ctx = ResolvePreHashedContext::default(); + let prehashed_resolved = resolver + .resolve_with_prehashed_context(&path, request, &mut prehashed_ctx) .await .map(|r| r.full_path()); - assert_eq!(resolved, Ok(PathBuf::from(result))); - let file_dependencies = FxHashSet::from_iter(file_dependencies.iter().map(PathBuf::from)); - let missing_dependencies = - FxHashSet::from_iter(missing_dependencies.iter().map(PathBuf::from)); - assert_eq!(ctx.file_dependencies, file_dependencies, "{name}"); - assert_eq!(ctx.missing_dependencies, missing_dependencies, "{name}"); + assert_eq!(prehashed_resolved, 
Ok(PathBuf::from(result))); + assert!(prehashed_ctx + .file_dependencies + .iter() + .all(|dependency| { dependency.precomputed_hash() == path_hash(dependency.path()) })); + assert!(prehashed_ctx + .missing_dependencies + .iter() + .all(|dependency| { dependency.precomputed_hash() == path_hash(dependency.path()) })); + let prehashed_file_dependencies = prehashed_ctx + .file_dependencies + .iter() + .map(|d| d.path().to_owned()) + .collect::>(); + let prehashed_missing_dependencies = prehashed_ctx + .missing_dependencies + .iter() + .map(|d| d.path().to_owned()) + .collect::>(); + assert_eq!( + prehashed_file_dependencies, expected_file_dependencies, + "{name}" + ); + assert_eq!( + prehashed_missing_dependencies, expected_missing_dependencies, + "{name}" + ); } } }