Skip to content

Commit b138142

Browse files
authored
perf(deps): carry precomputed hash via ResolverPath in resolve context (#232)
1 parent c8af902 commit b138142

13 files changed

Lines changed: 295 additions & 64 deletions

examples/resolver.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ async fn main() {
4040
};
4141

4242
let mut sorted_file_deps = ctx.file_dependencies.iter().collect::<Vec<_>>();
43-
sorted_file_deps.sort();
43+
sorted_file_deps.sort_by_key(|p| p.as_path());
4444
println!("file_deps: {:#?}", sorted_file_deps);
4545

4646
let mut sorted_missing = ctx.missing_dependencies.iter().collect::<Vec<_>>();
47-
sorted_missing.sort();
47+
sorted_missing.sort_by_key(|p| p.as_path());
4848
println!("missing_deps: {:#?}", sorted_missing);
4949
}

src/cache.rs

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
#[cfg(unix)]
2-
use std::os::unix::ffi::OsStrExt;
31
use std::{
42
borrow::{Borrow, Cow},
53
convert::AsRef,
@@ -20,6 +18,7 @@ use crate::{
2018
context::ResolveContext as Ctx,
2119
package_json::{off_to_location, PackageJson},
2220
path::PathUtil,
21+
resolver_path::{hash_path, ResolverPath},
2322
FileMetadata, FileSystem, JSONError, ResolveError, ResolveOptions, TsConfig,
2423
};
2524

@@ -45,17 +44,7 @@ impl<Fs: Send + Sync + FileSystem> Cache<Fs> {
4544
}
4645

4746
pub fn value(&self, path: &Path) -> CachedPath {
48-
let hash = {
49-
let mut hasher = FxHasher::default();
50-
// On Unix, hash the raw path bytes in one bulk `Hasher::write`. The std
51-
// `Path::hash` impl walks components (utf8 split + per-segment write)
52-
// and dominated profile samples on this cache-lookup hot path.
53-
#[cfg(unix)]
54-
hasher.write(path.as_os_str().as_bytes());
55-
#[cfg(not(unix))]
56-
path.hash(&mut hasher);
57-
hasher.finish()
58-
};
47+
let hash = hash_path(path);
5948
if let Some(cache_entry) = self.paths.get((hash, path).borrow() as &dyn CacheKey) {
6049
return cache_entry.clone();
6150
}
@@ -166,6 +155,15 @@ pub struct CachedPathImpl {
166155
package_json: OnceLock<Option<Arc<PackageJson>>>,
167156
}
168157

158+
impl From<&CachedPathImpl> for ResolverPath {
159+
/// Reuse the cache-side `FxHash` (already computed in `Cache::value`); the
160+
/// only remaining work is one `Arc::from(&Path)` to materialize the shared
161+
/// path buffer for the `ResolveContext` sink.
162+
fn from(cached: &CachedPathImpl) -> Self {
163+
Self::from_parts(cached.hash, Arc::from(&*cached.path))
164+
}
165+
}
166+
169167
impl CachedPathImpl {
170168
fn new(hash: u64, path: Box<Path>, parent: Option<CachedPath>) -> Self {
171169
Self {
@@ -205,18 +203,18 @@ impl CachedPathImpl {
205203

206204
pub async fn is_file<Fs: Send + Sync + FileSystem>(&self, fs: &Fs, ctx: &mut Ctx) -> bool {
207205
if let Some(meta) = self.meta(fs).await {
208-
ctx.add_file_dependency(self.path());
206+
ctx.add_file_dependency(self);
209207
meta.is_file
210208
} else {
211-
ctx.add_missing_dependency(self.path());
209+
ctx.add_missing_dependency(self);
212210
false
213211
}
214212
}
215213

216214
pub async fn is_dir<Fs: Send + Sync + FileSystem>(&self, fs: &Fs, ctx: &mut Ctx) -> bool {
217215
self.meta(fs).await.map_or_else(
218216
|| {
219-
ctx.add_missing_dependency(self.path());
217+
ctx.add_missing_dependency(self);
220218
false
221219
},
222220
|meta| meta.is_dir,
@@ -338,8 +336,8 @@ impl CachedPathImpl {
338336
match pkg {
339337
Some(package_json) => ctx.add_file_dependency(&package_json.path),
340338
None => {
341-
if let Some(deps) = &mut ctx.missing_dependencies {
342-
deps.push(self.path.join("package.json"));
339+
if ctx.missing_dependencies.is_some() {
340+
ctx.add_missing_dependency(self.path.join("package.json"));
343341
}
344342
}
345343
}
@@ -400,13 +398,13 @@ impl CachedPathImpl {
400398
}
401399
Ok(None) => {
402400
// Avoid an allocation by making this lazy
403-
if let Some(deps) = &mut ctx.missing_dependencies {
404-
deps.push(self.path.join("package.json"));
401+
if ctx.missing_dependencies.is_some() {
402+
ctx.add_missing_dependency(self.path.join("package.json"));
405403
}
406404
}
407405
Err(_) => {
408-
if let Some(deps) = &mut ctx.file_dependencies {
409-
deps.push(self.path.join("package.json"));
406+
if ctx.file_dependencies.is_some() {
407+
ctx.add_file_dependency(self.path.join("package.json"));
410408
}
411409
}
412410
}

src/context.rs

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
1-
use std::{
2-
ops::{Deref, DerefMut},
3-
path::{Path, PathBuf},
4-
};
1+
use std::ops::{Deref, DerefMut};
52

6-
use crate::error::ResolveError;
3+
use crate::{error::ResolveError, resolver_path::ResolverPath};
74

85
#[derive(Debug, Default, Clone)]
96
pub struct ResolveContext(ResolveContextImpl);
@@ -16,11 +13,11 @@ pub struct ResolveContextImpl {
1613

1714
pub fragment: Option<String>,
1815

19-
/// Files that was found on file system
20-
pub file_dependencies: Option<Vec<PathBuf>>,
16+
/// Files that were found on file system
17+
pub file_dependencies: Option<Vec<ResolverPath>>,
2118

22-
/// Files that was found on file system
23-
pub missing_dependencies: Option<Vec<PathBuf>>,
19+
/// Files that were not found on file system
20+
pub missing_dependencies: Option<Vec<ResolverPath>>,
2421

2522
/// The current resolving alias for bailing recursion alias.
2623
pub resolving_alias: Option<String>,
@@ -62,15 +59,19 @@ impl ResolveContext {
6259
self.missing_dependencies.replace(vec![]);
6360
}
6461

65-
pub fn add_file_dependency(&mut self, dep: &Path) {
62+
// Accepts anything convertible to `ResolverPath`. The conversion (which
63+
// includes the `Arc<Path>` allocation for `&Path` / `PathBuf` callers, or
64+
// hash reuse for `&CachedPathImpl`) only runs inside the `Some` branch, so
65+
// `resolve()` calls without a context pay no conversion cost.
66+
pub fn add_file_dependency<P: Into<ResolverPath>>(&mut self, dep: P) {
6667
if let Some(deps) = &mut self.file_dependencies {
67-
deps.push(dep.to_path_buf());
68+
deps.push(dep.into());
6869
}
6970
}
7071

71-
pub fn add_missing_dependency(&mut self, dep: &Path) {
72+
pub fn add_missing_dependency<P: Into<ResolverPath>>(&mut self, dep: P) {
7273
if let Some(deps) = &mut self.missing_dependencies {
73-
deps.push(dep.to_path_buf());
74+
deps.push(dep.into());
7475
}
7576
}
7677

src/lib.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ mod options;
5656
mod package_json;
5757
mod path;
5858
mod resolution;
59+
mod resolver_path;
5960
mod specifier;
6061
mod tsconfig;
6162

@@ -85,6 +86,7 @@ pub use crate::{
8586
},
8687
package_json::{JSONValue, ModuleType, PackageJson},
8788
resolution::Resolution,
89+
resolver_path::ResolverPath,
8890
};
8991
use crate::{
9092
cache::{Cache, CachedPath},
@@ -100,11 +102,11 @@ type ResolveResult = Result<Option<CachedPath>, ResolveError>;
100102
/// Context returned from the [Resolver::resolve_with_context] API
101103
#[derive(Debug, Default, Clone)]
102104
pub struct ResolveContext {
103-
/// Files that was found on file system
104-
pub file_dependencies: FxHashSet<PathBuf>,
105+
/// Files that were found on file system
106+
pub file_dependencies: FxHashSet<ResolverPath>,
105107

106-
/// Dependencies that was not found on file system
107-
pub missing_dependencies: FxHashSet<PathBuf>,
108+
/// Dependencies that were not found on file system
109+
pub missing_dependencies: FxHashSet<ResolverPath>,
108110
}
109111

110112
/// Resolver with the current operating system as the file system

src/resolver_path.rs

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
#[cfg(unix)]
2+
use std::os::unix::ffi::OsStrExt;
3+
use std::{
4+
fmt,
5+
hash::{Hash, Hasher},
6+
ops::Deref,
7+
path::{Path, PathBuf},
8+
sync::Arc,
9+
};
10+
11+
use rustc_hash::FxHasher;
12+
13+
/// A path returned in [`crate::ResolveContext`] dependencies, paired with a
14+
/// precomputed `FxHash` of the path bytes.
15+
///
16+
/// Downstream consumers (rspack) place these into hash collections keyed by
17+
/// the precomputed hash, avoiding repeated hashing of long absolute paths on
18+
/// every insert and lookup.
19+
///
20+
/// Hash and equality are kept aligned per platform so the standard
21+
/// `a == b ⇒ hash(a) == hash(b)` contract holds:
22+
/// - On Unix, both hash and equality compare the raw `OsStr` bytes (fast bulk
23+
/// `write`; matches the resolver's cache-side `Cache::value` hash).
24+
/// - On other platforms, both go through `Path` (component-walked hash and
25+
/// normalized equality), so e.g. `pack1/foo` and `pack1\foo` are treated as
26+
/// the same dependency on Windows — same behavior `PathBuf` had before.
27+
#[derive(Clone)]
28+
pub struct ResolverPath {
29+
hash: u64,
30+
path: Arc<Path>,
31+
}
32+
33+
impl ResolverPath {
34+
pub fn new(path: Arc<Path>) -> Self {
35+
let hash = hash_path(&path);
36+
Self { hash, path }
37+
}
38+
39+
/// Construct without recomputing the hash.
40+
///
41+
/// # Precondition
42+
/// `hash` MUST equal `hash_path(path)`. Violating this breaks `HashSet`'s
43+
/// bucketing invariant — entries become unfindable and deduplication stops
44+
/// working. Not `unsafe` because the failure mode is a logic bug rather
45+
/// than UB.
46+
#[inline]
47+
pub(crate) fn from_parts(hash: u64, path: Arc<Path>) -> Self {
48+
Self { hash, path }
49+
}
50+
51+
#[inline]
52+
pub fn as_path(&self) -> &Path {
53+
&self.path
54+
}
55+
56+
#[inline]
57+
pub fn as_arc(&self) -> &Arc<Path> {
58+
&self.path
59+
}
60+
61+
#[inline]
62+
pub fn into_arc(self) -> Arc<Path> {
63+
self.path
64+
}
65+
66+
/// The precomputed `FxHash` of the path, as produced by `hash_path`.
67+
#[inline]
68+
pub fn precomputed_hash(&self) -> u64 {
69+
self.hash
70+
}
71+
}
72+
73+
/// Hash a path with `FxHasher`, matching the bytes-on-unix optimization used by
74+
/// the resolver's internal cache so [`ResolverPath`] values constructed from a
75+
/// `CachedPath` produce the same `u64` as values constructed from a `&Path`.
76+
#[inline]
77+
pub fn hash_path(path: &Path) -> u64 {
78+
let mut hasher = FxHasher::default();
79+
// The std `Path::hash` impl walks components (utf8 split + per-segment
80+
// write); a single bulk `write` of the raw bytes is materially cheaper on
81+
// the resolver hot path.
82+
#[cfg(unix)]
83+
hasher.write(path.as_os_str().as_bytes());
84+
#[cfg(not(unix))]
85+
path.hash(&mut hasher);
86+
hasher.finish()
87+
}
88+
89+
impl Hash for ResolverPath {
90+
#[inline]
91+
fn hash<H: Hasher>(&self, state: &mut H) {
92+
state.write_u64(self.hash);
93+
}
94+
}
95+
96+
impl PartialEq for ResolverPath {
97+
/// Mirror `hash_path`'s per-platform scheme so the `a == b ⇒ hash(a) ==
98+
/// hash(b)` invariant holds: raw `OsStr` bytes on Unix (matches the
99+
/// bulk-byte hash), component-normalized `Path::eq` elsewhere (matches
100+
/// `Path::hash`).
101+
fn eq(&self, other: &Self) -> bool {
102+
#[cfg(unix)]
103+
{
104+
self.path.as_os_str() == other.path.as_os_str()
105+
}
106+
#[cfg(not(unix))]
107+
{
108+
self.path == other.path
109+
}
110+
}
111+
}
112+
113+
impl Eq for ResolverPath {}
114+
115+
impl Deref for ResolverPath {
116+
type Target = Path;
117+
118+
fn deref(&self) -> &Self::Target {
119+
&self.path
120+
}
121+
}
122+
123+
impl AsRef<Path> for ResolverPath {
124+
fn as_ref(&self) -> &Path {
125+
&self.path
126+
}
127+
}
128+
129+
impl From<PathBuf> for ResolverPath {
130+
fn from(path: PathBuf) -> Self {
131+
Self::new(Arc::from(path))
132+
}
133+
}
134+
135+
impl From<&Path> for ResolverPath {
136+
fn from(path: &Path) -> Self {
137+
Self::new(Arc::from(path))
138+
}
139+
}
140+
141+
impl From<&PathBuf> for ResolverPath {
142+
fn from(path: &PathBuf) -> Self {
143+
Self::new(Arc::from(path.as_path()))
144+
}
145+
}
146+
147+
impl From<Arc<Path>> for ResolverPath {
148+
fn from(path: Arc<Path>) -> Self {
149+
Self::new(path)
150+
}
151+
}
152+
153+
impl fmt::Debug for ResolverPath {
154+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
155+
self.path.fmt(f)
156+
}
157+
}
158+
159+
#[cfg(test)]
160+
mod tests {
161+
use super::*;
162+
163+
#[test]
164+
fn hash_is_path_byte_hash() {
165+
let p: &Path = Path::new("/a/b/c.js");
166+
let rp = ResolverPath::from(p);
167+
assert_eq!(rp.precomputed_hash(), hash_path(p));
168+
}
169+
170+
#[test]
171+
fn equal_paths_have_equal_hashes() {
172+
let a = ResolverPath::from(PathBuf::from("/x/y"));
173+
let b = ResolverPath::from(Path::new("/x/y"));
174+
assert_eq!(a, b);
175+
assert_eq!(a.precomputed_hash(), b.precomputed_hash());
176+
}
177+
178+
#[test]
179+
fn writes_u64_into_hasher() {
180+
use std::{collections::HashSet, hash::BuildHasherDefault};
181+
182+
#[derive(Default)]
183+
struct IdHasher(u64);
184+
impl Hasher for IdHasher {
185+
fn write(&mut self, _: &[u8]) {
186+
unreachable!()
187+
}
188+
fn write_u64(&mut self, n: u64) {
189+
self.0 = n;
190+
}
191+
fn finish(&self) -> u64 {
192+
self.0
193+
}
194+
}
195+
196+
let mut set: HashSet<ResolverPath, BuildHasherDefault<IdHasher>> = HashSet::default();
197+
set.insert(ResolverPath::from(Path::new("/a/b")));
198+
assert!(set.contains(&ResolverPath::from(PathBuf::from("/a/b"))));
199+
}
200+
}

0 commit comments

Comments
 (0)