Skip to content

Commit 6381639

Browse files
stormslowly and Copilot authored
perf(alias): replace strip_prefix loop in load_alias with a byte trie (#227)
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
1 parent b138142 commit 6381639

2 files changed

Lines changed: 335 additions & 27 deletions

File tree

src/alias_trie.rs

Lines changed: 272 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,272 @@
//! Byte-trie accelerator for [`crate::ResolveOptions::alias`] matching.
//!
//! Replaces the linear `strip_prefix` scan in `load_alias` with a trie walk:
//! descend the trie one byte of the specifier at a time, collect terminal
//! aliases along the way, and yield them in the order they were originally
//! declared.
7+
8+
use crate::Alias;
9+
10+
/// A successful alias-key match against a specifier.
///
/// All three fields are plain `Copy` values, so the match itself is `Copy` and
/// can be passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AliasMatch {
    /// Index of the entry in the original `Alias` vec, used to preserve the
    /// `aliases.iter()` ordering callers rely on.
    pub(crate) index: usize,
    /// Length in bytes of the matched key (after stripping any `$` suffix).
    /// Lets callers compute the specifier tail without re-running the prefix.
    pub(crate) key_len: usize,
    /// True for `$`-suffixed keys — caller should treat the match as exact.
    pub(crate) is_exact: bool,
}
22+
23+
/// The aliases whose key ends at one trie node. Normally a single entry; when
/// several aliases were declared with the same key string they all land here,
/// and the loader is expected to try them in declared order.
type TerminalList = Vec<Terminal>;

/// Per-alias record stored on the trie node where that alias's key ends.
#[derive(Debug)]
struct Terminal {
    /// Position of the alias in the vec the trie was built from.
    alias_index: usize,
    /// Byte length of the key as stored in the trie (any `$` suffix removed).
    key_len: usize,
    /// Whether the declared key ended in `$`, i.e. the alias is exact-match.
    is_exact: bool,
}
33+
34+
#[derive(Debug, Default)]
35+
struct Node {
36+
/// Sparse children indexed by edge byte. Low fanout in practice, linear
37+
/// scan beats a `[Option<...>; 256]` for memory and cache locality.
38+
children: Vec<(u8, Box<Self>)>,
39+
terminals: TerminalList,
40+
}
41+
42+
impl Node {
43+
fn descend(&self, byte: u8) -> Option<&Self> {
44+
self
45+
.children
46+
.iter()
47+
.find_map(|(b, n)| (*b == byte).then(|| n.as_ref()))
48+
}
49+
50+
fn descend_mut_or_insert(&mut self, byte: u8) -> &mut Self {
51+
if let Some(pos) = self.children.iter().position(|(b, _)| *b == byte) {
52+
return &mut self.children[pos].1;
53+
}
54+
self.children.push((byte, Box::new(Self::default())));
55+
&mut self.children.last_mut().unwrap().1
56+
}
57+
}
58+
59+
pub struct AliasTrie {
60+
root: Node,
61+
}
62+
63+
impl AliasTrie {
64+
pub(crate) fn build(aliases: &Alias) -> Self {
65+
let mut root = Node::default();
66+
for (index, (key, _)) in aliases.iter().enumerate() {
67+
// `$`-suffixed keys are exact-match aliases — index by the stripped key.
68+
let (effective, is_exact) = key
69+
.strip_suffix('$')
70+
.map_or((key.as_str(), false), |stripped| (stripped, true));
71+
let mut node = &mut root;
72+
for byte in effective.as_bytes() {
73+
node = node.descend_mut_or_insert(*byte);
74+
}
75+
node.terminals.push(Terminal {
76+
alias_index: index,
77+
key_len: effective.len(),
78+
is_exact,
79+
});
80+
}
81+
Self { root }
82+
}
83+
84+
pub(crate) fn matches(&self, specifier: &str) -> Vec<AliasMatch> {
85+
let bytes = specifier.as_bytes();
86+
let mut out = Vec::new();
87+
collect_terminals(&self.root, bytes, 0, &mut out);
88+
let mut node = &self.root;
89+
for (i, byte) in bytes.iter().enumerate() {
90+
let Some(next) = node.descend(*byte) else {
91+
break;
92+
};
93+
node = next;
94+
collect_terminals(node, bytes, i + 1, &mut out);
95+
}
96+
// Trie walk yields matches by key length; callers expect declared order so
97+
// they can try AliasValue lists in the order the user wrote them.
98+
if out.len() > 1 {
99+
out.sort_unstable_by_key(|m| m.index);
100+
}
101+
out
102+
}
103+
}
104+
105+
fn collect_terminals(node: &Node, bytes: &[u8], consumed: usize, out: &mut Vec<AliasMatch>) {
106+
if node.terminals.is_empty() {
107+
return;
108+
}
109+
let tail = &bytes[consumed..];
110+
let tail_empty = tail.is_empty();
111+
let tail_slash = matches!(tail.first(), Some(b'/' | b'\\'));
112+
for term in &node.terminals {
113+
let acceptable = if term.is_exact {
114+
tail_empty
115+
} else {
116+
tail_empty || tail_slash
117+
};
118+
if acceptable {
119+
out.push(AliasMatch {
120+
index: term.alias_index,
121+
key_len: term.key_len,
122+
is_exact: term.is_exact,
123+
});
124+
}
125+
}
126+
}
127+
128+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::AliasValue;

    /// Builds an `Alias` list from `(key, targets)` pairs, keeping their order.
    fn aliases(entries: &[(&str, &[&str])]) -> Alias {
        entries
            .iter()
            .map(|(key, targets)| {
                (
                    (*key).to_string(),
                    targets.iter().map(|target| AliasValue::from(*target)).collect(),
                )
            })
            .collect()
    }

    /// Projects a match list onto the declaration indices it carries.
    fn indices(matches: &[AliasMatch]) -> Vec<usize> {
        matches.iter().map(|m| m.index).collect()
    }

    #[test]
    fn empty_trie_yields_no_matches() {
        let aliases: Alias = Vec::new();
        let trie = AliasTrie::build(&aliases);
        let matches = trie.matches("anything");
        assert!(matches.is_empty(), "expected no matches, got {matches:?}");
    }

    #[test]
    fn matches_prefix_key_with_trailing_slash() {
        // Alias "react" matches specifier "react/foo" (prefix + slash).
        let aliases = aliases(&[("react", &["./src/react"])]);
        let trie = AliasTrie::build(&aliases);
        let matches = trie.matches("react/foo");
        assert_eq!(
            matches,
            vec![AliasMatch {
                index: 0,
                key_len: 5,
                is_exact: false
            }]
        );
    }

    #[test]
    fn matches_prefix_key_with_exact_specifier() {
        // Alias "react" also matches the bare specifier "react".
        let aliases = aliases(&[("react", &["./src/react"])]);
        let trie = AliasTrie::build(&aliases);
        let matches = trie.matches("react");
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].index, 0);
    }

    #[test]
    fn rejects_prefix_followed_by_non_slash() {
        // "react-dom" must NOT match the "react" prefix alias — tail starts with
        // `-`, failing the SLASH_START filter.
        let aliases = aliases(&[("react", &["./src/react"])]);
        let trie = AliasTrie::build(&aliases);
        let matches = trie.matches("react-dom");
        assert!(matches.is_empty(), "expected no matches, got {matches:?}");
    }

    #[test]
    fn accepts_prefix_followed_by_backslash() {
        // SLASH_START accepts `\\` too (Windows paths).
        let aliases = aliases(&[("react", &["./src/react"])]);
        let trie = AliasTrie::build(&aliases);
        let matches = trie.matches("react\\foo");
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].index, 0);
    }

    #[test]
    fn empty_alias_key_matches_slash_prefixed_specifier() {
        // `("", v)` is enhanced-resolve's emergent "match any slash-prefixed
        // specifier" wildcard. The trie must report it without consuming any
        // bytes of the specifier.
        let aliases = aliases(&[("", &["./redirect"])]);
        let trie = AliasTrie::build(&aliases);
        let matches = trie.matches("/foo");
        assert_eq!(
            matches,
            vec![AliasMatch {
                index: 0,
                key_len: 0,
                is_exact: false
            }]
        );
    }

    #[test]
    fn matches_preserve_declared_order_long_before_short() {
        // Same path can match both aliases. The caller (load_alias) tries entries
        // in declared order until one succeeds — so the trie must return them in
        // that order even when the trie naturally encounters the shorter key
        // first during the walk.
        let aliases = aliases(&[("a/long/path", &["alpha"]), ("a", &["bravo"])]);
        let trie = AliasTrie::build(&aliases);
        let matches = trie.matches("a/long/path/foo");
        assert_eq!(indices(&matches), vec![0, 1], "got {matches:?}");
    }

    #[test]
    fn duplicate_keys_return_all_terminals_in_declared_order() {
        // Same key string registered multiple times lands on the same trie node
        // with multiple `Terminal` entries (see `TerminalList`). `matches` must
        // surface every duplicate and keep them in declared order, so the loader
        // can try each AliasValue list in the order the user wrote them.
        let aliases = aliases(&[
            ("react", &["./alpha"]),     // idx=0
            ("other", &["./unrelated"]), // idx=1, interleaved to ensure sort isn't a no-op
            ("react", &["./bravo"]),     // idx=2
            ("react", &["./charlie"]),   // idx=3
        ]);
        let trie = AliasTrie::build(&aliases);
        let matches = trie.matches("react/foo");
        assert_eq!(indices(&matches), vec![0, 2, 3], "got {matches:?}");
        assert!(
            matches.iter().all(|m| m.key_len == 5 && !m.is_exact),
            "all duplicates of `react` must share key_len=5 and is_exact=false, got {matches:?}"
        );
    }

    #[test]
    fn dollar_exact_key_rejects_specifier_with_tail() {
        // "b$" is exact-match for "b" only; "b/index" must NOT match.
        let aliases = aliases(&[("b$", &["a/index"])]);
        let trie = AliasTrie::build(&aliases);
        let with_tail = trie.matches("b/index");
        assert!(
            with_tail.is_empty(),
            "exact-match alias should not accept tail, got {with_tail:?}"
        );
        let exact = trie.matches("b");
        assert_eq!(
            exact,
            vec![AliasMatch {
                index: 0,
                key_len: 1,
                is_exact: true
            }]
        );
    }

    #[test]
    fn exact_and_prefix_aliases_can_share_a_key() {
        // "b$" and "b" are both stored under the key "b". The bare specifier
        // satisfies both; a specifier with a tail satisfies only the prefix alias.
        let aliases = aliases(&[("b$", &["./exact"]), ("b", &["./prefix"])]);
        let trie = AliasTrie::build(&aliases);
        assert_eq!(
            trie.matches("b"),
            vec![
                AliasMatch {
                    index: 0,
                    key_len: 1,
                    is_exact: true
                },
                AliasMatch {
                    index: 1,
                    key_len: 1,
                    is_exact: false
                },
            ]
        );
        assert_eq!(
            trie.matches("b/x"),
            vec![AliasMatch {
                index: 1,
                key_len: 1,
                is_exact: false
            }]
        );
    }

    #[test]
    fn specifier_shorter_than_key_or_empty_does_not_match() {
        // Walking stops inside the key: the nodes visited carry no terminals.
        let aliases = aliases(&[("react", &["./src/react"])]);
        let trie = AliasTrie::build(&aliases);
        let partial = trie.matches("rea");
        assert!(partial.is_empty(), "expected no matches, got {partial:?}");
        let empty = trie.matches("");
        assert!(empty.is_empty(), "expected no matches, got {empty:?}");
    }

    #[test]
    fn multi_byte_key_reports_length_in_bytes() {
        // "ñ" is two bytes in UTF-8; `key_len` counts bytes, not chars.
        let aliases = aliases(&[("ñ", &["./n"])]);
        let trie = AliasTrie::build(&aliases);
        assert_eq!(
            trie.matches("ñ/x"),
            vec![AliasMatch {
                index: 0,
                key_len: 2,
                is_exact: false
            }]
        );
    }
}

0 commit comments

Comments
 (0)