Skip to content

Commit a4bf205

Browse files
committed
fix: per-async-chunk BFS to correctly handle multi-origin shared modules
Previously the BFS used a shared visited set with single-origin tracking per module. This was buggy when a module was reachable from multiple async chunks whose ancestors differ:

- Module M lives in chunks [P1, P2]
- P1 is an ancestor of async chunk A1; P2 is an ancestor of A2
- A1's BFS reaches M first and records an extraction from P1
- A2's BFS skips M via the shared visited set and never records an extraction from P2
- Result: the cycle between A2 and P2 remains

Fix: run an independent BFS per async chunk with its own visited set. Each module is analyzed in the context of the async chunk actually traversing it, so extractions for different ancestor→async edges don't collide. Complexity goes from O(N+E+A·G) back to O(A·(N+E+G)), but correctness trumps the marginal speedup — A is usually small.

Also add a note about the Phase 1 over-inclusion of function-scoped import()s; this is accepted as safe because extraction only fires when real cross-chunk static cycles exist.
1 parent 1af5bfb commit a4bf205

1 file changed

Lines changed: 67 additions & 70 deletions

File tree

crates/rspack_plugin_esm_library/src/optimize_chunks.rs

Lines changed: 67 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -30,10 +30,14 @@ pub(crate) fn extract_tla_shared_modules(compilation: &mut Compilation) -> bool
3030

3131
// Phase 1: Detect at-risk async chunks.
3232
// Source: modules with `has_top_level_await`.
33-
// Their TOP-LEVEL `AsyncDependenciesBlock`s (we only iterate `module.get_blocks()`,
34-
// which returns blocks attached to the module itself, not nested function blocks)
35-
// target async chunk groups loaded via `await import()`. Those chunk groups'
36-
// chunks are the ones at risk of deadlocking with the parent.
33+
// We iterate `module.get_blocks()` (blocks attached directly to the module).
34+
// This is conservative: a function-scoped `import()` also shows up here when
35+
// the parser attaches its block to the module. We accept that over-inclusion
36+
// — extraction only actually happens in later phases when there's a real
37+
// cross-chunk static dependency back to an ancestor chunk, so function-only
38+
// imports without cycles incur only a cheap BFS and no chunk mutation.
39+
// Precise filtering would need AST-level "is the import() directly awaited
40+
// at module top level" info which is not exposed on the block.
3741
let mut async_chunks_set: FxHashSet<ChunkUkey> = FxHashSet::default();
3842
for (module_id, module) in module_graph.modules() {
3943
if !module.build_meta().has_top_level_await {
@@ -71,15 +75,17 @@ pub(crate) fn extract_tla_shared_modules(compilation: &mut Compilation) -> bool
7175
return false;
7276
}
7377

74-
// Phase 2: Reverse map `ancestor_chunk → {async_chunks that consider it an ancestor}`.
75-
// This replaces per-async-chunk repeated ancestor walks with one O(1) lookup
76-
// during BFS.
78+
// Phase 2 + 3: For each at-risk async chunk, compute its ancestor chunks and
79+
// run a BFS through static outgoing edges. Each async chunk uses its OWN
80+
// `visited` set so that a module reachable from multiple async chunks is
81+
// analyzed once per origin — this matters when a shared module lives in
82+
// DIFFERENT ancestor chunks for different async chunks (e.g. multi-entry
83+
// builds where an async chunk hangs off each entry).
7784
let async_chunks: Vec<ChunkUkey> = async_chunks_set.iter().copied().collect();
78-
let mut chunk_to_ancestor_of: FxHashMap<ChunkUkey, FxHashSet<ChunkUkey>> = FxHashMap::default();
79-
let mut module_to_async_chunk: IdentifierMap<ChunkUkey> = IdentifierMap::default();
80-
let mut queue: VecDeque<ModuleIdentifier> = VecDeque::new();
85+
let mut modules_to_extract: IdentifierMap<FxHashSet<ChunkUkey>> = IdentifierMap::default();
8186

8287
for &async_chunk_ukey in &async_chunks {
88+
// Collect ancestor chunks via chunk group parent traversal
8389
let chunk = compilation
8490
.build_chunk_graph_artifact
8591
.chunk_by_ukey
@@ -89,77 +95,68 @@ pub(crate) fn extract_tla_shared_modules(compilation: &mut Compilation) -> bool
8995
let group = chunk_group_by_ukey.expect_get(group_ukey);
9096
ancestor_groups.extend(group.ancestors(chunk_group_by_ukey));
9197
}
98+
let mut ancestor_chunks: FxHashSet<ChunkUkey> = FxHashSet::default();
9299
for g in &ancestor_groups {
93-
for &ancestor_chunk in &chunk_group_by_ukey.expect_get(g).chunks {
94-
chunk_to_ancestor_of
95-
.entry(ancestor_chunk)
96-
.or_default()
97-
.insert(async_chunk_ukey);
98-
}
99-
}
100-
101-
// Seed BFS with modules in this async chunk
102-
for &m in chunk_graph.get_chunk_modules_identifier(&async_chunk_ukey) {
103-
module_to_async_chunk.insert(m, async_chunk_ukey);
104-
queue.push_back(m);
100+
ancestor_chunks.extend(chunk_group_by_ukey.expect_get(g).chunks.iter().copied());
105101
}
106-
}
107102

108-
// Phase 3: Single BFS with shared `visited`. For each module visited, walk all
109-
// active outgoing connections (skipping `DynamicImport` — those produce
110-
// promises and don't require the target chunk to be fully executed in the
111-
// current evaluation frame). If the target is in an ancestor chunk of this
112-
// module's owning async chunk, record it for extraction.
113-
let mut modules_to_extract: IdentifierMap<FxHashSet<ChunkUkey>> = IdentifierMap::default();
114-
let mut visited = IdentifierSet::default();
115-
116-
while let Some(module_id) = queue.pop_front() {
117-
if !visited.insert(module_id) {
103+
if ancestor_chunks.is_empty() {
118104
continue;
119105
}
120-
let Some(&origin_async_chunk) = module_to_async_chunk.get(&module_id) else {
121-
continue;
122-
};
123106

124-
for conn in module_graph.get_outgoing_connections(&module_id) {
125-
// Dynamic imports don't cause sync-load deadlocks — they hand out a
126-
// promise; skip.
127-
let dep = module_graph.dependency_by_id(&conn.dependency_id);
128-
if dep.dependency_type() == &DependencyType::DynamicImport {
129-
continue;
130-
}
131-
if !conn.is_target_active(
132-
module_graph,
133-
None,
134-
&compilation.module_graph_cache_artifact,
135-
&compilation
136-
.build_module_graph_artifact
137-
.side_effects_state_artifact,
138-
&compilation.exports_info_artifact,
139-
) {
107+
// Per-async-chunk BFS with a local `visited` set. Starts from all modules
108+
// that live in this async chunk.
109+
let mut visited = IdentifierSet::default();
110+
let mut queue: VecDeque<ModuleIdentifier> = chunk_graph
111+
.get_chunk_modules_identifier(&async_chunk_ukey)
112+
.iter()
113+
.copied()
114+
.collect();
115+
116+
while let Some(module_id) = queue.pop_front() {
117+
if !visited.insert(module_id) {
140118
continue;
141119
}
142-
let Some(target) = module_graph.module_identifier_by_dependency_id(&conn.dependency_id)
143-
else {
144-
continue;
145-
};
146-
let target_chunks = chunk_graph.get_module_chunks(*target);
147120

148-
for &target_chunk in target_chunks {
149-
if let Some(ancestor_of) = chunk_to_ancestor_of.get(&target_chunk)
150-
&& ancestor_of.contains(&origin_async_chunk)
151-
{
152-
modules_to_extract
153-
.entry(*target)
154-
.or_default()
155-
.insert(target_chunk);
121+
for conn in module_graph.get_outgoing_connections(&module_id) {
122+
// Dynamic imports produce promises; the target is not required to have
123+
// finished evaluating in the current frame, so no sync-load deadlock.
124+
let dep = module_graph.dependency_by_id(&conn.dependency_id);
125+
if dep.dependency_type() == &DependencyType::DynamicImport {
126+
continue;
127+
}
128+
if !conn.is_target_active(
129+
module_graph,
130+
None,
131+
&compilation.module_graph_cache_artifact,
132+
&compilation
133+
.build_module_graph_artifact
134+
.side_effects_state_artifact,
135+
&compilation.exports_info_artifact,
136+
) {
137+
continue;
138+
}
139+
let Some(target) = module_graph.module_identifier_by_dependency_id(&conn.dependency_id)
140+
else {
141+
continue;
142+
};
143+
let target_chunks = chunk_graph.get_module_chunks(*target);
144+
145+
// If the target lives in a chunk that is an ancestor of THIS async
146+
// chunk, extract it from that specific ancestor chunk.
147+
for &target_chunk in target_chunks {
148+
if ancestor_chunks.contains(&target_chunk) {
149+
modules_to_extract
150+
.entry(*target)
151+
.or_default()
152+
.insert(target_chunk);
153+
}
156154
}
157-
}
158155

159-
// Continue BFS if target is inside the same async chunk boundary
160-
if target_chunks.contains(&origin_async_chunk) {
161-
module_to_async_chunk.insert(*target, origin_async_chunk);
162-
queue.push_back(*target);
156+
// Continue BFS if target is inside THIS async chunk boundary
157+
if target_chunks.contains(&async_chunk_ukey) {
158+
queue.push_back(*target);
159+
}
163160
}
164161
}
165162
}

0 commit comments

Comments
 (0)