Skip to content

Commit 5f3ecf2

Browse files
committed
Fix hangs caused by interaction between Salsa and indexer/diagnostics
1 parent 9b0e552 commit 5f3ecf2

2 files changed

Lines changed: 85 additions & 1 deletion

File tree

crates/ark/src/lsp/main_loop.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1121,10 +1121,14 @@ pub(crate) fn diagnostics_refresh_all(state: WorldState) {
11211121
continue;
11221122
}
11231123

1124+
// The task sits in the indexer queue off the main loop.
1125+
// `legacy_snapshot()` hands it a detached oak db so it can't pin the
1126+
// live one against the main loop's next `set_*` (diagnostics read only
1127+
// non-oak state).
11241128
INDEXER_QUEUE
11251129
.send(IndexerQueueTask::Diagnostics(RefreshDiagnosticsTask {
11261130
uri: uri.clone(),
1127-
state: state.clone(),
1131+
state: state.legacy_snapshot(),
11281132
}))
11291133
.unwrap_or_else(|err| lsp::log_error!("Failed to queue diagnostics refresh: {err}"));
11301134
}

crates/ark/src/lsp/state.rs

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,28 @@ impl WorldState {
9393
Err(anyhow!("Can't find document for URI {uri}"))
9494
}
9595
}
96+
97+
/// Copy the world state for a background handler that does not query oak.
98+
///
99+
/// The copy gets a fresh, empty `OakDatabase` instead of a handle to the
100+
/// live one. A salsa db handle held off the main loop blocks the next
101+
/// `set_*` on the owner: the setter waits for `clones == 1`, and an idle
102+
/// handle (parked in the indexer queue, or held by a handler blocked in
103+
/// `r_task`) never drops on its own.
104+
///
105+
/// This is the snapshot for the non-salsa handlers (diagnostics,
106+
/// indexing) that read only the plain `WorldState` fields. A salsa-based
107+
/// handler that queries oak off the main loop needs a different snapshot,
108+
/// one that keeps the live db handle and runs its queries under
109+
/// cancellation (catch `Cancelled`, don't span `r_task`), so it sees real
110+
/// oak data. That's what the `legacy_` prefix warns: don't reach for this
111+
/// from oak-querying code.
112+
pub(crate) fn legacy_snapshot(&self) -> WorldState {
113+
WorldState {
114+
oak: OakDatabase::new(),
115+
..self.clone()
116+
}
117+
}
96118
}
97119

98120
pub(crate) fn with_document<T, F>(
@@ -132,3 +154,61 @@ pub(crate) fn workspace_uris(state: &WorldState) -> Vec<Url> {
132154
let uris: Vec<Url> = state.documents.iter().map(|elt| elt.0.clone()).collect();
133155
uris
134156
}
157+
158+
#[cfg(test)]
159+
mod tests {
160+
use std::sync::mpsc;
161+
use std::sync::Arc;
162+
use std::sync::Barrier;
163+
use std::time::Duration;
164+
165+
use oak_db::OakDatabase;
166+
use oak_scan::DbExt;
167+
use oak_semantic::library::Library;
168+
169+
use super::WorldState;
170+
171+
/// A legacy background snapshot must not pin the oak db against a
172+
/// main-loop mutation.
173+
///
174+
/// salsa reclaims `&mut` access for a setter by raising the cancellation
175+
/// flag and then blocking on `clones == 1`. That flag only frees a clone
176+
/// whose thread is inside a running query and notices it. A snapshot that
177+
/// sits idle (parked in the indexer queue, or held by a `spawn_blocking`
178+
/// handler blocked in `r_task`) never notices, so the next setter on the
179+
/// owner blocks until the snapshot drops. This test parks a snapshot with
180+
/// no query running and asserts a setter on the owner still completes.
181+
#[test]
182+
fn legacy_snapshot_does_not_pin_oak_against_mutation() {
183+
let mut state = WorldState::new(Library::new(vec![]), OakDatabase::new());
184+
185+
let snapshot = state.legacy_snapshot();
186+
187+
// Park the snapshot with no salsa query running, then hold it until
188+
// the main thread has finished timing the mutation.
189+
let release = Arc::new(Barrier::new(2));
190+
let held = {
191+
let release = Arc::clone(&release);
192+
std::thread::spawn(move || {
193+
let _snapshot = snapshot;
194+
release.wait();
195+
})
196+
};
197+
198+
let (tx, rx) = mpsc::channel();
199+
let mutator = std::thread::spawn(move || {
200+
state.oak.set_library_paths(&[]);
201+
let _ = tx.send(());
202+
});
203+
204+
let completed = rx.recv_timeout(Duration::from_secs(2)).is_ok();
205+
206+
// Release the parked snapshot so a blocked mutator can finish and both
207+
// threads join, regardless of the outcome.
208+
release.wait();
209+
held.join().unwrap();
210+
mutator.join().unwrap();
211+
212+
assert!(completed);
213+
}
214+
}

0 commit comments

Comments
 (0)