Skip to content

Commit 1af7f3e

Browse files
kalyazin and claude
committed
feat(gdb): stop all vCPUs on a debug event (all-stop)
When one vCPU stopped at a debug event, the others kept running, so querying a still-running vCPU (`info threads`, per-vCPU backtraces) blocked indefinitely. Pause the sibling vCPUs on every stop (the initial entry stop, breakpoint stops, and Ctrl-C), like QEMU's all-stop, reusing the existing per-vCPU pause (which kicks a running or halted vCPU out of KVM_RUN).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Nikita Kalyazin <nikita.kalyazin@e2b.dev>
1 parent 5660c31 commit 1af7f3e

2 files changed

Lines changed: 35 additions & 2 deletions

File tree

src/vmm/src/gdb/event_loop.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ pub fn event_loop(
2424
gdb_event_receiver: Receiver<usize>,
2525
entry_addr: GuestAddress,
2626
) {
27-
let target = FirecrackerTarget::new(vmm, gdb_event_receiver, entry_addr);
27+
let mut target = FirecrackerTarget::new(vmm, gdb_event_receiver, entry_addr);
2828
let connection: Box<dyn ConnectionExt<Error = std::io::Error>> = { Box::new(connection) };
2929
let debugger = GdbStub::new(connection);
3030

@@ -34,6 +34,13 @@ pub fn event_loop(
3434
.recv()
3535
.expect("Error getting initial gdb event");
3636

37+
// All-stop: the initial breakpoint only stops the triggering vCPU; halt the
38+
// others too so the whole VM is stopped when gdb attaches (this initial stop is
39+
// consumed here rather than in `wait_for_stop_reason`).
40+
target
41+
.pause_all_vcpus()
42+
.expect("Error pausing vcpus on initial stop");
43+
3744
gdb_event_loop_thread(debugger, target);
3845
}
3946

@@ -85,6 +92,13 @@ impl run_blocking::BlockingEventLoop for GdbBlockingEventLoop {
8592
continue;
8693
};
8794

95+
// All-stop: halt the still-running sibling vCPUs so GDB sees a
96+
// fully-stopped VM. Without this, querying a running vCPU (e.g.
97+
// `info threads`) blocks indefinitely.
98+
target
99+
.pause_all_vcpus()
100+
.map_err(WaitForStopReasonError::Target)?;
101+
88102
trace!("Returned stop reason to gdb: {stop_response:?}");
89103
return Ok(run_blocking::Event::TargetStopped(stop_response));
90104
}
@@ -112,7 +126,9 @@ impl run_blocking::BlockingEventLoop for GdbBlockingEventLoop {
112126
// notify the target that a ctrl-c interrupt has occurred.
113127
let main_core = vcpuid_to_tid(0)?;
114128

115-
target.pause_vcpu(main_core)?;
129+
// All-stop: pause every vCPU, not just the main one, so the whole VM is
130+
// halted while GDB inspects it.
131+
target.pause_all_vcpus()?;
116132
target.set_paused_vcpu(main_core);
117133

118134
let exit_reason = MultiThreadStopReason::SignalWithThread {

src/vmm/src/gdb/target.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,23 @@ impl FirecrackerTarget {
233233
Ok(())
234234
}
235235

236+
/// Pauses every vcpu that is still running so the whole VM is stopped while GDB
237+
/// is in control (all-stop semantics, as QEMU does via `pause_all_vcpus`). The
238+
/// vcpu that triggered the stop is already paused; this halts its still-running
239+
/// siblings so they can be enumerated and inspected (`info threads`, `thread N`,
240+
/// per-vCPU backtraces). `send_event` kicks a running or halted vcpu out of
241+
/// `KVM_RUN`, so this completes even for idle siblings.
///
/// vcpus whose `paused` flag is already set are skipped, so calling this when
/// everything is already stopped does nothing and returns `Ok(())`.
///
/// # Errors
///
/// Returns the first error produced by `vcpuid_to_tid` or `pause_vcpu`. The loop
/// stops at that point, so vcpus with a higher index are not visited by this call.
242+
pub fn pause_all_vcpus(&mut self) -> Result<(), GdbTargetError> {
243+
// Walk the vcpus by index: the index is both the key into `vcpu_state` and the
// vcpu id that gets converted to a GDB thread id below.
for cpu_id in 0..self.vcpu_state.len() {
244+
// Only vcpus not yet marked as paused are asked to pause.
if !self.vcpu_state[cpu_id].paused {
245+
let tid = vcpuid_to_tid(cpu_id)?;
246+
self.pause_vcpu(tid)?;
247+
}
248+
}
249+
250+
Ok(())
251+
}
252+
236253
/// Resets all Vcpus to their base state
237254
fn reset_all_vcpu_states(&mut self) {
238255
for value in self.vcpu_state.iter_mut() {

0 commit comments

Comments
 (0)