Skip to content

Commit 5660c31

Browse files
kalyazin and claude
committed
feat(gdb): start the gdb server when restoring from a snapshot
Upstream wires gdb only into the boot path; restored microVMs never started the gdb server. Accept a `gdb_socket_path` restore-time override on the load-snapshot request (alongside `network_overrides` and `clock_realtime`) and wire `attach_debug_info` + `gdb_thread` into `build_microvm_from_snapshot` (x86_64), arming the entry breakpoint at the restored vCPU RIP so gdb takes control at the resume point.

Carrying the socket on `LoadSnapshotParams` keeps it a pure restore-time knob: no machine-config update is needed before the load (which would forbid the snapshot load), and there is no boot-time value to preserve across restore. `persist` sets the restored machine config's `gdb_socket_path` from the load param, which the snapshot builder reads.

Also add `resolve_gdb_socket_path()` with a `FIRECRACKER_GDB_SOCKET` env fallback, so launchers that cannot set the load request can still enable gdb.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Nikita Kalyazin <nikita.kalyazin@e2b.dev>
1 parent 295d67b commit 5660c31

7 files changed

Lines changed: 97 additions & 4 deletions

File tree

src/firecracker/src/api_server/request/snapshot.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@ fn parse_put_snapshot_load(body: &Body) -> Result<ParsedRequest, RequestError> {
119119
resume_vm: snapshot_config.resume_vm,
120120
network_overrides: snapshot_config.network_overrides,
121121
clock_realtime: snapshot_config.clock_realtime,
122+
#[cfg(feature = "gdb")]
123+
gdb_socket_path: snapshot_config.gdb_socket_path,
122124
};
123125

124126
// Construct the `ParsedRequest` object.
@@ -198,6 +200,8 @@ mod tests {
198200
resume_vm: false,
199201
network_overrides: vec![],
200202
clock_realtime: false,
203+
#[cfg(feature = "gdb")]
204+
gdb_socket_path: None,
201205
};
202206
let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap();
203207
assert!(
@@ -230,6 +234,8 @@ mod tests {
230234
resume_vm: false,
231235
network_overrides: vec![],
232236
clock_realtime: false,
237+
#[cfg(feature = "gdb")]
238+
gdb_socket_path: None,
233239
};
234240
let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap();
235241
assert!(
@@ -262,6 +268,8 @@ mod tests {
262268
resume_vm: true,
263269
network_overrides: vec![],
264270
clock_realtime: false,
271+
#[cfg(feature = "gdb")]
272+
gdb_socket_path: None,
265273
};
266274
let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap();
267275
assert!(
@@ -303,6 +311,8 @@ mod tests {
303311
host_dev_name: String::from("vmtap2"),
304312
}],
305313
clock_realtime: false,
314+
#[cfg(feature = "gdb")]
315+
gdb_socket_path: None,
306316
};
307317
let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap();
308318
assert!(
@@ -332,6 +342,8 @@ mod tests {
332342
resume_vm: true,
333343
network_overrides: vec![],
334344
clock_realtime: false,
345+
#[cfg(feature = "gdb")]
346+
gdb_socket_path: None,
335347
};
336348
let parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap();
337349
assert_eq!(
@@ -435,6 +447,8 @@ mod tests {
435447
resume_vm: false,
436448
network_overrides: vec![],
437449
clock_realtime: false,
450+
#[cfg(feature = "gdb")]
451+
gdb_socket_path: None,
438452
};
439453
let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap();
440454
assert!(

src/firecracker/swagger/firecracker.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1758,6 +1758,13 @@ definitions:
17581758
elapsed since the snapshot was taken. When false (default), kvmclock resumes
17591759
from where it was at snapshot time. This option may be extended to other clock
17601760
sources and CPU architectures in the future."
1761+
gdb_socket_path:
1762+
type: string
1763+
description:
1764+
"Only available when Firecracker is built with the `gdb` feature. When set,
1765+
start the GDB server on this unix socket for the restored guest, for
1766+
source-level debugging of the guest kernel. Debug builds only; not for
1767+
production."
17611768

17621769

17631770
TokenBucket:

src/vmm/src/builder.rs

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,18 @@ impl std::convert::From<linux_loader::cmdline::Error> for StartMicrovmError {
133133
}
134134
}
135135

136+
/// Resolves the GDB unix socket path. An explicitly configured path (machine-config
137+
/// on boot, the load-snapshot request on restore) takes precedence; otherwise fall
138+
/// back to the `FIRECRACKER_GDB_SOCKET` environment variable. The env fallback lets
139+
/// tooling that launches Firecracker (e.g. the e2b orchestrator / resume-build, which
140+
/// inherit the environment) enable GDB without setting either.
141+
#[cfg(feature = "gdb")]
142+
fn resolve_gdb_socket_path(configured: &Option<String>) -> Option<String> {
143+
configured
144+
.clone()
145+
.or_else(|| std::env::var("FIRECRACKER_GDB_SOCKET").ok())
146+
}
147+
136148
/// Builds and starts a microVM based on the current Firecracker VmResources configuration.
137149
///
138150
/// The built microVM and all the created vCPUs start off in the paused state.
@@ -343,9 +355,16 @@ pub fn build_microvm_for_boot(
343355
.map_err(VmmError::VcpuStart)?;
344356

345357
#[cfg(feature = "gdb")]
346-
if let Some(gdb_socket_path) = &vm_resources.machine_config.gdb_socket_path {
347-
gdb::gdb_thread(vmm.clone(), gdb_rx, entry_point.entry_addr, gdb_socket_path)
348-
.map_err(StartMicrovmError::GdbServer)?;
358+
if let Some(gdb_socket_path) =
359+
resolve_gdb_socket_path(&vm_resources.machine_config.gdb_socket_path)
360+
{
361+
gdb::gdb_thread(
362+
vmm.clone(),
363+
gdb_rx,
364+
entry_point.entry_addr,
365+
&gdb_socket_path,
366+
)
367+
.map_err(StartMicrovmError::GdbServer)?;
349368
} else {
350369
debug!("No GDB socket provided not starting gdb server.");
351370
}
@@ -528,6 +547,31 @@ pub fn build_microvm_from_snapshot(
528547
page_size: vm_resources.machine_config.huge_pages.page_size(),
529548
};
530549

550+
// GDB debug support for restored microVMs (x86_64 only). Mirror the boot
551+
// path: attach the debug-event channel to every restored vCPU before they
552+
// start, then start the GDB server thread once the vCPUs are running. The
553+
// server arms a hardware breakpoint at the restored instruction pointer so
554+
// GDB takes control at the resume point on the first continue.
555+
//
556+
// Only wire the channel up when a GDB socket is actually configured: with no
557+
// socket, no server thread drains the receiver, so a vCPU debug event would
558+
// `send` on a dropped receiver and panic. Gating the attach keeps the channel
559+
// paired with its consumer (and leaves the vCPUs' gdb_event as None otherwise).
560+
#[cfg(all(feature = "gdb", target_arch = "x86_64"))]
561+
let gdb_socket_path =
562+
resolve_gdb_socket_path(&vm_resources.machine_config.gdb_socket_path);
563+
564+
#[cfg(all(feature = "gdb", target_arch = "x86_64"))]
565+
let gdb_rx = if gdb_socket_path.is_some() {
566+
let (gdb_tx, gdb_rx) = mpsc::channel();
567+
vcpus
568+
.iter_mut()
569+
.for_each(|vcpu| vcpu.attach_debug_info(gdb_tx.clone()));
570+
Some(gdb_rx)
571+
} else {
572+
None
573+
};
574+
531575
// Move vcpus to their own threads and start their state machine in the 'Paused' state.
532576
vmm.start_vcpus(
533577
vcpus,
@@ -540,6 +584,17 @@ pub fn build_microvm_from_snapshot(
540584
let vmm = Arc::new(Mutex::new(vmm));
541585
event_manager.add_subscriber(vmm.clone());
542586

587+
#[cfg(all(feature = "gdb", target_arch = "x86_64"))]
588+
if let Some(gdb_socket_path) = gdb_socket_path {
589+
// On restore the vCPUs resume at their saved RIP; arm the entry
590+
// breakpoint there so GDB stops at the resume point.
591+
let entry_addr = GuestAddress(microvm_state.vcpu_states[0].regs.rip);
592+
gdb::gdb_thread(vmm.clone(), gdb_rx.unwrap(), entry_addr, &gdb_socket_path)
593+
.map_err(StartMicrovmError::GdbServer)?;
594+
} else {
595+
debug!("No GDB socket provided not starting gdb server.");
596+
}
597+
543598
// Load seccomp filters for the VMM thread.
544599
// Keep this as the last step of the building process.
545600
crate::seccomp::apply_filter(

src/vmm/src/persist/mod.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,8 +394,11 @@ pub fn restore_from_snapshot(
394394
cpu_template: Some(microvm_state.vm_info.cpu_template),
395395
track_dirty_pages: Some(track_dirty_pages),
396396
huge_pages: Some(microvm_state.vm_info.huge_pages),
397+
// GDB socket is a restore-time override carried on the load request,
398+
// applied here so the restore-path gdb server (which reads
399+
// machine_config.gdb_socket_path) starts.
397400
#[cfg(feature = "gdb")]
398-
gdb_socket_path: None,
401+
gdb_socket_path: params.gdb_socket_path.clone(),
399402
})
400403
.map_err(BuildMicrovmFromSnapshotError::VmUpdateConfig)?;
401404

src/vmm/src/rpc_interface.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,6 +1443,8 @@ mod tests {
14431443
resume_vm: false,
14441444
network_overrides: vec![],
14451445
clock_realtime: false,
1446+
#[cfg(feature = "gdb")]
1447+
gdb_socket_path: None,
14461448
},
14471449
)));
14481450
check_unsupported(runtime_request(VmmAction::SetEntropyDevice(

src/vmm/src/vmm_config/snapshot.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,10 @@ pub struct LoadSnapshotParams {
7676
/// advancing kvmclock by the wall-clock time elapsed since the snapshot was taken. When false
7777
/// (default), kvmclock resumes from where it was at snapshot time.
7878
pub clock_realtime: bool,
79+
/// [gdb] When set, start the GDB server on this unix socket for the restored
80+
/// guest. A restore-time override (not configured via machine-config).
81+
#[cfg(feature = "gdb")]
82+
pub gdb_socket_path: Option<String>,
7983
}
8084

8185
/// Stores the configuration for loading a snapshot that is provided by the user.
@@ -108,6 +112,10 @@ pub struct LoadSnapshotConfig {
108112
/// [x86_64 only] When set to true, passes `KVM_CLOCK_REALTIME` to `KVM_SET_CLOCK` on restore.
109113
#[serde(default)]
110114
pub clock_realtime: bool,
115+
/// [gdb] Unix socket path for the GDB server (debug builds only).
116+
#[cfg(feature = "gdb")]
117+
#[serde(default)]
118+
pub gdb_socket_path: Option<String>,
111119
}
112120

113121
/// Stores the configuration used for managing snapshot memory.

src/vmm/tests/integration_tests.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,8 @@ fn verify_load_snapshot(snapshot_file: TempFile, memory_file: TempFile) {
304304
resume_vm: true,
305305
network_overrides: vec![],
306306
clock_realtime: false,
307+
#[cfg(feature = "gdb")]
308+
gdb_socket_path: None,
307309
}))
308310
.unwrap();
309311

@@ -390,6 +392,8 @@ fn verify_load_snap_disallowed_after_boot_resources(res: VmmAction, res_name: &s
390392
resume_vm: false,
391393
network_overrides: vec![],
392394
clock_realtime: false,
395+
#[cfg(feature = "gdb")]
396+
gdb_socket_path: None,
393397
});
394398
let err = preboot_api_controller.handle_preboot_request(req);
395399
assert!(

0 commit comments

Comments
 (0)