Skip to content

Commit 49d9946

Browse files
authored
Create sandboxes directly from snapshots (#1459)
* Introduce HostFunctions newtype for sandbox construction
  Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
* Expose SandboxMemoryLayout fields to crate
  Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
* Capture host function metadata in Snapshot
  Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
* Make gdb work for already initialised snapshots
  Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
* Add MultiUseSandbox from_snapshot
  Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
* Add gdb test for MultiUseSandbox from_snapshot
  Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
* Add tests for MultiUseSandbox from_snapshot
  Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
---------
Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
1 parent 5b2f06b commit 49d9946

14 files changed

Lines changed: 884 additions & 94 deletions

File tree

src/hyperlight_host/examples/guest-debugging/main.rs

Lines changed: 150 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,76 @@ mod tests {
115115
#[cfg(windows)]
116116
const GDB_COMMAND: &str = "gdb";
117117

118+
/// Derive the file locations shared by all gdb tests.
///
/// Returns `(out_file_path, cmd_file_path, manifest_dir)`. The first two
/// live under `OUT_DIR` and are named `{name}.output` and
/// `{name}-commands.txt`; `manifest_dir` is `CARGO_MANIFEST_DIR` with every
/// backslash replaced by a forward slash.
///
/// # Panics
///
/// Panics if `OUT_DIR` or `CARGO_MANIFEST_DIR` cannot be read from the
/// environment.
fn gdb_test_paths(name: &str) -> (String, String, String) {
    let artifacts_dir = std::env::var("OUT_DIR").expect("Failed to get out dir");
    let crate_root = std::env::var("CARGO_MANIFEST_DIR")
        .map(|dir| dir.replace('\\', "/"))
        .expect("Failed to get manifest dir");

    // Both generated files share the `{OUT_DIR}/{name}` prefix and differ
    // only in their suffix.
    let in_artifacts_dir = |suffix: &str| format!("{artifacts_dir}/{name}{suffix}");

    (
        in_artifacts_dir(".output"),
        in_artifacts_dir("-commands.txt"),
        crate_root,
    )
}
129+
130+
/// Produce the gdb command script used by the single-breakpoint tests.
///
/// The script loads the `simpleguest` debug binary found relative to
/// `manifest_dir`, attaches to the remote target on `port`, logs to
/// `out_file_path`, and sets one breakpoint at `breakpoint`. When the
/// breakpoint is hit it echoes `echo_msg`, prints a backtrace, stops
/// logging, detaches and quits. On Windows the script is prefixed with
/// `set osabi none`.
///
/// Why the breakpoint commands finish with `detach` + `quit` rather than
/// `continue`: the earlier "inner continue, outer continue, quit" layout
/// races with the inferior exiting. Once the inner `continue` resumes the
/// guest, the guest may run to completion and the gdb stub may close the
/// remote before gdb has dispatched the outer `continue`, which yields a
/// non-zero exit with `Remote connection closed`. Detaching inside the
/// breakpoint commands removes that window; after the detach the host
/// process finishes the guest call on its own.
fn single_breakpoint_script(
    manifest_dir: &str,
    port: u16,
    out_file_path: &str,
    breakpoint: &str,
    echo_msg: &str,
) -> String {
    let script = format!(
        "file {manifest_dir}/../tests/rust_guests/bin/debug/simpleguest
target remote :{port}

set pagination off
set logging file {out_file_path}
set logging enabled on

break {breakpoint}
commands
echo \"{echo_msg}\\n\"
backtrace

set logging enabled off
detach
quit
end

continue
"
    );

    // The osabi override is only prepended on Windows builds.
    if cfg!(windows) {
        format!("set osabi none\n{script}")
    } else {
        script
    }
}
175+
176+
/// Spawn the gdb client to execute the script in `cmd_file_path`.
177+
fn spawn_gdb_client(cmd_file_path: &str) -> std::process::Child {
178+
Command::new(GDB_COMMAND)
179+
.arg("-nx")
180+
.arg("--nw")
181+
.arg("--batch")
182+
.arg("-x")
183+
.arg(cmd_file_path)
184+
.spawn()
185+
.expect("Failed to start gdb")
186+
}
187+
118188
fn write_cmds_file(cmd_file_path: &str, cmd: &str) -> io::Result<()> {
119189
let file = File::create(cmd_file_path)?;
120190
let mut writer = BufWriter::new(file);
@@ -163,14 +233,7 @@ mod tests {
163233
// wait 3 seconds for the gdb to connect
164234
thread::sleep(Duration::from_secs(3));
165235

166-
let mut gdb = Command::new(GDB_COMMAND)
167-
.arg("-nx") // Don't load any .gdbinit files
168-
.arg("--nw")
169-
.arg("--batch")
170-
.arg("-x")
171-
.arg(cmd_file_path)
172-
.spawn()
173-
.map_err(|e| new_error!("Failed to start gdb process: {}", e))?;
236+
let mut gdb = spawn_gdb_client(cmd_file_path);
174237

175238
// wait 10 seconds for the gdb to connect
176239
thread::sleep(Duration::from_secs(10));
@@ -245,38 +308,16 @@ mod tests {
245308
#[test]
246309
#[serial]
247310
fn test_gdb_end_to_end() {
248-
let out_dir = std::env::var("OUT_DIR").expect("Failed to get out dir");
249-
let manifest_dir = std::env::var("CARGO_MANIFEST_DIR")
250-
.expect("Failed to get manifest dir")
251-
.replace('\\', "/");
252-
let out_file_path = format!("{out_dir}/gdb.output");
253-
let cmd_file_path = format!("{out_dir}/gdb-commands.txt");
254-
255-
let cmd = format!(
256-
"file {manifest_dir}/../tests/rust_guests/bin/debug/simpleguest
257-
target remote :8080
258-
259-
set pagination off
260-
set logging file {out_file_path}
261-
set logging enabled on
262-
263-
break hyperlight_main
264-
commands
265-
echo \"Stopped at hyperlight_main breakpoint\\n\"
266-
backtrace
267-
268-
set logging enabled off
269-
detach
270-
quit
271-
end
272-
273-
continue
274-
"
311+
let (out_file_path, cmd_file_path, manifest_dir) = gdb_test_paths("gdb");
312+
313+
let cmd = single_breakpoint_script(
314+
&manifest_dir,
315+
8080,
316+
&out_file_path,
317+
"hyperlight_main",
318+
"Stopped at hyperlight_main breakpoint",
275319
);
276320

277-
#[cfg(windows)]
278-
let cmd = format!("set osabi none\n{}", cmd);
279-
280321
let checker = |contents: String| contents.contains("Stopped at hyperlight_main breakpoint");
281322

282323
let result = run_guest_and_gdb(&cmd_file_path, &out_file_path, &cmd, checker);
@@ -288,13 +329,8 @@ mod tests {
288329
#[test]
289330
#[serial]
290331
fn test_gdb_sse_check() {
291-
let out_dir = std::env::var("OUT_DIR").expect("Failed to get out dir");
292-
let manifest_dir = std::env::var("CARGO_MANIFEST_DIR")
293-
.expect("Failed to get manifest dir")
294-
.replace('\\', "/");
332+
let (out_file_path, cmd_file_path, manifest_dir) = gdb_test_paths("gdb-sse");
295333
println!("manifest dir {manifest_dir}");
296-
let out_file_path = format!("{out_dir}/gdb-sse.output");
297-
let cmd_file_path = format!("{out_dir}/gdb-sse--commands.txt");
298334

299335
let cmd = format!(
300336
"file {manifest_dir}/../tests/rust_guests/bin/debug/simpleguest
@@ -330,4 +366,74 @@ mod tests {
330366
cleanup(&out_file_path, &cmd_file_path);
331367
assert!(result.is_ok(), "{}", result.unwrap_err());
332368
}
369+
370+
/// End-to-end check that gdb can attach to a sandbox restored with
/// `MultiUseSandbox::from_snapshot` and stop at a guest breakpoint.
#[test]
#[serial]
fn test_gdb_from_snapshot() {
    use hyperlight_host::HostFunctions;

    const PORT: u16 = 8081;
    const BREAKPOINT_MSG: &str = "Stopped at print_output breakpoint";

    let (out_file_path, cmd_file_path, manifest_dir) = gdb_test_paths("gdb-from-snapshot");

    // Create a sandbox through the regular path, then take an in-memory
    // snapshot of it.
    let guest_path = hyperlight_testing::simple_guest_as_string().unwrap();
    let uninitialized =
        UninitializedSandbox::new(hyperlight_host::GuestBinary::FilePath(guest_path), None)
            .unwrap();
    let mut producer: MultiUseSandbox = uninitialized.evolve().unwrap();
    let snap = producer.snapshot().unwrap();

    // Ordering is significant: the gdb stub event loop has to be entered
    // (i.e. `VcpuStopped` has to be sent on the channel) before the gdb
    // client connects, or the wire protocol desyncs. With `evolve()` this
    // happens automatically because it runs `vm.initialise()`, which hits
    // the entry breakpoint right away. For a `Call` snapshot
    // `vm.initialise` is a no-op, so the breakpoint is tripped by running
    // `sbox.call` on this thread before the client is started further down.
    let snap_for_thread = snap.clone();
    let sandbox_thread = thread::spawn(move || -> Result<()> {
        let mut debug_cfg = SandboxConfiguration::default();
        debug_cfg.set_guest_debug_info(DebugInfo { port: PORT });

        let mut restored = MultiUseSandbox::from_snapshot(
            snap_for_thread,
            HostFunctions::default(),
            Some(debug_cfg),
        )?;
        restored.call::<i32>(
            "PrintOutput",
            "Hello from a from_snapshot sandbox\n".to_string(),
        )?;
        Ok(())
    });

    // Give the sandbox thread time to bind the listener, install the
    // one-shot breakpoint, and hit it.
    thread::sleep(Duration::from_secs(3));

    let script = single_breakpoint_script(
        &manifest_dir,
        PORT,
        &out_file_path,
        "main.rs:simpleguest::print_output",
        BREAKPOINT_MSG,
    );
    write_cmds_file(&cmd_file_path, &script).expect("Failed to write gdb commands");

    // The gdb exit status is deliberately ignored; success is judged from
    // the sandbox thread's result and the logged gdb output below.
    let mut gdb_client = spawn_gdb_client(&cmd_file_path);
    let _ = gdb_client.wait();
    let sandbox_result = sandbox_thread
        .join()
        .expect("from_snapshot sandbox thread panicked");

    let result = check_output(&out_file_path, |contents: String| {
        contents.contains(BREAKPOINT_MSG)
    });

    // Remove the generated files before asserting so they are cleaned up
    // even when one of the checks below fails.
    cleanup(&out_file_path, &cmd_file_path);
    sandbox_result.expect("from_snapshot sandbox returned error");
    result.expect("gdb output missing expected breakpoint hit");
}
333439
}

src/hyperlight_host/src/func/host_functions.rs

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ impl Registerable for UninitializedSandbox {
5252
return_type: Output::TYPE,
5353
};
5454

55-
(*hfs).register_host_function(name.to_string(), entry)
55+
(*hfs).register_host_function(name.to_string(), entry);
56+
Ok(())
5657
}
5758
}
5859

@@ -92,7 +93,31 @@ impl Registerable for crate::MultiUseSandbox {
9293
return_type: Output::TYPE,
9394
};
9495

95-
(*hfs).register_host_function(name.to_string(), entry)
96+
(*hfs).register_host_function(name.to_string(), entry);
97+
98+
// Registration mutates the host-function set captured in
99+
// snapshots. Invalidate the cached snapshot so the next
100+
// `snapshot()` call reflects the updated registry.
101+
self.snapshot = None;
102+
Ok(())
103+
}
104+
}
105+
106+
impl Registerable for crate::HostFunctions {
107+
fn register_host_function<Args: ParameterTuple, Output: SupportedReturnType>(
108+
&mut self,
109+
name: &str,
110+
hf: impl Into<HostFunction<Output, Args>>,
111+
) -> Result<()> {
112+
let entry = FunctionEntry {
113+
function: hf.into().into(),
114+
parameter_types: Args::TYPE,
115+
return_type: Output::TYPE,
116+
};
117+
118+
self.inner_mut()
119+
.register_host_function(name.to_string(), entry);
120+
Ok(())
96121
}
97122
}
98123

@@ -236,7 +261,7 @@ pub(crate) fn register_host_function<Args: ParameterTuple, Output: SupportedRetu
236261
.host_funcs
237262
.try_lock()
238263
.map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?
239-
.register_host_function(name.to_string(), entry)?;
264+
.register_host_function(name.to_string(), entry);
240265

241266
Ok(())
242267
}

src/hyperlight_host/src/hypervisor/gdb/arch.rs

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,8 @@ pub(crate) const DR6_HW_BP_FLAGS_MASK: u64 = 0x0F << DR6_HW_BP_FLAGS_POS;
6161
/// Determine the reason the vCPU stopped
6262
/// This is done by checking the DR6 register and the exception id
6363
pub(crate) fn vcpu_stop_reason(
64-
vm: &mut dyn DebuggableVm,
64+
vm: &dyn DebuggableVm,
6565
dr6: u64,
66-
entrypoint: u64,
6766
exception: u32,
6867
) -> std::result::Result<VcpuStopReason, VcpuStopReasonError> {
6968
let CommonRegisters { rip, .. } = vm.regs()?;
@@ -81,10 +80,6 @@ pub(crate) fn vcpu_stop_reason(
8180
// Check page 19-4 Vol. 3B of Intel 64 and IA-32
8281
// Architectures Software Developer's Manual
8382
if DR6_HW_BP_FLAGS_MASK & dr6 != 0 {
84-
if rip == entrypoint {
85-
vm.remove_hw_breakpoint(entrypoint)?;
86-
return Ok(VcpuStopReason::EntryPointBp);
87-
}
8883
return Ok(VcpuStopReason::HwBp);
8984
}
9085
}
@@ -98,12 +93,10 @@ pub(crate) fn vcpu_stop_reason(
9893
r"The vCPU exited because of an unknown reason:
9994
rip: {:?}
10095
dr6: {:?}
101-
entrypoint: {:?}
10296
exception: {:?}
10397
",
10498
rip,
10599
dr6,
106-
entrypoint,
107100
exception,
108101
);
109102

src/hyperlight_host/src/hypervisor/gdb/event_loop.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ impl run_blocking::BlockingEventLoop for GdbBlockingEventLoop {
5959
// Resume execution if unknown reason for stop
6060
let stop_response = match stop_reason {
6161
VcpuStopReason::DoneStep => BaseStopReason::DoneStep,
62-
VcpuStopReason::EntryPointBp => BaseStopReason::HwBreak(()),
6362
VcpuStopReason::SwBp => BaseStopReason::SwBreak(()),
6463
VcpuStopReason::HwBp => BaseStopReason::HwBreak(()),
6564
// This is a consequence of the GDB client sending an interrupt signal

src/hyperlight_host/src/hypervisor/gdb/mod.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,6 @@ impl DebugMemoryAccess {
171171
pub enum VcpuStopReason {
172172
Crash,
173173
DoneStep,
174-
/// Hardware breakpoint inserted by the hypervisor so the guest can be stopped
175-
/// at the entry point. This is used to avoid the guest from executing
176-
/// the entry point code before the debugger is connected
177-
EntryPointBp,
178174
HwBp,
179175
SwBp,
180176
Interrupt,

src/hyperlight_host/src/hypervisor/hyperlight_vm/mod.rs

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,13 @@ pub(crate) struct HyperlightVm {
398398
pub(super) gdb_conn: Option<DebugCommChannel<DebugResponse, DebugMsg>>,
399399
#[cfg(gdb)]
400400
pub(super) sw_breakpoints: HashMap<u64, u8>, // addr -> original instruction
401+
/// One-shot hw breakpoint installed at the entry address when gdb is
402+
/// enabled, so the gdb stub gets a `VcpuStopped` to enter its event
403+
/// loop on the first vCPU run after construction. Cleared by the
404+
/// `VmExit::Debug` arm of `run` the first time a `HwBp` stop fires
405+
/// at the entry address.
406+
#[cfg(gdb)]
407+
pub(super) one_shot_entry_bp: Option<u64>,
401408
#[cfg(feature = "mem_profile")]
402409
pub(super) trace_info: MemTraceInfo,
403410
#[cfg(crashdump)]
@@ -654,17 +661,28 @@ impl HyperlightVm {
654661
match exit_reason {
655662
#[cfg(gdb)]
656663
Ok(VmExit::Debug { dr6, exception }) => {
657-
let initialise = match self.entrypoint {
658-
NextAction::Initialise(initialise) => initialise,
659-
_ => 0,
660-
};
661-
// Handle debug event (breakpoints)
664+
// Classify the debug exit. `vcpu_stop_reason` is a
665+
// pure classifier and has no side effects on the VM.
662666
let stop_reason = crate::hypervisor::gdb::arch::vcpu_stop_reason(
663-
self.vm.as_mut(),
667+
self.vm.as_ref(),
664668
dr6,
665-
initialise,
666669
exception,
667670
)?;
671+
// Remove the one-shot entry breakpoint installed by
672+
// `HyperlightVm::new` the first time it fires so it
673+
// does not interfere with later user-installed
674+
// breakpoints at the same address.
675+
if matches!(stop_reason, VcpuStopReason::HwBp)
676+
&& let Some(entry_addr) = self.one_shot_entry_bp
677+
{
678+
let rip = self.vm.regs().map_err(VcpuStopReasonError::GetRegs)?.rip;
679+
if rip == entry_addr {
680+
self.vm
681+
.remove_hw_breakpoint(entry_addr)
682+
.map_err(VcpuStopReasonError::RemoveHwBreakpoint)?;
683+
self.one_shot_entry_bp = None;
684+
}
685+
}
668686
if let Err(e) = self.handle_debug(dbg_mem_access_fn.clone(), stop_reason) {
669687
break Err(e.into());
670688
}

0 commit comments

Comments
 (0)