Skip to content

Commit d864fc4

Browse files
committed
work around AMD SVM NPT flush bug on hyper-v
Signed-off-by: Ludvig Liljenberg <4257730+ludfjig@users.noreply.github.com>
1 parent 39a4f8e commit d864fc4

File tree

2 files changed

+154
-4
lines changed

2 files changed

+154
-4
lines changed

src/hyperlight_host/src/hypervisor/virtual_machine/whp.rs

Lines changed: 153 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,145 @@ fn release_file_mapping(view_base: *mut c_void, mapping_handle: HandleWrapper) {
8686
}
8787
}
8888

89+
/// Workaround for a Hyper-V bug on AMD SVM where the `ObScrubPartition`
90+
/// path (invoked by `WHvResetPartition`) does not flush the Nested Page
91+
/// Table (NPT) TLB. After reset, stale GPA-to-HPA translations from
92+
/// the previous guest execution persist, causing the guest to read
93+
/// wrong physical memory. Intel is unaffected.
94+
///
95+
/// This bug most commonly surfaces as the `interrupt_same_thread_no_barrier`
96+
/// integration test failing on AMD Windows hosts. @ludfjig has verified
97+
/// the fix works on Windows Insider builds; this workaround can be
98+
/// removed once that fix ships in a released version of Windows.
99+
///
100+
/// The workaround unmaps and remaps a dummy page after each
101+
/// `WHvResetPartition`, which forces the hypervisor to invalidate
102+
/// NPT entries.
103+
mod npt_flush {
104+
use std::os::raw::c_void;
105+
106+
use hyperlight_common::mem::PAGE_SIZE_USIZE;
107+
use windows::Win32::Foundation::{CloseHandle, HANDLE, INVALID_HANDLE_VALUE};
108+
use windows::Win32::System::Hypervisor::*;
109+
use windows::Win32::System::Memory::{CreateFileMappingA, PAGE_READWRITE};
110+
use windows::core::PCSTR;
111+
112+
use super::WHvMapGpaRange2Func;
113+
use crate::hypervisor::surrogate_process::SurrogateProcess;
114+
use crate::hypervisor::virtual_machine::{CreateVmError, RegisterError};
115+
use crate::mem::layout::SandboxMemoryLayout;
116+
use crate::mem::memory_region::SurrogateMapping;
117+
118+
/// GPA for the dummy page. Placed just past the maximum snapshot
119+
/// region so it can never collide with guest memory.
120+
const GPA: u64 =
121+
(SandboxMemoryLayout::BASE_ADDRESS + SandboxMemoryLayout::MAX_MEMORY_SIZE) as u64;
122+
123+
#[derive(Debug)]
124+
pub(super) struct NptInvalidator {
125+
handle: HANDLE,
126+
surrogate_addr: *mut c_void,
127+
map_gpa_range2: WHvMapGpaRange2Func,
128+
}
129+
130+
impl NptInvalidator {
131+
const FLAGS: WHV_MAP_GPA_RANGE_FLAGS = WHvMapGpaRangeFlagRead;
132+
133+
/// Allocate a dummy page, map it into the surrogate process, and
134+
/// create the initial GPA mapping.
135+
pub(super) fn new(
136+
partition: WHV_PARTITION_HANDLE,
137+
surrogate_process: &mut SurrogateProcess,
138+
) -> Result<Self, CreateVmError> {
139+
let handle = unsafe {
140+
CreateFileMappingA(
141+
INVALID_HANDLE_VALUE,
142+
None,
143+
PAGE_READWRITE,
144+
0,
145+
PAGE_SIZE_USIZE as u32,
146+
PCSTR::null(),
147+
)
148+
.map_err(|e| CreateVmError::InitializeVm(e.into()))?
149+
};
150+
151+
let surrogate_addr = surrogate_process
152+
.map(
153+
handle.into(),
154+
0, // sentinel key; MapViewOfFile never returns 0
155+
PAGE_SIZE_USIZE,
156+
&SurrogateMapping::SandboxMemory,
157+
)
158+
.map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))?;
159+
160+
let map_gpa_range2 = unsafe {
161+
super::try_load_whv_map_gpa_range2()
162+
.map_err(|e| CreateVmError::InitializeVm(e.into()))?
163+
};
164+
165+
let res = unsafe {
166+
map_gpa_range2(
167+
partition,
168+
surrogate_process.process_handle.into(),
169+
surrogate_addr,
170+
GPA,
171+
PAGE_SIZE_USIZE as u64,
172+
Self::FLAGS,
173+
)
174+
};
175+
if res.is_err() {
176+
return Err(CreateVmError::InitializeVm(
177+
windows_result::Error::from_hresult(res).into(),
178+
));
179+
}
180+
181+
Ok(Self {
182+
handle,
183+
surrogate_addr,
184+
map_gpa_range2,
185+
})
186+
}
187+
188+
/// Force an NPT TLB flush by unmapping and remapping the dummy page.
189+
pub(super) fn flush(
190+
&mut self,
191+
partition: WHV_PARTITION_HANDLE,
192+
surrogate_process: &SurrogateProcess,
193+
) -> std::result::Result<(), RegisterError> {
194+
unsafe {
195+
WHvUnmapGpaRange(partition, GPA, PAGE_SIZE_USIZE as u64)
196+
.map_err(|e| RegisterError::ResetPartition(e.into()))?;
197+
}
198+
let res = unsafe {
199+
(self.map_gpa_range2)(
200+
partition,
201+
surrogate_process.process_handle.into(),
202+
self.surrogate_addr,
203+
GPA,
204+
PAGE_SIZE_USIZE as u64,
205+
Self::FLAGS,
206+
)
207+
};
208+
if res.is_err() {
209+
return Err(RegisterError::ResetPartition(
210+
windows_result::Error::from_hresult(res).into(),
211+
));
212+
}
213+
Ok(())
214+
}
215+
}
216+
217+
impl Drop for NptInvalidator {
218+
fn drop(&mut self) {
219+
// The surrogate mapping is freed when SurrogateProcess drops;
220+
// we only need to close the file-mapping handle.
221+
if let Err(e) = unsafe { CloseHandle(self.handle) } {
222+
tracing::error!("Failed to close NptInvalidator handle: {:?}", e);
223+
}
224+
}
225+
}
226+
}
227+
89228
/// A Windows Hypervisor Platform implementation of a single-vcpu VM
90229
#[derive(Debug)]
91230
pub(crate) struct WhpVm {
@@ -98,15 +237,19 @@ pub(crate) struct WhpVm {
98237
/// Handle to the background timer (if started).
99238
#[cfg(feature = "hw-interrupts")]
100239
timer: Option<TimerThread>,
240+
/// Dummy page that is unmapped and remapped to force NPT TLB
241+
/// flushes after `WHvResetPartition` on AMD SVM hosts.
242+
/// See the [`npt_flush`] module for details.
243+
npt_flush: npt_flush::NptInvalidator,
101244
}
102245

103246
// Safety: `WhpVm` is !Send because it holds `SurrogateProcess` which contains a raw pointer
104247
// `allocated_address` (*mut c_void). This pointer represents a memory mapped view address
105248
// in the surrogate process. It is never dereferenced, only used for address arithmetic and
106249
// resource management (unmapping). This is a system resource that is not bound to the creating
107250
// thread and can be safely transferred between threads.
108-
// `file_mappings` contains raw pointers that are also kernel resource handles,
109-
// safe to use from any thread.
251+
// `file_mappings` and `NptInvalidator` contain raw pointers that are also kernel
252+
// resource handles, safe to use from any thread.
110253
unsafe impl Send for WhpVm {}
111254

112255
impl WhpVm {
@@ -144,16 +287,19 @@ impl WhpVm {
144287

145288
let mgr = get_surrogate_process_manager()
146289
.map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))?;
147-
let surrogate_process = mgr
290+
let mut surrogate_process = mgr
148291
.get_surrogate_process()
149292
.map_err(|e| CreateVmError::SurrogateProcess(e.to_string()))?;
150293

294+
let npt_flush = npt_flush::NptInvalidator::new(partition, &mut surrogate_process)?;
295+
151296
Ok(WhpVm {
152297
partition,
153298
surrogate_process,
154299
file_mappings: Vec::new(),
155300
#[cfg(feature = "hw-interrupts")]
156301
timer: None,
302+
npt_flush,
157303
})
158304
}
159305

@@ -717,6 +863,10 @@ impl VirtualMachine for WhpVm {
717863
WHvResetPartition(self.partition)
718864
.map_err(|e| RegisterError::ResetPartition(e.into()))?;
719865

866+
// Flush NPT TLB (AMD SVM workaround, see npt_flush module).
867+
self.npt_flush
868+
.flush(self.partition, &self.surrogate_process)?;
869+
720870
// WHvResetPartition resets LAPIC to power-on defaults.
721871
// Re-initialize it when LAPIC emulation is active.
722872
//

src/hyperlight_host/src/mem/layout.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ impl SandboxMemoryLayout {
313313
/// Both the scratch region and the snapshot region are bounded by
314314
/// this size. The value is arbitrary but chosen to be large enough
315315
/// for most workloads while preventing accidental resource exhaustion.
316-
const MAX_MEMORY_SIZE: usize = (16 * 1024 * 1024 * 1024) - Self::BASE_ADDRESS; // 16 GiB - BASE_ADDRESS
316+
pub(crate) const MAX_MEMORY_SIZE: usize = (16 * 1024 * 1024 * 1024) - Self::BASE_ADDRESS; // 16 GiB - BASE_ADDRESS
317317

318318
/// The base address of the sandbox's memory.
319319
#[cfg(not(feature = "nanvix-unstable"))]

0 commit comments

Comments
 (0)