Skip to content

Commit 431f1fc

Browse files
committed
fix(memory): use madvise(MADV_REMOVE) to discard memfd ranges
Replace fallocate(PUNCH_HOLE) with madvise(MADV_REMOVE) for the memfd-backed (MAP_SHARED) memory discard path.

The critical difference is that madvise(MADV_REMOVE) calls userfaultfd_remove() on the VMA before issuing the fallocate, which delivers a UFFD_EVENT_REMOVE to any userfaultfd registered on that VMA. fallocate(PUNCH_HOLE) called directly on the file descriptor does not go through this path and produces no uffd event. Without the event, a uffd handler cannot learn that the pages have been freed and may serve stale contents on subsequent faults in the discarded range.

Signed-off-by: Nikita Kalyazin <nikita.kalyazin@e2b.dev>
1 parent 3a19026 commit 431f1fc

1 file changed

Lines changed: 18 additions & 22 deletions

File tree

src/vmm/src/vstate/memory.rs

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@ pub use vm_memory::{
2222
GuestUsize, MemoryRegionAddress, MmapRegion, address,
2323
};
2424
use vm_memory::{GuestMemoryError, GuestMemoryRegionBytes, VolatileSlice, WriteVolatile};
25-
use vmm_sys_util::fallocate::FallocateMode;
26-
use vmm_sys_util::{errno, fallocate};
25+
use vmm_sys_util::errno;
2726

2827
use crate::utils::{get_page_size, u64_to_usize, usize_to_u64};
2928
use crate::vmm_config::machine_config::HugePageConfig;
@@ -441,26 +440,23 @@ impl GuestRegionMmapExt {
441440
}
442441
}
443442
// If we back memory over memfd we have a file mapped shared.
444-
(Some(file_offset), flags) if flags & libc::MAP_SHARED != 0 => {
445-
let Some(offset) = file_offset.start().checked_add(caddr.raw_value()) else {
446-
return Err(GuestMemoryError::InvalidGuestAddress(GuestAddress(
447-
caddr.raw_value(),
448-
)));
449-
};
450-
451-
fallocate::fallocate(
452-
file_offset.file(),
453-
FallocateMode::PunchHole,
454-
true,
455-
offset,
456-
usize_to_u64(len),
457-
)
458-
.map_err(|err| {
459-
error!("discard_range: punching hole failed: {err:?}");
460-
GuestMemoryError::IOError(err.into())
461-
})?;
462-
463-
Ok(())
443+
(Some(_), flags) if flags & libc::MAP_SHARED != 0 => {
444+
// MADV_REMOVE punches a hole in the underlying file (equivalent to
445+
// fallocate PUNCH_HOLE) and simultaneously frees the physical pages from
446+
// the page cache. This is the correct primitive for MAP_SHARED
447+
// file-backed mappings such as memfd.
448+
//
449+
// SAFETY: `phys_address` points to a valid host virtual address range of
450+
// `len` bytes belonging to this memory region, with `len` and the address
451+
// both page-aligned (verified above).
452+
let ret = unsafe { libc::madvise(phys_address.cast(), len, libc::MADV_REMOVE) };
453+
if ret < 0 {
454+
let os_error = std::io::Error::last_os_error();
455+
error!("discard_range: madvise failed: {:?}", os_error);
456+
Err(GuestMemoryError::IOError(os_error))
457+
} else {
458+
Ok(())
459+
}
464460
}
465461
// Anonymous mapping.
466462
_ => {

0 commit comments

Comments
 (0)