Skip to content

Commit 950a024

Browse files
committed
feat(memory): use fallocate(PUNCH_HOLE) for guest_memfd discard
For MAP_SHARED file-backed mappings, madvise(MADV_DONTNEED) only drops the mapping's page-table entries and does not free the pages of the backing file, which means discard_range() previously released no memory for guest_memfd-backed guests. This prevented virtio-mem unplug and balloon inflate from actually freeing physical pages back to the host when secret_free is enabled. Add a fallocate(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) path for MAP_SHARED file-backed regions, which punches holes in the guest_memfd backing file and releases the pages from the page cache.
Signed-off-by: Jack Thomson <jackabt@amazon.com>
1 parent 102d665 commit 950a024

3 files changed

Lines changed: 33 additions & 8 deletions

File tree

resources/seccomp/aarch64-unknown-linux-musl.json

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,11 @@
217217
},
218218
{
219219
"syscall": "madvise",
220-
"comment": "Used by the VirtIO balloon device and by musl for some customer workloads. It is also used by aws-lc during random number generation. They setup a memory page that mark with MADV_WIPEONFORK to be able to detect forks. They also call it with -1 to see if madvise is supported in certain platforms."
220+
"comment": "Used by the VirtIO balloon device and by musl for some customer workloads. It is also used by aws-lc during random number generation. They setup a memory page that mark with MADV_WIPEONFORK to be able to detect forks. They also call it with -1 to see if madvise is supported in certain platforms."
221+
},
222+
{
223+
"syscall": "fallocate",
224+
"comment": "Used to punch holes in guest_memfd (MAP_SHARED) when discarding memory ranges, e.g. during virtio-mem unplug or balloon inflate with secret_free."
221225
},
222226
{
223227
"syscall": "msync",

resources/seccomp/x86_64-unknown-linux-musl.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,10 @@
219219
"syscall": "madvise",
220220
"comment": "Used by the VirtIO balloon device and by musl for some customer workloads. It is also used by aws-lc during random number generation. They setup a memory page that mark with MADV_WIPEONFORK to be able to detect forks. They also call it with -1 to see if madvise is supported in certain platforms."
221221
},
222+
{
223+
"syscall": "fallocate",
224+
"comment": "Used to punch holes in guest_memfd (MAP_SHARED) when discarding memory ranges, e.g. during virtio-mem unplug or balloon inflate with secret_free."
225+
},
222226
{
223227
"syscall": "msync",
224228
"comment": "Used by the VirtIO pmem device to sync the file content with the backing file.",

src/vmm/src/vstate/memory.rs

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -475,14 +475,31 @@ impl GuestRegionMmapExt {
475475
Ok(())
476476
}
477477
}
478-
// Match either the case of an anonymous mapping, or the case
479-
// of a shared file mapping.
480-
// TODO: madvise(MADV_DONTNEED) doesn't actually work with memfd
481-
// (or in general MAP_SHARED of a fd). In those cases we should use
482-
// fallocate64(FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE).
483-
// We keep falling to the madvise branch to keep the previous behaviour.
478+
// Shared file-backed mapping (e.g. guest_memfd): use fallocate(PUNCH_HOLE) to free pages.
479+
(Some(fo), flags) if flags & libc::MAP_SHARED != 0 => {
480+
let file_off = fo.start() + caddr.raw_value() as u64;
481+
// SAFETY: fd and offset are valid, len is within the mapped region.
482+
let ret = unsafe {
483+
libc::fallocate(
484+
fo.file().as_raw_fd(),
485+
libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
486+
file_off as i64,
487+
len as i64,
488+
)
489+
};
490+
if ret < 0 {
491+
let os_error = std::io::Error::last_os_error();
492+
error!(
493+
"discard_range: fallocate(PUNCH_HOLE) failed: {:?}",
494+
os_error
495+
);
496+
Err(GuestMemoryError::IOError(os_error))
497+
} else {
498+
Ok(())
499+
}
500+
}
501+
// Anonymous memory: MADV_DONTNEED releases pages back to the kernel.
484502
_ => {
485-
// Madvise the region in order to mark it as not used.
486503
// SAFETY: The address and length are known to be valid.
487504
let ret = unsafe { libc::madvise(phys_address.cast(), len, libc::MADV_DONTNEED) };
488505
if ret < 0 {

0 commit comments

Comments
 (0)