Skip to content

Commit bf6c99b

Browse files
committed
fix(memory): punch holes for shared discard ranges
Use fallocate(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) for MAP_SHARED file-backed guest memory, so that balloon hinting/reporting on memfd-backed guest memory releases the pages in the shared backing file instead of only dropping PTEs.
1 parent 639196c commit bf6c99b

1 file changed

Lines changed: 53 additions & 6 deletions

File tree

src/vmm/src/vstate/memory.rs

Lines changed: 53 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
use std::fs::File;
99
use std::io::SeekFrom;
1010
use std::ops::Deref;
11+
use std::os::fd::AsRawFd;
1112
use std::sync::{Arc, Mutex};
1213

1314
use bitvec::vec::BitVec;
@@ -420,12 +421,32 @@ impl GuestRegionMmapExt {
420421
Ok(())
421422
}
422423
}
423-
// Match either the case of an anonymous mapping, or the case
424-
// of a shared file mapping.
425-
// TODO: madvise(MADV_DONTNEED) doesn't actually work with memfd
426-
// (or in general MAP_SHARED of a fd). In those cases we should use
427-
// fallocate64(FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE).
428-
// We keep falling to the madvise branch to keep the previous behaviour.
424+
(Some(file_offset), flags) if flags & libc::MAP_SHARED != 0 => {
425+
let Some(offset) = file_offset.start().checked_add(caddr.raw_value()) else {
426+
return Err(GuestMemoryError::InvalidGuestAddress(GuestAddress(
427+
caddr.raw_value(),
428+
)));
429+
};
430+
431+
// SAFETY: fd, offset and length are validated by the kernel. The
432+
// file is kept alive by the FileOffset held by this mmap region.
433+
let ret = unsafe {
434+
libc::fallocate(
435+
file_offset.file().as_raw_fd(),
436+
libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
437+
i64::try_from(offset).expect("shared discard offset fits in i64"),
438+
i64::try_from(len).expect("shared discard length fits in i64"),
439+
)
440+
};
441+
if ret < 0 {
442+
let os_error = std::io::Error::last_os_error();
443+
error!("discard_range: fallocate failed: {:?}", os_error);
444+
Err(GuestMemoryError::IOError(os_error))
445+
} else {
446+
Ok(())
447+
}
448+
}
449+
// Anonymous mapping.
429450
_ => {
430451
// Madvise the region in order to mark it as not used.
431452
// SAFETY: The address and length are known to be valid.
@@ -1462,6 +1483,32 @@ mod tests {
14621483
);
14631484
}
14641485

1486+
#[test]
1487+
fn test_discard_range_on_shared_memfd() {
1488+
let page_size: usize = 0x1000;
1489+
let (mem, _file) = memfd_backed(
1490+
&[(GuestAddress(0), 2 * page_size)],
1491+
false,
1492+
HugePageConfig::None,
1493+
)
1494+
.unwrap();
1495+
let mem = into_region_ext(mem);
1496+
1497+
let ones = vec![1u8; 2 * page_size];
1498+
mem.write(&ones, GuestAddress(0)).unwrap();
1499+
1500+
mem.discard_range(GuestAddress(0), page_size).unwrap();
1501+
1502+
let mut actual_page = vec![0u8; page_size];
1503+
mem.read(actual_page.as_mut_slice(), GuestAddress(0))
1504+
.unwrap();
1505+
assert_eq!(vec![0u8; page_size], actual_page);
1506+
1507+
mem.read(actual_page.as_mut_slice(), GuestAddress(page_size as u64))
1508+
.unwrap();
1509+
assert_eq!(vec![1u8; page_size], actual_page);
1510+
}
1511+
14651512
/// Verifies that `slots_intersecting_range` returns the correct slots for
14661513
/// ranges at slot boundaries, interior to a slot, and spanning two slots.
14671514
#[test]

0 commit comments

Comments
 (0)