Skip to content

Commit bd85e43

Browse files
ValentaTomas authored and kalyazin committed
fix(memory): punch holes for shared discard ranges
Use fallocate(PUNCH_HOLE|KEEP_SIZE) for MAP_SHARED file-backed guest memory so memfd-backed balloon hinting/reporting clears the shared backing instead of only dropping PTEs. Signed-off-by: Babis Chalios <babis.chalios@e2b.dev>
1 parent b222edc commit bd85e43

2 files changed

Lines changed: 115 additions & 10 deletions

File tree

src/vmm/src/test_utils/mod.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#![allow(missing_docs)]
55

6+
use std::fs::File;
67
use std::sync::{Arc, Mutex};
78

89
use vm_memory::{GuestAddress, GuestRegionCollection};
@@ -53,6 +54,22 @@ pub fn multi_region_mem(regions: &[(GuestAddress, usize)]) -> GuestMemoryMmap {
5354
.unwrap()
5455
}
5556

57+
/// Creates a [`GuestMemoryMmap`] with multiple regions, backed via `memory::memfd_backed`, and
/// without dirty page tracking.
///
/// Returns the guest memory together with the `Arc<File>` handed back by
/// `memory::memfd_backed`, so callers can keep the backing file around.
///
/// Panics if the backing regions or the region collection cannot be created.
58+
pub fn multi_region_mem_memfd(
59+
regions: &[(GuestAddress, usize)],
60+
huge_page_cfg: HugePageConfig,
61+
) -> (GuestMemoryMmap, Arc<File>) {
62+
let (reg, file) = memory::memfd_backed(regions, false, huge_page_cfg).unwrap();
63+
let mem = GuestRegionCollection::from_regions(
64+
reg.into_iter()
65+
.map(|region| GuestRegionMmapExt::dram_from_mmap_region(region, 0))
66+
.collect(),
67+
)
68+
.unwrap();
69+
70+
(mem, file)
71+
}
72+
5673
pub fn multi_region_mem_raw(regions: &[(GuestAddress, usize)]) -> Vec<GuestRegionMmap> {
5774
memory::anonymous(regions.iter().copied(), false, HugePageConfig::None)
5875
.expect("Cannot initialize memory")

src/vmm/src/vstate/memory.rs

Lines changed: 98 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ pub use vm_memory::{
2222
GuestUsize, MemoryRegionAddress, MmapRegion, address,
2323
};
2424
use vm_memory::{GuestMemoryError, GuestMemoryRegionBytes, VolatileSlice, WriteVolatile};
25-
use vmm_sys_util::errno;
25+
use vmm_sys_util::fallocate::FallocateMode;
26+
use vmm_sys_util::{errno, fallocate};
2627

2728
use crate::utils::{get_page_size, u64_to_usize, usize_to_u64};
2829
use crate::vmm_config::machine_config::HugePageConfig;
@@ -411,8 +412,7 @@ impl GuestRegionMmapExt {
411412

412413
let phys_address = self.get_host_address(caddr)?;
413414
match (self.inner.file_offset(), self.inner.flags()) {
414-
// If and only if we are resuming from a snapshot file, we have a file and it's mapped
415-
// private
415+
// If we are resuming from a snapshot file, we have a file and it's mapped private
416416
(Some(_), flags) if flags & libc::MAP_PRIVATE != 0 => {
417417
// Mmap a new anonymous region over the present one in order to create a hole
418418
// with zero pages.
@@ -440,12 +440,29 @@ impl GuestRegionMmapExt {
440440
Ok(())
441441
}
442442
}
443-
// Match either the case of an anonymous mapping, or the case
444-
// of a shared file mapping.
445-
// TODO: madvise(MADV_DONTNEED) doesn't actually work with memfd
446-
// (or in general MAP_SHARED of a fd). In those cases we should use
447-
// fallocate64(FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE).
448-
// We keep falling to the madvise branch to keep the previous behaviour.
443+
// If we back memory over memfd we have a file mapped shared.
444+
(Some(file_offset), flags) if flags & libc::MAP_SHARED != 0 => {
445+
let Some(offset) = file_offset.start().checked_add(caddr.raw_value()) else {
446+
return Err(GuestMemoryError::InvalidGuestAddress(GuestAddress(
447+
caddr.raw_value(),
448+
)));
449+
};
450+
451+
fallocate::fallocate(
452+
file_offset.file(),
453+
FallocateMode::PunchHole,
454+
true,
455+
offset,
456+
usize_to_u64(len),
457+
)
458+
.map_err(|err| {
459+
error!("discard_range: punching hole failed: {err:?}");
460+
GuestMemoryError::IOError(err.into())
461+
})?;
462+
463+
Ok(())
464+
}
465+
// Anonymous mapping.
449466
_ => {
450467
// Madvise the region in order to mark it as not used.
451468
// SAFETY: The address and length are known to be valid.
@@ -893,7 +910,7 @@ mod tests {
893910

894911
use super::*;
895912
use crate::snapshot::Snapshot;
896-
use crate::test_utils::single_region_mem;
913+
use crate::test_utils::{multi_region_mem_memfd, single_region_mem};
897914
use crate::utils::{get_page_size, mib_to_bytes};
898915
use crate::vstate::memory::test_utils::into_region_ext;
899916

@@ -1482,6 +1499,77 @@ mod tests {
14821499
);
14831500
}
14841501

1502+
/// Asserts that the `expected.len()` bytes of guest memory starting at guest address `offset`
/// are equal to `expected`.
///
/// Panics if the read from `mem` fails or if the contents differ.
fn check_mem_contents(mem: &GuestMemoryMmap, offset: usize, expected: &[u8]) {
1503+
let addr = GuestAddress(usize_to_u64(offset));
1504+
let mut actual_page = vec![0u8; expected.len()];
1505+
mem.read(actual_page.as_mut_slice(), addr).unwrap();
1506+
assert_eq!(actual_page, expected);
1507+
}
1508+
1509+
/// Shared body of the memfd discard tests, parameterised by the huge page configuration.
///
/// Fills two memfd-backed regions with 1s, discards page-aligned ranges and checks that they
/// read back as zeros while the rest is untouched, then checks that ranges with an unaligned
/// length or start address are rejected.
fn test_discard_range_on_memfd(huge_pages: HugePageConfig) {
1510+
// Two 4MiB regions, 8MiB of memory in total (multiples of both possible page sizes)
1511+
const REGION_SIZE: usize = 4 * 1024 * 1024;
1512+
1513+
let (mem, _file) = multi_region_mem_memfd(
1514+
&[
1515+
(GuestAddress(0), REGION_SIZE),
1516+
(GuestAddress(usize_to_u64(REGION_SIZE)), REGION_SIZE),
1517+
],
1518+
huge_pages,
1519+
);
1520+
1521+
let page_size = huge_pages.page_size();
1522+
1523+
// Fill up memory with 1s
1524+
let ones = vec![1u8; 2 * REGION_SIZE];
1525+
mem.write(&ones, GuestAddress(0)).unwrap();
1526+
1527+
check_mem_contents(&mem, 0, &vec![1u8; 2 * REGION_SIZE]);
1528+
1529+
// Discard the entire first region; the second region must keep its contents
1530+
mem.discard_range(GuestAddress(0), REGION_SIZE).unwrap();
1531+
check_mem_contents(&mem, 0, &vec![0u8; REGION_SIZE]);
1532+
check_mem_contents(&mem, REGION_SIZE, &vec![1u8; REGION_SIZE]);
1533+
1534+
// discard_range() works on page granularity. Discard the first page of the second region.
1535+
mem.discard_range(GuestAddress(usize_to_u64(REGION_SIZE)), page_size)
1536+
.unwrap();
1537+
check_mem_contents(&mem, REGION_SIZE, &vec![0u8; page_size]);
1538+
check_mem_contents(
1539+
&mem,
1540+
REGION_SIZE + page_size,
1541+
&vec![1u8; REGION_SIZE - page_size],
1542+
);
1543+
1544+
// discard_range() is expected to return an error for ranges that are not page-aligned
1545+
1546+
// Try to discard a length that is not a multiple of the page size
1547+
mem.discard_range(GuestAddress(usize_to_u64(REGION_SIZE + page_size)), 1024)
1548+
.unwrap_err();
1549+
mem.discard_range(
1550+
GuestAddress(usize_to_u64(REGION_SIZE + page_size)),
1551+
page_size + 1024,
1552+
)
1553+
.unwrap_err();
1554+
1555+
// Try to discard from a start address that is not page-aligned
1556+
mem.discard_range(
1557+
GuestAddress(usize_to_u64(REGION_SIZE + page_size + 1024)),
1558+
page_size,
1559+
)
1560+
.unwrap_err();
1561+
}
1562+
1563+
/// Runs the memfd discard checks with `HugePageConfig::None` (no huge pages).
#[test]
1564+
fn test_discard_range_on_memfd_4k() {
1565+
test_discard_range_on_memfd(HugePageConfig::None)
1566+
}
1567+
1568+
/// Runs the memfd discard checks with `HugePageConfig::Hugetlbfs2M`.
///
/// NOTE(review): presumably requires 2M hugetlbfs pages to be available on the host running
/// the tests — confirm the test environment provides them.
#[test]
1569+
fn test_discard_range_on_memfd_2m() {
1570+
test_discard_range_on_memfd(HugePageConfig::Hugetlbfs2M)
1571+
}
1572+
14851573
/// Verifies that `slots_intersecting_range` returns the correct slots for
14861574
/// ranges at slot boundaries, interior to a slot, and spanning two slots.
14871575
#[test]

0 commit comments

Comments
 (0)