@@ -22,9 +22,10 @@ pub use vm_memory::{
2222 GuestUsize , MemoryRegionAddress , MmapRegion , address,
2323} ;
2424use vm_memory:: { GuestMemoryError , GuestMemoryRegionBytes , VolatileSlice , WriteVolatile } ;
25- use vmm_sys_util:: errno;
25+ use vmm_sys_util:: fallocate:: FallocateMode ;
26+ use vmm_sys_util:: { errno, fallocate} ;
2627
27- use crate :: utils:: { get_page_size, u64_to_usize} ;
28+ use crate :: utils:: { get_page_size, u64_to_usize, usize_to_u64 } ;
2829use crate :: vmm_config:: machine_config:: HugePageConfig ;
2930use crate :: vstate:: vm:: VmError ;
3031use crate :: { DirtyBitmap , Vm } ;
@@ -391,8 +392,7 @@ impl GuestRegionMmapExt {
391392 let phys_address = self . get_host_address ( caddr) ?;
392393
393394 match ( self . inner . file_offset ( ) , self . inner . flags ( ) ) {
394- // If and only if we are resuming from a snapshot file, we have a file and it's mapped
395- // private
395+ // If we are resuming from a snapshot file, we have a file and it's mapped private
396396 ( Some ( _) , flags) if flags & libc:: MAP_PRIVATE != 0 => {
397397 // Mmap a new anonymous region over the present one in order to create a hole
398398 // with zero pages.
@@ -420,12 +420,48 @@ impl GuestRegionMmapExt {
420420 Ok ( ( ) )
421421 }
422422 }
423- // Match either the case of an anonymous mapping, or the case
424- // of a shared file mapping.
425- // TODO: madvise(MADV_DONTNEED) doesn't actually work with memfd
426- // (or in general MAP_SHARED of a fd). In those cases we should use
427- // fallocate64(FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE).
428- // We keep falling to the madvise branch to keep the previous behaviour.
423+ // If we back memory over memfd we have a file mapped shared.
424+ ( Some ( file_offset) , flags) if flags & libc:: MAP_SHARED != 0 => {
425+ let Some ( offset) = file_offset. start ( ) . checked_add ( caddr. raw_value ( ) ) else {
426+ return Err ( GuestMemoryError :: InvalidGuestAddress ( GuestAddress (
427+ caddr. raw_value ( ) ,
428+ ) ) ) ;
429+ } ;
430+
431+ // fallocate(PUNCH_HOLE) silently no-ops on memfd when [offset, offset+len)
432+ // doesn't cover any whole filesystem block, and returns EINVAL on hugetlbfs
433+ // for ranges not aligned to the huge-page size. Expand [offset, offset+len)
434+ // to the enclosing page boundaries so the punch actually covers every page
435+ // the caller asked to discard.
436+ let page_size = usize_to_u64 ( if flags & libc:: MAP_HUGETLB != 0 {
437+ HugePageConfig :: Hugetlbfs2M . page_size ( )
438+ } else {
439+ get_page_size ( ) . map_err ( |err| GuestMemoryError :: IOError ( err. into ( ) ) ) ?
440+ } ) ;
441+ let end = offset
442+ . checked_add ( usize_to_u64 ( len) )
443+ . ok_or ( GuestMemoryError :: GuestAddressOverflow ) ?;
444+ let aligned_offset = offset & !( page_size - 1 ) ;
445+ let aligned_end = end
446+ . checked_add ( page_size - 1 )
447+ . ok_or ( GuestMemoryError :: GuestAddressOverflow ) ?
448+ & !( page_size - 1 ) ;
449+
450+ fallocate:: fallocate (
451+ file_offset. file ( ) ,
452+ FallocateMode :: PunchHole ,
453+ true ,
454+ aligned_offset,
455+ aligned_end - aligned_offset,
456+ )
457+ . map_err ( |err| {
458+ error ! ( "discard_range: punching hole failed: {err:?}" ) ;
459+ GuestMemoryError :: IOError ( err. into ( ) )
460+ } ) ?;
461+
462+ Ok ( ( ) )
463+ }
464+ // Anonymous mapping.
429465 _ => {
430466 // Madvise the region in order to mark it as not used.
431467 // SAFETY: The address and length are known to be valid.
@@ -873,7 +909,7 @@ mod tests {
873909
874910 use super :: * ;
875911 use crate :: snapshot:: Snapshot ;
876- use crate :: test_utils:: single_region_mem;
912+ use crate :: test_utils:: { multi_region_mem_memfd , single_region_mem} ;
877913 use crate :: utils:: { get_page_size, mib_to_bytes} ;
878914 use crate :: vstate:: memory:: test_utils:: into_region_ext;
879915
@@ -1462,6 +1498,92 @@ mod tests {
14621498 ) ;
14631499 }
14641500
1501+ fn check_pages (
1502+ mem : & GuestMemoryMmap ,
1503+ page_size : usize ,
1504+ first_page_expected : & [ u8 ] ,
1505+ second_page_expected : & [ u8 ] ,
1506+ third_page_expected : & [ u8 ] ,
1507+ ) {
1508+ let first_page = 0u64 ;
1509+ let second_page = usize_to_u64 ( page_size) ;
1510+ let third_page = usize_to_u64 ( 2 * page_size) ;
1511+
1512+ let mut actual_page = vec ! [ 0u8 ; page_size] ;
1513+ mem. read ( actual_page. as_mut_slice ( ) , GuestAddress ( first_page) )
1514+ . unwrap ( ) ;
1515+ assert_eq ! ( first_page_expected, actual_page) ;
1516+ mem. read ( actual_page. as_mut_slice ( ) , GuestAddress ( second_page) )
1517+ . unwrap ( ) ;
1518+ assert_eq ! ( second_page_expected, actual_page) ;
1519+ mem. read ( actual_page. as_mut_slice ( ) , GuestAddress ( third_page) )
1520+ . unwrap ( ) ;
1521+ assert_eq ! ( third_page_expected, actual_page) ;
1522+ }
1523+
1524+ // A page size-agnostic tests for discard_range() of memory that is
1525+ // mapped on top of memfd
1526+ fn test_discard_range_on_shared_memfd ( huge_pages : HugePageConfig ) {
1527+ let page_size = huge_pages. page_size ( ) ;
1528+ let first_page = 0u64 ;
1529+ let second_page = usize_to_u64 ( page_size) ;
1530+ let third_page = usize_to_u64 ( 2 * page_size) ;
1531+
1532+ let ( mem, _file) = multi_region_mem_memfd (
1533+ & [
1534+ ( GuestAddress ( first_page) , page_size) ,
1535+ ( GuestAddress ( second_page) , page_size) ,
1536+ ( GuestAddress ( third_page) , page_size) ,
1537+ ] ,
1538+ huge_pages,
1539+ ) ;
1540+
1541+ // Fill up all three pages with 1s
1542+ let ones = vec ! [ 1u8 ; 3 * page_size] ;
1543+ mem. write ( & ones, GuestAddress ( 0 ) ) . unwrap ( ) ;
1544+
1545+ // Cleanup the last page
1546+ mem. discard_range ( GuestAddress ( third_page) , page_size)
1547+ . unwrap ( ) ;
1548+ check_pages (
1549+ & mem,
1550+ page_size,
1551+ & vec ! [ 1u8 ; page_size] ,
1552+ & vec ! [ 1u8 ; page_size] ,
1553+ & vec ! [ 0u8 ; page_size] ,
1554+ ) ;
1555+
1556+ // Cleanup the first page
1557+ mem. discard_range ( GuestAddress ( 0 ) , page_size) . unwrap ( ) ;
1558+ check_pages (
1559+ & mem,
1560+ page_size,
1561+ & vec ! [ 0u8 ; page_size] ,
1562+ & vec ! [ 1u8 ; page_size] ,
1563+ & vec ! [ 0u8 ; page_size] ,
1564+ ) ;
1565+
1566+ // Cleanup starting from first page and landing in second
1567+ mem. discard_range ( GuestAddress ( 1 ) , page_size) . unwrap ( ) ;
1568+ check_pages (
1569+ & mem,
1570+ page_size,
1571+ & vec ! [ 0u8 ; page_size] ,
1572+ & vec ! [ 0u8 ; page_size] ,
1573+ & vec ! [ 0u8 ; page_size] ,
1574+ ) ;
1575+ }
1576+
1577+ #[ test]
1578+ fn test_discard_range_on_shared_memfd_4k ( ) {
1579+ test_discard_range_on_shared_memfd ( HugePageConfig :: None )
1580+ }
1581+
1582+ #[ test]
1583+ fn test_discard_range_on_shared_memfd_2m ( ) {
1584+ test_discard_range_on_shared_memfd ( HugePageConfig :: Hugetlbfs2M )
1585+ }
1586+
14651587 /// Verifies that `slots_intersecting_range` returns the correct slots for
14661588 /// ranges at slot boundaries, interior to a slot, and spanning two slots.
14671589 #[ test]
0 commit comments