Skip to content

Commit 8fc760f

Browse files
committed
feat: enable write-protection on guest memory
UFFD provides an API to enable write protection for memory ranges tracked by a userfault file descriptor. Detailed information can be found here: https://docs.kernel.org/admin-guide/mm/userfaultfd.html. To use the feature, users need to register the memory region with UFFDIO_REGISTER_MODE_WP. Then, users need to explicitly enable write protection for sub-ranges of the registered region. Writes to pages within write-protected memory ranges can be handled in one of two ways. In synchronous mode, a write to a protected page causes the kernel to send a write-protection event over the userfaultfd. In asynchronous mode, the kernel automatically handles writes to protected pages by clearing the write-protection bit. Userspace can later observe the write-protection bit by looking at the corresponding entry of /proc/<pid>/pagemap. This commit unconditionally enables write protection for guest memory using the asynchronous mode. !NOTE!: asynchronous write protection requires a (host) kernel version 6.7 or later. Signed-off-by: Babis Chalios <babis.chalios@e2b.dev>
1 parent 54a1c1a commit 8fc760f

4 files changed

Lines changed: 57 additions & 8 deletions

File tree

Cargo.lock

Lines changed: 26 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/vmm/Cargo.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,11 @@ serde_json = "1.0.140"
3636
slab = "0.4.7"
3737
thiserror = "2.0.12"
3838
timerfd = "1.5.0"
39-
userfaultfd = "0.8.1"
39+
userfaultfd = { git = "https://github.com/e2b-dev/userfaultfd-rs", branch = "feat_write_protection", features = [
40+
"linux5_7",
41+
"linux5_13",
42+
"linux6_7"
43+
] }
4044
utils = { path = "../utils" }
4145
vhost = { version = "0.13.0", features = ["vhost-user-frontend"] }
4246
vm-allocator = "0.1.0"

src/vmm/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,10 @@ impl Vmm {
778778
let resident_bitmap =
779779
vstate::vm::mincore_bitmap(base_addr as *mut u8, len, page_size)?;
780780

781+
// TODO: if we support UFFD/async WP, we can completely skip this bit, as the
782+
// UFFD handler already tracks dirty pages through the WriteProtected events. For the
783+
// time being, we always do.
784+
//
781785
// Build dirty bitmap: check pagemap only for pages that mincore reports resident.
782786
// This reduces the number of /proc/self/pagemap reads.
783787
let mut slot_bitmap = vec![0u64; nr_pages.div_ceil(64)];

src/vmm/src/persist.rs

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use std::sync::{Arc, Mutex};
1414

1515
use semver::Version;
1616
use serde::{Deserialize, Serialize};
17-
use userfaultfd::{FeatureFlags, Uffd, UffdBuilder};
17+
use userfaultfd::{FeatureFlags, RegisterMode, Uffd, UffdBuilder};
1818
use vmm_sys_util::sock_ctrl_msg::ScmSocket;
1919

2020
#[cfg(target_arch = "aarch64")]
@@ -469,6 +469,8 @@ pub enum GuestMemoryFromUffdError {
469469
Create(userfaultfd::Error),
470470
/// Failed to register memory address range with the userfaultfd object: {0}
471471
Register(userfaultfd::Error),
472+
/// Failed to enable write protection on memory address range with the userfaultfd object: {0}
473+
WriteProtect(userfaultfd::Error),
472474
/// Failed to connect to UDS Unix stream: {0}
473475
Connect(#[from] std::io::Error),
474476
/// Failed to sends file descriptor: {0}
@@ -490,7 +492,9 @@ fn guest_memory_from_uffd(
490492
// because the only place the kernel checks this is in a hook from madvise, e.g. it doesn't
491493
// actively change the behavior of UFFD, only passively. Without balloon devices
492494
// we never call madvise anyway, so no need to put this into a conditional.
493-
uffd_builder.require_features(FeatureFlags::EVENT_REMOVE);
495+
uffd_builder.require_features(
496+
FeatureFlags::EVENT_REMOVE | FeatureFlags::MISSING_HUGETLBFS | FeatureFlags::WP_ASYNC,
497+
);
494498

495499
let uffd = uffd_builder
496500
.close_on_exec(true)
@@ -500,8 +504,22 @@ fn guest_memory_from_uffd(
500504
.map_err(GuestMemoryFromUffdError::Create)?;
501505

502506
for mem_region in guest_memory.iter() {
503-
uffd.register(mem_region.as_ptr().cast(), mem_region.size() as _)
504-
.map_err(GuestMemoryFromUffdError::Register)?;
507+
uffd.register_with_mode(
508+
mem_region.as_ptr().cast(),
509+
mem_region.size() as _,
510+
RegisterMode::MISSING | RegisterMode::WRITE_PROTECT,
511+
)
512+
.map_err(GuestMemoryFromUffdError::Register)?;
513+
514+
// If memory is backed by huge pages, we can immediately write protect it.
515+
// Otherwise (memory is backed by anonymous memory), write protecting here
516+
// won't have any effect, as the write-protection bit for a page will be
517+
// wiped when the first page fault occurs. These cases need to be handled
518+
// directly from the UFFD handler.
519+
if huge_pages.is_hugetlbfs() {
520+
uffd.write_protect(mem_region.as_ptr().cast(), mem_region.size() as _)
521+
.map_err(GuestMemoryFromUffdError::WriteProtect)?;
522+
}
505523
}
506524

507525
send_uffd_handshake(mem_uds_path, &backend_mappings, &uffd)?;

0 commit comments

Comments
 (0)