Skip to content

Commit ac4bc0e

Browse files
committed
feat: enable write-protection on guest memory
UFFD provides an API to enable write-protection for memory ranges tracked by a userfault file descriptor. Detailed information can be found here: https://docs.kernel.org/admin-guide/mm/userfaultfd.html. To use the feature, users need to register the memory region with UFFDIO_REGISTER_MODE_WP. Then, users need to explicitly enable write-protection for sub-ranges of the registered region. Writes to pages within write-protected memory ranges can be handled in one of two ways. In synchronous mode, a write to a protected page will cause the kernel to send a write-protection event over the userfaultfd. In asynchronous mode, the kernel will automatically handle writes to protected pages by clearing the write-protection bit. Userspace can later observe the write-protection bit by looking into the corresponding entry of /proc/<pid>/pagemap. This commit unconditionally enables write protection for guest memory using the asynchronous mode. !NOTE!: asynchronous write protection requires (host) kernel version 6.7 or later. Signed-off-by: Babis Chalios <babis.chalios@e2b.dev>
1 parent 7eaafb6 commit ac4bc0e

4 files changed

Lines changed: 58 additions & 7 deletions

File tree

Cargo.lock

Lines changed: 26 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/vmm/Cargo.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,11 @@ serde_json = "1.0.132"
4242
slab = "0.4.7"
4343
thiserror = "1.0.67"
4444
timerfd = "1.5.0"
45-
userfaultfd = "0.8.1"
45+
userfaultfd = { git = "https://github.com/e2b-dev/userfaultfd-rs", branch = "feat_write_protection", features = [
46+
"linux5_7",
47+
"linux5_13",
48+
"linux6_7"
49+
] }
4650
utils = { path = "../utils" }
4751
vhost = { version = "0.13.0", features = ["vhost-user-frontend"] }
4852
vm-allocator = "0.1.0"

src/vmm/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -513,6 +513,10 @@ impl Vmm {
513513
)
514514
};
515515

516+
// TODO: if we don't support UFFD/async WP, we can completely skip this bit, as the
517+
// UFFD handler already tracks dirty pages through the WriteProtected events. For the
518+
// time being, we always do.
519+
//
516520
// Build dirty bitmap: check pagemap only for pages that mincore reports resident.
517521
let mut slot_bitmap = vec![0u64; nr_pages.div_ceil(64)];
518522
for page_idx in 0..nr_pages {

src/vmm/src/persist.rs

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use std::sync::{Arc, Mutex};
1414
use seccompiler::BpfThreadMap;
1515
use semver::Version;
1616
use serde::{Deserialize, Serialize};
17-
use userfaultfd::{FeatureFlags, Uffd, UffdBuilder};
17+
use userfaultfd::{FeatureFlags, RegisterMode, Uffd, UffdBuilder};
1818
use vmm_sys_util::sock_ctrl_msg::ScmSocket;
1919

2020
#[cfg(target_arch = "aarch64")]
@@ -536,6 +536,8 @@ pub enum GuestMemoryFromUffdError {
536536
Create(userfaultfd::Error),
537537
/// Failed to register memory address range with the userfaultfd object: {0}
538538
Register(userfaultfd::Error),
539+
/// Failed to enable write protection on memory address range with the userfaultfd object: {0}
540+
WriteProtect(userfaultfd::Error),
539541
/// Failed to connect to UDS Unix stream: {0}
540542
Connect(#[from] std::io::Error),
541543
/// Failed to sends file descriptor: {0}
@@ -560,6 +562,10 @@ fn guest_memory_from_uffd(
560562
uffd_builder.require_features(FeatureFlags::EVENT_REMOVE);
561563
}
562564

565+
uffd_builder.require_features(
566+
FeatureFlags::MISSING_HUGETLBFS | FeatureFlags::WP_ASYNC,
567+
);
568+
563569
let uffd = uffd_builder
564570
.close_on_exec(true)
565571
.non_blocking(true)
@@ -568,8 +574,22 @@ fn guest_memory_from_uffd(
568574
.map_err(GuestMemoryFromUffdError::Create)?;
569575

570576
for mem_region in guest_memory.iter() {
571-
uffd.register(mem_region.as_ptr().cast(), mem_region.size() as _)
572-
.map_err(GuestMemoryFromUffdError::Register)?;
577+
uffd.register_with_mode(
578+
mem_region.as_ptr().cast(),
579+
mem_region.size() as _,
580+
RegisterMode::MISSING | RegisterMode::WRITE_PROTECT,
581+
)
582+
.map_err(GuestMemoryFromUffdError::Register)?;
583+
584+
// If memory is backed by huge pages, we can immediately write protect it.
585+
// Otherwise (memory is backed by anonymous memory), write protecting here
586+
// won't have any effect, as the write-protection bit for a page will be
587+
// wiped when the first page fault occurs. These cases need to be handled
588+
// directly from the UFFD handler.
589+
if huge_pages.is_hugetlbfs() {
590+
uffd.write_protect(mem_region.as_ptr().cast(), mem_region.size() as _)
591+
.map_err(GuestMemoryFromUffdError::WriteProtect)?;
592+
}
573593
}
574594

575595
send_uffd_handshake(mem_uds_path, &backend_mappings, &uffd)?;

0 commit comments

Comments
 (0)