Skip to content

Commit 7eaafb6

Browse files
committed
api: implement API for dirty memory
Implement API /memory/dirty which returns a bitmap tracking dirty guest memory. The bitmap is structured as a vector of u64. It is built one memory region at a time and each region's portion is padded to a whole number of u64 words, so its length is the sum over memory regions of pages_in_region.div_ceil(64). Pages are ordered in the order of pages as reported by /memory/mappings. Signed-off-by: Babis Chalios <babis.chalios@e2b.dev>
1 parent fb257a1 commit 7eaafb6

8 files changed

Lines changed: 247 additions & 3 deletions

File tree

resources/seccomp/x86_64-unknown-linux-musl.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,10 @@
220220
"syscall": "mincore",
221221
"comment": "Used by get_memory_dirty_bitmap to check if memory pages are resident"
222222
},
223+
{
224+
"syscall": "pread64",
225+
"comment": "Used by get_dirty_memory to read pagemap entries"
226+
},
223227
{
224228
"syscall": "mmap",
225229
"comment": "Used by the VirtIO balloon device",

src/firecracker/src/api_server/parsed_request.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ use super::request::logger::parse_put_logger;
2020
use super::request::machine_configuration::{
2121
parse_get_machine_config, parse_patch_machine_config, parse_put_machine_config,
2222
};
23-
use super::request::memory::{parse_get_memory, parse_get_memory_mappings};
23+
use super::request::memory::{parse_get_memory, parse_get_memory_dirty, parse_get_memory_mappings};
2424
use super::request::metrics::parse_put_metrics;
2525
use super::request::mmds::{parse_get_mmds, parse_patch_mmds, parse_put_mmds};
2626
use super::request::net::{parse_patch_net, parse_put_net};
@@ -85,6 +85,7 @@ impl TryFrom<&Request> for ParsedRequest {
8585
(Method::Get, "machine-config", None) => parse_get_machine_config(),
8686
(Method::Get, "memory", None) => match path_tokens.next() {
8787
Some("mappings") => parse_get_memory_mappings(),
88+
Some("dirty") => parse_get_memory_dirty(),
8889
None => parse_get_memory(),
8990
_ => Err(RequestError::InvalidPathMethod(
9091
request_uri.to_string(),
@@ -183,6 +184,7 @@ impl ParsedRequest {
183184
VmmData::InstanceInformation(info) => Self::success_response_with_data(info),
184185
VmmData::MemoryMappings(mappings) => Self::success_response_with_data(mappings),
185186
VmmData::Memory(memory) => Self::success_response_with_data(memory),
187+
VmmData::MemoryDirty(dirty) => Self::success_response_with_data(dirty),
186188
VmmData::VmmVersion(version) => Self::success_response_with_data(
187189
&serde_json::json!({ "firecracker_version": version.as_str() }),
188190
),
@@ -585,6 +587,9 @@ pub mod tests {
585587
VmmData::Memory(memory) => {
586588
http_response(&serde_json::to_string(memory).unwrap(), 200)
587589
}
590+
VmmData::MemoryDirty(dirty) => {
591+
http_response(&serde_json::to_string(dirty).unwrap(), 200)
592+
}
588593
VmmData::VmmVersion(version) => http_response(
589594
&serde_json::json!({ "firecracker_version": version.as_str() }).to_string(),
590595
200,
@@ -615,6 +620,9 @@ pub mod tests {
615620
empty: vec![],
616621
},
617622
));
623+
verify_ok_response_with(VmmData::MemoryDirty(
624+
vmm::vmm_config::instance_info::MemoryDirty { bitmap: vec![] },
625+
));
618626
verify_ok_response_with(VmmData::VmmVersion(String::default()));
619627

620628
// Error.

src/firecracker/src/api_server/request/memory.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ pub(crate) fn parse_get_memory() -> Result<ParsedRequest, RequestError> {
1616
Ok(ParsedRequest::new_sync(VmmAction::GetMemory))
1717
}
1818

19+
pub(crate) fn parse_get_memory_dirty() -> Result<ParsedRequest, RequestError> {
20+
METRICS.get_api_requests.instance_info_count.inc();
21+
Ok(ParsedRequest::new_sync(VmmAction::GetMemoryDirty))
22+
}
23+
1924
#[cfg(test)]
2025
mod tests {
2126
use super::*;
@@ -36,4 +41,12 @@ mod tests {
3641
_ => panic!("Test failed."),
3742
}
3843
}
44+
45+
#[test]
46+
fn test_parse_get_memory_dirty_request() {
47+
match parse_get_memory_dirty().unwrap().into_parts() {
48+
(RequestAction::Sync(action), _) if *action == VmmAction::GetMemoryDirty => {}
49+
_ => panic!("Test failed."),
50+
}
51+
}
3952
}

src/vmm/src/lib.rs

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,8 @@ pub enum VmmError {
276276
VmmObserverTeardown(vmm_sys_util::errno::Error),
277277
/// VMGenID error: {0}
278278
VMGenID(#[from] VmGenIdError),
279+
/// Pagemap error: {0}
280+
Pagemap(#[from] utils::pagemap::PagemapError),
279281
}
280282

281283
/// Shorthand type for KVM dirty page bitmap.
@@ -482,6 +484,69 @@ impl Vmm {
482484
mappings
483485
}
484486

487+
/// Get dirty pages bitmap for guest memory.
488+
///
489+
/// Returns a bitmap where each bit represents whether a guest page has been written to
490+
/// (i.e., present in RAM and not write-protected via userfaultfd). Pages are ordered
491+
/// following the order of memory regions as returned by `guest_memory_mappings`.
492+
pub fn get_dirty_memory(&self, page_size: usize) -> Result<Vec<u64>, VmmError> {
493+
let pagemap = utils::pagemap::PagemapReader::new(page_size)?;
494+
let mut dirty_bitmap = vec![];
495+
496+
let sys_page_size = utils::get_page_size().expect("Failed to get system page size");
497+
498+
for region in self.guest_memory().iter() {
499+
let base_addr = region.as_ptr() as usize;
500+
let len = region.size();
501+
let nr_pages = len / page_size;
502+
503+
// Use mincore to get page residency at host (system) page size granularity
504+
let mincore_n = len.div_ceil(sys_page_size);
505+
let mut mincore_vec = vec![0u8; mincore_n];
506+
507+
// SAFETY: base_addr points to a valid guest memory region we own.
508+
let mincore_result = unsafe {
509+
libc::mincore(
510+
base_addr as *mut libc::c_void,
511+
len,
512+
mincore_vec.as_mut_ptr(),
513+
)
514+
};
515+
516+
// Build dirty bitmap: check pagemap only for pages that mincore reports resident.
517+
let mut slot_bitmap = vec![0u64; nr_pages.div_ceil(64)];
518+
for page_idx in 0..nr_pages {
519+
let page_offset = page_idx * page_size;
520+
521+
let is_resident = if mincore_result == 0 {
522+
let start = page_offset / sys_page_size;
523+
let count = page_size.div_ceil(sys_page_size);
524+
if start + count <= mincore_vec.len() {
525+
mincore_vec[start..start + count]
526+
.iter()
527+
.any(|&v| (v & 0x1) != 0)
528+
} else {
529+
false
530+
}
531+
} else {
532+
// If mincore failed, assume resident (conservative)
533+
true
534+
};
535+
536+
if is_resident {
537+
let virt_addr = base_addr + page_offset;
538+
if pagemap.is_page_dirty(virt_addr)? {
539+
slot_bitmap[page_idx / 64] |= 1u64 << (page_idx % 64);
540+
}
541+
}
542+
}
543+
544+
dirty_bitmap.extend_from_slice(&slot_bitmap);
545+
}
546+
547+
Ok(dirty_bitmap)
548+
}
549+
485550
/// Sets RDA bit in serial console
486551
pub fn emulate_serial_init(&self) -> Result<(), EmulateSerialInitError> {
487552
// When restoring from a previously saved state, there is no serial

src/vmm/src/rpc_interface.rs

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ use crate::vmm_config::balloon::{
2525
use crate::vmm_config::boot_source::{BootSourceConfig, BootSourceConfigError};
2626
use crate::vmm_config::drive::{BlockDeviceConfig, BlockDeviceUpdateConfig, DriveError};
2727
use crate::vmm_config::entropy::{EntropyDeviceConfig, EntropyDeviceError};
28-
use crate::vmm_config::instance_info::{InstanceInfo, MemoryMappingsResponse, MemoryResponse};
28+
use crate::vmm_config::instance_info::{
29+
InstanceInfo, MemoryDirty, MemoryMappingsResponse, MemoryResponse, VmState,
30+
};
2931
use crate::vmm_config::machine_config::{MachineConfig, MachineConfigUpdate, VmConfigError};
3032
use crate::vmm_config::metrics::{MetricsConfig, MetricsConfigError};
3133
use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError};
@@ -69,6 +71,8 @@ pub enum VmmAction {
6971
GetMemoryMappings,
7072
/// Get memory info (resident and empty pages).
7173
GetMemory,
74+
/// Get guest memory dirty pages information
75+
GetMemoryDirty,
7276
/// Get microVM version.
7377
GetVmmVersion,
7478
/// Flush the metrics. This action can only be called after the logger has been configured.
@@ -168,6 +172,8 @@ pub enum VmmActionError {
168172
OperationNotSupportedPostBoot,
169173
/// The requested operation is not supported before starting the microVM.
170174
OperationNotSupportedPreBoot,
175+
/// The requested operation is not supported while the microVM is running.
176+
OperationNotSupportedWhileRunning,
171177
/// Start microvm error: {0}
172178
StartMicrovm(#[from] StartMicrovmError),
173179
/// Vsock config error: {0}
@@ -197,6 +203,8 @@ pub enum VmmData {
197203
MemoryMappings(MemoryMappingsResponse),
198204
/// Memory info (resident and empty pages).
199205
Memory(MemoryResponse),
206+
/// The guest memory dirty pages information
207+
MemoryDirty(MemoryDirty),
200208
/// The microVM version.
201209
VmmVersion(String),
202210
}
@@ -427,7 +435,9 @@ impl<'a> PrebootApiController<'a> {
427435
&self.vm_resources.vm_config,
428436
))),
429437
GetVmInstanceInfo => Ok(VmmData::InstanceInformation(self.instance_info.clone())),
430-
GetMemoryMappings | GetMemory => Err(VmmActionError::OperationNotSupportedPreBoot),
438+
GetMemoryMappings | GetMemory | GetMemoryDirty => {
439+
Err(VmmActionError::OperationNotSupportedPreBoot)
440+
}
431441
GetVmmVersion => Ok(VmmData::VmmVersion(self.instance_info.vmm_version.clone())),
432442
InsertBlockDevice(config) => self.insert_block_device(config),
433443
InsertNetworkDevice(config) => self.insert_net_device(config),
@@ -680,6 +690,7 @@ impl RuntimeApiController {
680690
empty: empty_bitmap,
681691
}))
682692
}
693+
GetMemoryDirty => self.get_dirty_memory_info(),
683694
GetVmmVersion => Ok(VmmData::VmmVersion(
684695
self.vmm.lock().expect("Poisoned lock").version(),
685696
)),
@@ -779,6 +790,28 @@ impl RuntimeApiController {
779790
.map_err(VmmActionError::InternalVmm)
780791
}
781792

793+
/// Get dirty pages information for guest memory
794+
fn get_dirty_memory_info(&self) -> Result<VmmData, VmmActionError> {
795+
let start_us = get_time_us(ClockType::Monotonic);
796+
let vmm = self.vmm.lock().expect("Poisoned lock");
797+
798+
// Dirty page tracking via pagemap requires the VM to be paused so that guest
799+
// pages are not modified while we are reading the pagemap.
800+
if vmm.instance_info.state != VmState::Paused {
801+
return Err(VmmActionError::OperationNotSupportedWhileRunning);
802+
}
803+
804+
let page_size = self.vm_resources.vm_config.huge_pages.page_size_kib();
805+
let bitmap = vmm
806+
.get_dirty_memory(page_size)
807+
.map_err(VmmActionError::InternalVmm)?;
808+
809+
let elapsed_time_us = get_time_us(ClockType::Monotonic) - start_us;
810+
info!("'get dirty memory' VMM action took {elapsed_time_us} us.");
811+
812+
Ok(VmmData::MemoryDirty(MemoryDirty { bitmap }))
813+
}
814+
782815
fn create_snapshot(
783816
&mut self,
784817
create_params: &CreateSnapshotParams,

src/vmm/src/utils/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ pub mod net;
99
pub mod signal;
1010
/// Module with state machine
1111
pub mod sm;
12+
/// Module with pagemap utilities
13+
pub mod pagemap;
1214

1315
use std::num::Wrapping;
1416
use std::result::Result;

src/vmm/src/utils/pagemap.rs

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
//! Utilities for reading /proc/self/pagemap to track dirty pages.
2+
3+
#![allow(clippy::cast_possible_wrap)]
4+
5+
use std::fs::File;
6+
use std::os::unix::io::AsRawFd;
7+
8+
use crate::utils::get_page_size;
9+
10+
const PAGEMAP_ENTRY_SIZE: usize = 8;
11+
12+
/// Errors related to pagemap operations
13+
#[derive(Debug, thiserror::Error, displaydoc::Display)]
14+
pub enum PagemapError {
15+
/// Failed to open /proc/self/pagemap: {0}
16+
OpenPagemap(#[source] std::io::Error),
17+
/// Failed to read pagemap entry: {0}
18+
ReadEntry(#[source] std::io::Error),
19+
}
20+
21+
/// Represents a single entry in /proc/pid/pagemap.
22+
///
23+
/// Each virtual page has an 8-byte entry with the following layout:
24+
/// - Bits 0-54: Page frame number (PFN) if present
25+
/// - Bit 55: Page is soft-dirty (written to since last clear)
26+
/// - Bit 56: Page is exclusively mapped
27+
/// - Bit 57: Page is write-protected via userfaultfd
28+
/// - Bit 58: Unused
29+
/// - Bit 59-60: Unused
30+
/// - Bit 61: Page is file-page or shared-anon
31+
/// - Bit 62: Page is swapped
32+
/// - Bit 63: Page is present in RAM
33+
#[derive(Debug, Clone, Copy)]
34+
pub struct PagemapEntry {
35+
raw: u64,
36+
}
37+
38+
impl PagemapEntry {
39+
/// Create a PagemapEntry from bytes (little-endian)
40+
pub fn from_bytes(bytes: [u8; 8]) -> Self {
41+
Self {
42+
raw: u64::from_ne_bytes(bytes),
43+
}
44+
}
45+
46+
/// Check if page is write-protected via userfaultfd
47+
pub fn is_write_protected(&self) -> bool {
48+
(self.raw & (1u64 << 57)) != 0
49+
}
50+
51+
/// Check if page is present in RAM (bit 63)
52+
pub fn is_present(&self) -> bool {
53+
(self.raw & (1u64 << 63)) != 0
54+
}
55+
}
56+
57+
/// Reader for /proc/self/pagemap
58+
#[derive(Debug)]
59+
pub struct PagemapReader {
60+
pagemap_fd: File,
61+
}
62+
63+
impl PagemapReader {
64+
/// Create a new PagemapReader
65+
pub fn new(_page_size: usize) -> Result<Self, PagemapError> {
66+
let pagemap_fd = File::open("/proc/self/pagemap").map_err(PagemapError::OpenPagemap)?;
67+
68+
Ok(Self { pagemap_fd })
69+
}
70+
71+
/// Check if a single page is dirty (write-protected bit cleared).
72+
///
73+
/// Checks the first host page (typically 4K) of the guest page at the given address.
74+
/// For huge pages, all host pages within the huge page typically have the same
75+
/// dirty status, so sampling the first is sufficient.
76+
///
77+
/// # Arguments
78+
/// * `virt_addr` - Virtual address of the page to check
79+
///
80+
/// # Returns
81+
/// True if the page is present and write-protected bit is cleared (dirty).
82+
pub fn is_page_dirty(&self, virt_addr: usize) -> Result<bool, PagemapError> {
83+
// Pagemap is indexed in units of the host page size (queried below; typically 4K)
84+
let host_page_size = get_page_size().expect("Failed to get system page size");
85+
86+
// Calculate offset for this virtual page (using host page size)
87+
let host_vpn = virt_addr / host_page_size;
88+
let offset = (host_vpn * PAGEMAP_ENTRY_SIZE) as i64;
89+
90+
let mut entry_bytes = [0u8; 8];
91+
92+
// SAFETY: pread is safe as long as the fd is valid and the buffer is properly sized
93+
let ret = unsafe {
94+
libc::pread(
95+
self.pagemap_fd.as_raw_fd(),
96+
entry_bytes.as_mut_ptr().cast(),
97+
PAGEMAP_ENTRY_SIZE,
98+
offset,
99+
)
100+
};
101+
102+
if ret != PAGEMAP_ENTRY_SIZE as isize {
103+
return Err(PagemapError::ReadEntry(std::io::Error::last_os_error()));
104+
}
105+
106+
let entry = PagemapEntry::from_bytes(entry_bytes);
107+
108+
// Page must be present and the write_protected bit cleared (indicating it was written to)
109+
Ok(entry.is_present() && !entry.is_write_protected())
110+
}
111+
}

src/vmm/src/vmm_config/instance_info.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,11 @@ pub struct MemoryResponse {
6868
/// This is a subset of the resident pages.
6969
pub empty: Vec<u64>,
7070
}
71+
72+
/// Information about dirty guest memory pages
73+
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize)]
74+
pub struct MemoryDirty {
75+
/// Bitmap for dirty pages. The bitmap is encoded as a vector of u64 values.
76+
/// Each bit represents whether a page has been written since the last snapshot.
77+
pub bitmap: Vec<u64>,
78+
}

0 commit comments

Comments
 (0)