From acbe29c45b35400a3ce0d692ba602b6cbb43afc7 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Tue, 24 Mar 2026 16:24:34 +0000 Subject: [PATCH 1/7] fix: pmem: add validation for head and status descriptors lengths Head and status descriptors must be 4 bytes long by the spec. Add validation for this. Signed-off-by: Egor Lazarchuk --- src/vmm/src/devices/virtio/pmem/device.rs | 51 +++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/vmm/src/devices/virtio/pmem/device.rs b/src/vmm/src/devices/virtio/pmem/device.rs index e05a3291c9d..3ebe7fa4590 100644 --- a/src/vmm/src/devices/virtio/pmem/device.rs +++ b/src/vmm/src/devices/virtio/pmem/device.rs @@ -44,6 +44,10 @@ pub enum PmemError { ReadOnlyDescriptor, /// Unexpected write-only descriptor WriteOnlyDescriptor, + /// Head descriptor has invalid length of {0} instead of 4 + Non4byteHeadDescriptor(u32), + /// Status descriptor has invalid length of {0} instead of 4 + Non4byteStatusDescriptor(u32), /// UnknownRequestType: {0} UnknownRequestType(u32), /// Descriptor chain too short @@ -283,19 +287,31 @@ impl Pmem { // This is safe since we checked in the event handler that the device is activated. 
let active_state = self.device_state.active_state().unwrap(); + // Virtio spec, section 5.19.6 Driver Operations + // https://docs.oasis-open.org/virtio/virtio/v1.3/csd01/virtio-v1.3-csd01.html#x1-6970006 if head.is_write_only() { return Err(PmemError::WriteOnlyDescriptor); } + if head.len != 4 { + return Err(PmemError::Non4byteHeadDescriptor(head.len)); + } let request: u32 = active_state.mem.read_obj(head.addr)?; if request != VIRTIO_PMEM_REQ_TYPE_FLUSH { return Err(PmemError::UnknownRequestType(request)); } + + // Virtio spec, section 5.19.7 Device Operations + // https://docs.oasis-open.org/virtio/virtio/v1.3/csd01/virtio-v1.3-csd01.html#x1-6980007 let Some(status_descriptor) = head.next_descriptor() else { return Err(PmemError::DescriptorChainTooShort); }; if !status_descriptor.is_write_only() { return Err(PmemError::ReadOnlyDescriptor); } + if status_descriptor.len != 4 { + return Err(PmemError::Non4byteStatusDescriptor(status_descriptor.len)); + } + let mut result = SUCCESS; // SAFETY: We are calling the system call with valid arguments and checking the returned // value @@ -556,5 +572,40 @@ mod tests { PmemError::ReadOnlyDescriptor, )); } + + // Invalid length head descriptor + { + vq.avail.ring[0].set(0); + vq.dtable[0].set(0x1000, 0x69, VIRTQ_DESC_F_NEXT, 1); + mem.write_obj::(0, GuestAddress(0x1000)).unwrap(); + + pmem.queues[0] = vq.create_queue(); + vq.used.idx.set(0); + vq.avail.idx.set(1); + let head = pmem.queues[0].pop().unwrap().unwrap(); + assert!(matches!( + pmem.process_chain(head).unwrap_err(), + PmemError::Non4byteHeadDescriptor(0x69), + )); + } + + // Invalid length status descriptor + { + vq.avail.ring[0].set(0); + vq.dtable[0].set(0x1000, 4, VIRTQ_DESC_F_NEXT, 1); + vq.avail.ring[1].set(1); + vq.dtable[1].set(0x2000, 0x69, VIRTQ_DESC_F_WRITE, 0); + mem.write_obj::(0, GuestAddress(0x1000)).unwrap(); + mem.write_obj::(0x69, GuestAddress(0x2000)).unwrap(); + + pmem.queues[0] = vq.create_queue(); + vq.used.idx.set(0); + vq.avail.idx.set(1); + 
let head = pmem.queues[0].pop().unwrap().unwrap(); + assert!(matches!( + pmem.process_chain(head).unwrap_err(), + PmemError::Non4byteStatusDescriptor(0x69), + )); + } } } From 2b61f325f532bf72d97740b510922e6abd06eff9 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Tue, 24 Mar 2026 16:46:24 +0000 Subject: [PATCH 2/7] feat: pmem: cache the msync result There is only one type of request virtio-pmem accepts, but the guest can still issue many of them and Firecracker will need to process them all. Instead of doing one `msync` per request, it is less resource intensive to do it once on the first valid descriptor and then duplicate the result to other descriptors. This is safe since the guest will only know the result of the execution after Firecracker signals it, which will only happen after all descriptors are processed. Signed-off-by: Egor Lazarchuk --- src/vmm/src/devices/virtio/pmem/device.rs | 81 +++++++++++++++++------ 1 file changed, 59 insertions(+), 22 deletions(-) diff --git a/src/vmm/src/devices/virtio/pmem/device.rs b/src/vmm/src/devices/virtio/pmem/device.rs index 3ebe7fa4590..ed022ade61d 100644 --- a/src/vmm/src/devices/virtio/pmem/device.rs +++ b/src/vmm/src/devices/virtio/pmem/device.rs @@ -254,8 +254,9 @@ impl Pmem { // This is safe since we checked in the event handler that the device is activated. let active_state = self.device_state.active_state().unwrap(); + let mut cached_result = None; while let Some(head) = self.queues[0].pop()?
{ - let add_result = match self.process_chain(head) { + let add_result = match self.process_chain(head, &mut cached_result) { Ok(()) => self.queues[0].add_used(head.index, 4), Err(err) => { error!("pmem: {err}"); @@ -283,7 +284,11 @@ impl Pmem { Ok(()) } - fn process_chain(&self, head: DescriptorChain) -> Result<(), PmemError> { + fn process_chain( + &self, + head: DescriptorChain, + cached_result: &mut Option, + ) -> Result<(), PmemError> { // This is safe since we checked in the event handler that the device is activated. let active_state = self.device_state.active_state().unwrap(); @@ -312,21 +317,33 @@ impl Pmem { return Err(PmemError::Non4byteStatusDescriptor(status_descriptor.len)); } - let mut result = SUCCESS; - // SAFETY: We are calling the system call with valid arguments and checking the returned - // value - unsafe { - let ret = libc::msync( - self.mmap_ptr as *mut libc::c_void, - u64_to_usize(self.file_len), - libc::MS_SYNC, - ); - if ret < 0 { - error!("pmem: Unable to msync the file. Error: {}", ret); - result = FAILURE; + // Since there is only 1 type of request pmem device supports, + // we treat single notification from the guest as a single request + // and reuse cached result of `msync` from first valid descriptor + // for all following descriptors. + if let Some(result) = cached_result { + active_state + .mem + .write_obj(*result, status_descriptor.addr)?; + } else { + let mut status = SUCCESS; + // SAFETY: We are calling the system call with valid arguments and checking the returned + // value + unsafe { + let ret = libc::msync( + self.mmap_ptr as *mut libc::c_void, + u64_to_usize(self.file_len), + libc::MS_SYNC, + ); + if ret < 0 { + error!("pmem: Unable to msync the file. 
Error: {}", ret); + status = FAILURE; + } } + *cached_result = Some(status); + + active_state.mem.write_obj(status, status_descriptor.addr)?; } - active_state.mem.write_obj(result, status_descriptor.addr)?; Ok(()) } @@ -501,8 +518,28 @@ mod tests { vq.used.idx.set(0); vq.avail.idx.set(1); let head = pmem.queues[0].pop().unwrap().unwrap(); - pmem.process_chain(head).unwrap(); + let mut result = None; + pmem.process_chain(head, &mut result).unwrap(); assert_eq!(mem.read_obj::(GuestAddress(0x2000)).unwrap(), 0); + assert!(result.is_some()); + } + + // Valid request cached value reuse + { + vq.avail.ring[0].set(0); + vq.dtable[0].set(0x1000, 4, VIRTQ_DESC_F_NEXT, 1); + vq.avail.ring[1].set(1); + vq.dtable[1].set(0x2000, 4, VIRTQ_DESC_F_WRITE, 0); + mem.write_obj::(0, GuestAddress(0x1000)).unwrap(); + mem.write_obj::(0x69, GuestAddress(0x2000)).unwrap(); + + pmem.queues[0] = vq.create_queue(); + vq.used.idx.set(0); + vq.avail.idx.set(1); + let head = pmem.queues[0].pop().unwrap().unwrap(); + let mut result = Some(0x69); + pmem.process_chain(head, &mut result).unwrap(); + assert_eq!(mem.read_obj::(GuestAddress(0x2000)).unwrap(), 0x69); } // Invalid request type @@ -516,7 +553,7 @@ mod tests { vq.avail.idx.set(1); let head = pmem.queues[0].pop().unwrap().unwrap(); assert!(matches!( - pmem.process_chain(head).unwrap_err(), + pmem.process_chain(head, &mut None).unwrap_err(), PmemError::UnknownRequestType(0x69), )); } @@ -532,7 +569,7 @@ mod tests { vq.avail.idx.set(1); let head = pmem.queues[0].pop().unwrap().unwrap(); assert!(matches!( - pmem.process_chain(head).unwrap_err(), + pmem.process_chain(head, &mut None).unwrap_err(), PmemError::DescriptorChainTooShort, )); } @@ -550,7 +587,7 @@ mod tests { vq.avail.idx.set(1); let head = pmem.queues[0].pop().unwrap().unwrap(); assert!(matches!( - pmem.process_chain(head).unwrap_err(), + pmem.process_chain(head, &mut None).unwrap_err(), PmemError::WriteOnlyDescriptor, )); } @@ -568,7 +605,7 @@ mod tests { vq.avail.idx.set(1); let 
head = pmem.queues[0].pop().unwrap().unwrap(); assert!(matches!( - pmem.process_chain(head).unwrap_err(), + pmem.process_chain(head, &mut None).unwrap_err(), PmemError::ReadOnlyDescriptor, )); } @@ -584,7 +621,7 @@ mod tests { vq.avail.idx.set(1); let head = pmem.queues[0].pop().unwrap().unwrap(); assert!(matches!( - pmem.process_chain(head).unwrap_err(), + pmem.process_chain(head, &mut None).unwrap_err(), PmemError::Non4byteHeadDescriptor(0x69), )); } @@ -603,7 +640,7 @@ mod tests { vq.avail.idx.set(1); let head = pmem.queues[0].pop().unwrap().unwrap(); assert!(matches!( - pmem.process_chain(head).unwrap_err(), + pmem.process_chain(head, &mut None).unwrap_err(), PmemError::Non4byteStatusDescriptor(0x69), )); } From 8e585568a576b28af2e544ba76f2c1971acead3e Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Tue, 7 Apr 2026 14:15:08 +0100 Subject: [PATCH 3/7] feat: pmem: add rate-limiter for virtio-pmem device Add rate-limiter support to the virtio-pmem device to allow users to configure limits of the I/O bandwidth generated by the `msync` call in the device which could be triggered by the guest FLUSH requests. 
Signed-off-by: Egor Lazarchuk --- .../src/api_server/parsed_request.rs | 3 +- .../src/api_server/request/pmem.rs | 63 ++++++++- src/firecracker/swagger/firecracker.yaml | 44 ++++++ src/vmm/src/builder.rs | 1 + src/vmm/src/device_manager/pci_mngr.rs | 4 +- src/vmm/src/device_manager/persist.rs | 4 +- src/vmm/src/devices/virtio/pmem/device.rs | 63 ++++++++- .../src/devices/virtio/pmem/event_handler.rs | 9 ++ src/vmm/src/devices/virtio/pmem/metrics.rs | 8 ++ src/vmm/src/devices/virtio/pmem/persist.rs | 9 ++ src/vmm/src/lib.rs | 14 ++ src/vmm/src/logger/metrics.rs | 6 + src/vmm/src/rpc_interface.rs | 29 +++- src/vmm/src/vmm_config/pmem.rs | 19 +++ tests/host_tools/fcmetrics.py | 4 + .../integration_tests/functional/test_api.py | 125 ++++++++++++++++++ 16 files changed, 398 insertions(+), 7 deletions(-) diff --git a/src/firecracker/src/api_server/parsed_request.rs b/src/firecracker/src/api_server/parsed_request.rs index efad11f1a55..be3127c9206 100644 --- a/src/firecracker/src/api_server/parsed_request.rs +++ b/src/firecracker/src/api_server/parsed_request.rs @@ -24,7 +24,7 @@ use super::request::machine_configuration::{ use super::request::metrics::parse_put_metrics; use super::request::mmds::{parse_get_mmds, parse_patch_mmds, parse_put_mmds}; use super::request::net::{parse_patch_net, parse_put_net}; -use super::request::pmem::parse_put_pmem; +use super::request::pmem::{parse_patch_pmem, parse_put_pmem}; use super::request::snapshot::{parse_patch_vm_state, parse_put_snapshot}; use super::request::version::parse_get_version; use super::request::vsock::parse_put_vsock; @@ -120,6 +120,7 @@ impl TryFrom<&Request> for ParsedRequest { (Method::Patch, "network-interfaces", Some(body)) => { parse_patch_net(body, path_tokens.next()) } + (Method::Patch, "pmem", Some(body)) => parse_patch_pmem(body, path_tokens.next()), (Method::Patch, "vm", Some(body)) => parse_patch_vm_state(body), (Method::Patch, "hotplug", Some(body)) if path_tokens.next() == Some("memory") => { 
parse_patch_memory_hotplug(body) diff --git a/src/firecracker/src/api_server/request/pmem.rs b/src/firecracker/src/api_server/request/pmem.rs index dc538a0d5fc..5ccc18b29b1 100644 --- a/src/firecracker/src/api_server/request/pmem.rs +++ b/src/firecracker/src/api_server/request/pmem.rs @@ -3,7 +3,7 @@ use vmm::logger::{IncMetric, METRICS}; use vmm::rpc_interface::VmmAction; -use vmm::vmm_config::pmem::PmemConfig; +use vmm::vmm_config::pmem::{PmemConfig, PmemDeviceUpdateConfig}; use super::super::parsed_request::{ParsedRequest, RequestError, checked_id}; use super::{Body, StatusCode}; @@ -37,6 +37,36 @@ pub(crate) fn parse_put_pmem( } } +pub(crate) fn parse_patch_pmem( + body: &Body, + id_from_path: Option<&str>, +) -> Result { + METRICS.patch_api_requests.pmem_count.inc(); + let id = if let Some(id) = id_from_path { + checked_id(id)? + } else { + METRICS.patch_api_requests.pmem_fails.inc(); + return Err(RequestError::EmptyID); + }; + + let update_cfg = + serde_json::from_slice::(body.raw()).inspect_err(|_| { + METRICS.patch_api_requests.pmem_fails.inc(); + })?; + + if id == update_cfg.id { + Ok(ParsedRequest::new_sync(VmmAction::UpdatePmemDevice( + update_cfg, + ))) + } else { + METRICS.patch_api_requests.pmem_fails.inc(); + Err(RequestError::Generic( + StatusCode::BadRequest, + "The id from the path does not match the id from the body!".to_string(), + )) + } +} + #[cfg(test)] mod tests { use super::*; @@ -60,7 +90,8 @@ mod tests { "id": "1000", "path_on_host": "dummy", "root_device": true, - "read_only": true + "read_only": true, + "rate_limiter": {} }"#; let r = vmm_action_from_request(parse_put_pmem(&Body::new(body), Some("1000")).unwrap()); @@ -69,7 +100,35 @@ mod tests { path_on_host: "dummy".to_string(), root_device: true, read_only: true, + rate_limiter: Some(Default::default()), }; assert_eq!(r, VmmAction::InsertPmemDevice(expected_config)); } + + #[test] + fn test_parse_patch_pmem_request() { + parse_put_pmem(&Body::new("invalid_payload"), 
None).unwrap_err(); + parse_put_pmem(&Body::new("invalid_payload"), Some("id")).unwrap_err(); + + let body = r#"{ + "id": "bar", + }"#; + parse_patch_pmem(&Body::new(body), Some("1")).unwrap_err(); + let body = r#"{ + "foo": "1", + }"#; + parse_patch_pmem(&Body::new(body), Some("1")).unwrap_err(); + + let body = r#"{ + "id": "1000", + "rate_limiter": {} + }"#; + let r = vmm_action_from_request(parse_patch_pmem(&Body::new(body), Some("1000")).unwrap()); + + let expected_config = PmemDeviceUpdateConfig { + id: "1000".to_string(), + rate_limiter: Some(Default::default()), + }; + assert_eq!(r, VmmAction::UpdatePmemDevice(expected_config)); + } } diff --git a/src/firecracker/swagger/firecracker.yaml b/src/firecracker/swagger/firecracker.yaml index 96366c68c70..5e8f16f732c 100644 --- a/src/firecracker/swagger/firecracker.yaml +++ b/src/firecracker/swagger/firecracker.yaml @@ -370,6 +370,35 @@ paths: description: Internal server error. schema: $ref: "#/definitions/Error" + patch: + summary: Updates the rate limiter of a pmem device. Post-boot only. + description: + Updates the rate limiter applied to the pmem device with the ID specified + by the id path parameter. + operationId: patchGuestPmemByID + parameters: + - name: id + in: path + description: The id of the guest pmem device + required: true + type: string + - name: body + in: body + description: Pmem rate limiter properties + required: true + schema: + $ref: "#/definitions/PartialPmem" + responses: + 204: + description: Pmem device updated + 400: + description: Pmem device cannot be updated due to bad input + schema: + $ref: "#/definitions/Error" + default: + description: Internal server error. + schema: + $ref: "#/definitions/Error" /logger: put: @@ -1255,6 +1284,8 @@ definitions: type: boolean description: Flag to map backing file in read-only mode. 
+ rate_limiter: + $ref: "#/definitions/RateLimiter" Error: type: object @@ -1538,6 +1569,19 @@ definitions: tx_rate_limiter: $ref: "#/definitions/RateLimiter" + PartialPmem: + type: object + description: + Defines a partial pmem device structure, used to update the rate limiter + for that device, after microvm start. + required: + - id + properties: + id: + type: string + rate_limiter: + $ref: "#/definitions/RateLimiter" + RateLimiter: type: object description: diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 1152023e549..1f6110583c7 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -1315,6 +1315,7 @@ pub(crate) mod tests { path_on_host: "".into(), root_device: true, read_only: true, + ..Default::default() }]; let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); diff --git a/src/vmm/src/device_manager/pci_mngr.rs b/src/vmm/src/device_manager/pci_mngr.rs index 832d12b9195..d2e41b45955 100644 --- a/src/vmm/src/device_manager/pci_mngr.rs +++ b/src/vmm/src/device_manager/pci_mngr.rs @@ -705,6 +705,7 @@ mod tests { path_on_host: "".into(), root_device: true, read_only: true, + ..Default::default() }]; _pmem_files = insert_pmem_devices(&mut vmm, &mut cmdline, &mut event_manager, pmem_configs); @@ -812,7 +813,8 @@ mod tests { "id": "pmem", "path_on_host": "{}", "root_device": true, - "read_only": true + "read_only": true, + "rate_limiter": null }} ], "memory-hotplug": {{ diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index d951af089eb..46342adb5bf 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -739,6 +739,7 @@ mod tests { path_on_host: "".into(), root_device: true, read_only: true, + ..Default::default() }]; _pmem_files = insert_pmem_devices(&mut vmm, &mut cmdline, &mut event_manager, pmem_configs); @@ -843,7 +844,8 @@ mod tests { "id": "pmem", "path_on_host": "{}", "root_device": true, - "read_only": true + "read_only": 
true, + "rate_limiter": null }} ], "memory-hotplug": {{ diff --git a/src/vmm/src/devices/virtio/pmem/device.rs b/src/vmm/src/devices/virtio/pmem/device.rs index ed022ade61d..8beda143610 100644 --- a/src/vmm/src/devices/virtio/pmem/device.rs +++ b/src/vmm/src/devices/virtio/pmem/device.rs @@ -22,7 +22,9 @@ use crate::devices::virtio::pmem::metrics::{PmemMetrics, PmemMetricsPerDevice}; use crate::devices::virtio::queue::{DescriptorChain, InvalidAvailIdx, Queue, QueueError}; use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType}; use crate::logger::{IncMetric, error, info}; +use crate::rate_limiter::{BucketUpdate, RateLimiter, TokenType}; use crate::utils::{align_up, u64_to_usize}; +use crate::vmm_config::RateLimiterConfig; use crate::vmm_config::pmem::PmemConfig; use crate::vstate::memory::{ByteValued, Bytes, GuestMemoryMmap, GuestMmapRegion}; use crate::vstate::vm::VmError; @@ -58,6 +60,8 @@ pub enum PmemError { Queue(#[from] QueueError), /// Error during obtaining the descriptor from the queue: {0} QueuePop(#[from] InvalidAvailIdx), + /// Error creating rate limiter: {0} + RateLimiter(std::io::Error), } const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0; @@ -94,6 +98,7 @@ pub struct Pmem { pub file_len: u64, pub mmap_ptr: u64, pub metrics: Arc, + pub rate_limiter: RateLimiter, pub config: PmemConfig, } @@ -125,6 +130,13 @@ impl Pmem { let (file, file_len, mmap_ptr, mmap_len) = Self::mmap_backing_file(&config.path_on_host, config.read_only)?; + let rate_limiter = config + .rate_limiter + .map(RateLimiterConfig::try_into) + .transpose() + .map_err(PmemError::RateLimiter)? + .unwrap_or_default(); + Ok(Self { avail_features: 1u64 << VIRTIO_F_VERSION_1, acked_features: 0u64, @@ -140,6 +152,7 @@ impl Pmem { file_len, mmap_ptr, metrics: PmemMetricsPerDevice::alloc(config.id.clone()), + rate_limiter, config, }) } @@ -254,6 +267,26 @@ impl Pmem { // This is safe since we checked in the event handler that the device is activated. 
let active_state = self.device_state.active_state().unwrap(); + if self.queues[0].is_empty() { + return Ok(()); + } + + // There is only 1 type of request pmem supports, so we can consume + // rate-limiter before even looking at the queue. This is still valid + // even if the queue will not have any valid requests since it indicates + // a broken guest driver and rate-limiting should still apply in such a case. + // Rate limit: consume 1 op and file_len bytes for the coalesced msync. + // If the rate limiter is blocked, defer notification until the timer fires. + if !self.rate_limiter.consume(1, TokenType::Ops) { + self.metrics.rate_limiter_throttled_events.inc(); + return Ok(()); + } + if !self.rate_limiter.consume(self.file_len, TokenType::Bytes) { + self.rate_limiter.manual_replenish(1, TokenType::Ops); + self.metrics.rate_limiter_throttled_events.inc(); + return Ok(()); + } + let mut cached_result = None; while let Some(head) = self.queues[0].pop()? { let add_result = match self.process_chain(head, &mut cached_result) { @@ -270,8 +303,8 @@ impl Pmem { break; } } - self.queues[0].advance_used_ring_idx(); + + self.queues[0].advance_used_ring_idx(); if self.queues[0].prepare_kick() { active_state .interrupt @@ -347,6 +380,11 @@ impl Pmem { Ok(()) } + /// Updates the parameters for the rate limiter.
+ pub fn update_rate_limiter(&mut self, bytes: BucketUpdate, ops: BucketUpdate) { + self.rate_limiter.update_buckets(bytes, ops); + } + pub fn process_queue(&mut self) { self.metrics.queue_event_count.inc(); if let Err(err) = self.queue_events[0].read() { @@ -355,6 +393,25 @@ impl Pmem { return; } + if self.rate_limiter.is_blocked() { + self.metrics.rate_limiter_throttled_events.inc(); + return; + } + + self.handle_queue().unwrap_or_else(|err| { + error!("pmem: {err:?}"); + self.metrics.event_fails.inc(); + }); + } + + pub fn process_rate_limiter_event(&mut self) { + self.metrics.rate_limiter_event_count.inc(); + if let Err(err) = self.rate_limiter.event_handler() { + error!("pmem: Failed to get rate-limiter event: {err:?}"); + self.metrics.event_fails.inc(); + return; + } + self.handle_queue().unwrap_or_else(|err| { error!("pmem: {err:?}"); self.metrics.event_fails.inc(); @@ -458,6 +515,7 @@ mod tests { path_on_host: "not_a_path".into(), root_device: true, read_only: false, + ..Default::default() }; assert!(matches!( Pmem::new(config).unwrap_err(), @@ -471,6 +529,7 @@ mod tests { path_on_host: dummy_path.clone(), root_device: true, read_only: false, + ..Default::default() }; assert!(matches!( Pmem::new(config).unwrap_err(), @@ -483,6 +542,7 @@ mod tests { path_on_host: dummy_path, root_device: true, read_only: false, + ..Default::default() }; Pmem::new(config).unwrap(); } @@ -497,6 +557,7 @@ mod tests { path_on_host: dummy_path, root_device: true, read_only: false, + ..Default::default() }; let mut pmem = Pmem::new(config).unwrap(); diff --git a/src/vmm/src/devices/virtio/pmem/event_handler.rs b/src/vmm/src/devices/virtio/pmem/event_handler.rs index e2ef18d2130..2324c509a69 100644 --- a/src/vmm/src/devices/virtio/pmem/event_handler.rs +++ b/src/vmm/src/devices/virtio/pmem/event_handler.rs @@ -10,6 +10,7 @@ use crate::logger::{error, warn}; impl Pmem { const PROCESS_ACTIVATE: u32 = 0; const PROCESS_PMEM_QUEUE: u32 = 1; + const PROCESS_RATE_LIMITER: u32 = 2; fn 
register_runtime_events(&self, ops: &mut EventOps) { if let Err(err) = ops.add(Events::with_data( @@ -19,6 +20,13 @@ impl Pmem { )) { error!("pmem: Failed to register queue event: {err}"); } + if let Err(err) = ops.add(Events::with_data( + &self.rate_limiter, + Self::PROCESS_RATE_LIMITER, + EventSet::IN, + )) { + error!("pmem: Failed to register rate-limiter event: {err}"); + } } fn register_activate_event(&self, ops: &mut EventOps) { @@ -76,6 +84,7 @@ impl MutEventSubscriber for Pmem { match source { Self::PROCESS_ACTIVATE => self.process_activate_event(ops), Self::PROCESS_PMEM_QUEUE => self.process_queue(), + Self::PROCESS_RATE_LIMITER => self.process_rate_limiter_event(), _ => { warn!("pmem: Unknown event received: {source}"); } diff --git a/src/vmm/src/devices/virtio/pmem/metrics.rs b/src/vmm/src/devices/virtio/pmem/metrics.rs index 02348b4ca51..8aaae2f041e 100644 --- a/src/vmm/src/devices/virtio/pmem/metrics.rs +++ b/src/vmm/src/devices/virtio/pmem/metrics.rs @@ -151,6 +151,10 @@ pub struct PmemMetrics { pub event_fails: SharedIncMetric, /// Number of events triggered on the queue of this pmem device. pub queue_event_count: SharedIncMetric, + /// Number of events throttled because of the rate limiter. + pub rate_limiter_throttled_events: SharedIncMetric, + /// Number of rate limiter replenish events. 
+ pub rate_limiter_event_count: SharedIncMetric, } impl PmemMetrics { @@ -172,6 +176,10 @@ impl PmemMetrics { self.event_fails.add(other.event_fails.fetch_diff()); self.queue_event_count .add(other.queue_event_count.fetch_diff()); + self.rate_limiter_throttled_events + .add(other.rate_limiter_throttled_events.fetch_diff()); + self.rate_limiter_event_count + .add(other.rate_limiter_event_count.fetch_diff()); } } diff --git a/src/vmm/src/devices/virtio/pmem/persist.rs b/src/vmm/src/devices/virtio/pmem/persist.rs index 7d5d2bd778e..aba7da62769 100644 --- a/src/vmm/src/devices/virtio/pmem/persist.rs +++ b/src/vmm/src/devices/virtio/pmem/persist.rs @@ -9,6 +9,8 @@ use crate::Vm; use crate::devices::virtio::device::{DeviceState, VirtioDeviceType}; use crate::devices::virtio::persist::{PersistError as VirtioStateError, VirtioDeviceState}; use crate::devices::virtio::pmem::{PMEM_NUM_QUEUES, PMEM_QUEUE_SIZE}; +use crate::rate_limiter::RateLimiter; +use crate::rate_limiter::persist::RateLimiterState; use crate::snapshot::Persist; use crate::vmm_config::pmem::PmemConfig; use crate::vstate::memory::{GuestMemoryMmap, GuestRegionMmap}; @@ -19,6 +21,7 @@ pub struct PmemState { pub virtio_state: VirtioDeviceState, pub config_space: ConfigSpace, pub config: PmemConfig, + pub rate_limiter_state: RateLimiterState, } #[derive(Debug)] @@ -35,6 +38,8 @@ pub enum PmemPersistError { Pmem(#[from] PmemError), /// Error registering memory region: {0} Vm(#[from] VmError), + /// Error restoring rate limiter: {0} + RateLimiter(std::io::Error), } impl<'a> Persist<'a> for Pmem { @@ -47,6 +52,7 @@ impl<'a> Persist<'a> for Pmem { virtio_state: VirtioDeviceState::from_device(self), config_space: self.config_space, config: self.config.clone(), + rate_limiter_state: self.rate_limiter.save(), } } @@ -65,6 +71,8 @@ impl<'a> Persist<'a> for Pmem { pmem.config_space = state.config_space; pmem.avail_features = state.virtio_state.avail_features; pmem.acked_features = state.virtio_state.acked_features; + 
pmem.rate_limiter = RateLimiter::restore((), &state.rate_limiter_state) + .map_err(PmemPersistError::RateLimiter)?; pmem.set_mem_region(constructor_args.vm)?; @@ -92,6 +100,7 @@ mod tests { path_on_host: dummy_path, root_device: true, read_only: false, + ..Default::default() }; let pmem = Pmem::new(config).unwrap(); let guest_mem = default_mem(); diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 020d941012e..d30543d63a9 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -677,6 +677,20 @@ impl Vmm { Ok(()) } + /// Updates the rate limiter parameters for pmem device with `pmem_id` id. + pub fn update_pmem_rate_limiter( + &mut self, + pmem_id: &str, + rl_bytes: BucketUpdate, + rl_ops: BucketUpdate, + ) -> Result<(), VmmError> { + self.device_manager + .with_virtio_device(pmem_id, |pmem: &mut Pmem| { + pmem.update_rate_limiter(rl_bytes, rl_ops) + })?; + Ok(()) + } + /// Updates the rate limiter parameters for net device with `net_id` id. pub fn update_net_rate_limiters( &mut self, diff --git a/src/vmm/src/logger/metrics.rs b/src/vmm/src/logger/metrics.rs index 4a22af7f945..ca96fe8b5b3 100644 --- a/src/vmm/src/logger/metrics.rs +++ b/src/vmm/src/logger/metrics.rs @@ -490,6 +490,10 @@ pub struct PatchRequestsMetrics { pub hotplug_memory_count: SharedIncMetric, /// Number of failed PATCHes to /hotplug/memory pub hotplug_memory_fails: SharedIncMetric, + /// Number of tries to PATCH a pmem device. + pub pmem_count: SharedIncMetric, + /// Number of failures in PATCHing a pmem device. + pub pmem_fails: SharedIncMetric, } impl PatchRequestsMetrics { /// Const default construction. 
@@ -505,6 +509,8 @@ impl PatchRequestsMetrics { mmds_fails: SharedIncMetric::new(), hotplug_memory_count: SharedIncMetric::new(), hotplug_memory_fails: SharedIncMetric::new(), + pmem_count: SharedIncMetric::new(), + pmem_fails: SharedIncMetric::new(), } } } diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs index efd8a595e38..891324ff14a 100644 --- a/src/vmm/src/rpc_interface.rs +++ b/src/vmm/src/rpc_interface.rs @@ -38,7 +38,7 @@ use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; use crate::vmm_config::net::{ NetworkInterfaceConfig, NetworkInterfaceError, NetworkInterfaceUpdateConfig, }; -use crate::vmm_config::pmem::{PmemConfig, PmemConfigError}; +use crate::vmm_config::pmem::{PmemConfig, PmemConfigError, PmemDeviceUpdateConfig}; use crate::vmm_config::serial::SerialConfig; use crate::vmm_config::snapshot::{CreateSnapshotParams, LoadSnapshotParams, SnapshotType}; use crate::vmm_config::vsock::{VsockConfigError, VsockDeviceConfig}; @@ -83,6 +83,8 @@ pub enum VmmAction { InsertBlockDevice(BlockDeviceConfig), /// Add a virtio-pmem device. InsertPmemDevice(PmemConfig), + /// Update an existing pmem device's rate limiter. + UpdatePmemDevice(PmemDeviceUpdateConfig), /// Add a new network interface config or update one that already exists using the /// `NetworkInterfaceConfig` as input. This action can only be called before the microVM has /// booted. 
@@ -494,6 +496,7 @@ impl<'a> PrebootApiController<'a> { | UpdateBlockDevice(_) | UpdateMemoryHotplugSize(_) | UpdateNetworkInterface(_) + | UpdatePmemDevice(_) | StartFreePageHinting(_) | GetFreePageHintingStatus | StopFreePageHinting => Err(VmmActionError::OperationNotSupportedPreBoot), @@ -789,6 +792,7 @@ impl RuntimeApiController { .map_err(VmmActionError::BalloonUpdate), UpdateBlockDevice(new_cfg) => self.update_block_device(new_cfg), UpdateNetworkInterface(netif_update) => self.update_net_rate_limiters(netif_update), + UpdatePmemDevice(new_cfg) => self.update_pmem_device(new_cfg), UpdateMemoryHotplugSize(cfg) => self .vmm .lock() @@ -943,6 +947,25 @@ impl RuntimeApiController { Ok(VmmData::Empty) } + /// Updates the rate limiter for a pmem device. + fn update_pmem_device( + &mut self, + new_cfg: PmemDeviceUpdateConfig, + ) -> Result { + if new_cfg.rate_limiter.is_some() { + self.vmm + .lock() + .expect("Poisoned lock") + .update_pmem_rate_limiter( + &new_cfg.id, + RateLimiterUpdate::from(new_cfg.rate_limiter).bandwidth, + RateLimiterUpdate::from(new_cfg.rate_limiter).ops, + ) + .map_err(PmemConfigError::DeviceUpdate)?; + } + Ok(VmmData::Empty) + } + /// Updates configuration for an emulated net device as described in `new_cfg`. 
fn update_net_rate_limiters( &mut self, @@ -1162,6 +1185,9 @@ mod tests { check_unsupported(preboot_request(VmmAction::UpdateBlockDevice( BlockDeviceUpdateConfig::default(), ))); + check_unsupported(preboot_request(VmmAction::UpdatePmemDevice( + PmemDeviceUpdateConfig::default(), + ))); check_unsupported(preboot_request(VmmAction::UpdateNetworkInterface( NetworkInterfaceUpdateConfig { iface_id: String::new(), @@ -1298,6 +1324,7 @@ mod tests { path_on_host: String::new(), root_device: false, read_only: false, + ..Default::default() }))); check_unsupported(runtime_request(VmmAction::SetMemoryHotplugDevice( MemoryHotplugConfig::default(), diff --git a/src/vmm/src/vmm_config/pmem.rs b/src/vmm/src/vmm_config/pmem.rs index d680e9c36d7..13d347b3a9b 100644 --- a/src/vmm/src/vmm_config/pmem.rs +++ b/src/vmm/src/vmm_config/pmem.rs @@ -6,6 +6,7 @@ use std::sync::{Arc, Mutex}; use serde::{Deserialize, Serialize}; use crate::devices::virtio::pmem::device::{Pmem, PmemError}; +use crate::vmm_config::RateLimiterConfig; /// Errors associated wit the operations allowed on a pmem device #[derive(Debug, thiserror::Error, displaydoc::Display)] @@ -18,6 +19,19 @@ pub enum PmemConfigError { CreateDevice(#[from] PmemError), /// Error accessing underlying file: {0} File(std::io::Error), + /// Device not found: {0} + DeviceUpdate(crate::VmmError), +} + +/// Configuration for updating a pmem device at runtime. +/// Only the rate limiter can be updated. +#[derive(Debug, Default, PartialEq, Eq, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct PmemDeviceUpdateConfig { + /// The pmem device ID. + pub id: String, + /// New rate limiter config. + pub rate_limiter: Option, } /// Use this structure to setup a Pmem device before boothing the kernel. @@ -34,6 +48,8 @@ pub struct PmemConfig { /// Map the file as read only #[serde(default)] pub read_only: bool, + /// Rate Limiter for flush operations. + pub rate_limiter: Option, } /// Wrapper for the collection that holds all the Pmem devices. 
@@ -119,6 +135,7 @@ mod tests { path_on_host: dummy_path, root_device: true, read_only: false, + ..Default::default() }; builder.build(config.clone(), false).unwrap(); assert_eq!(builder.devices.len(), 1); @@ -143,6 +160,7 @@ mod tests { path_on_host: dummy_path, root_device: true, read_only: false, + ..Default::default() }; builder.build(config.clone(), false).unwrap(); @@ -165,6 +183,7 @@ mod tests { path_on_host: dummy_path, root_device: true, read_only: false, + ..Default::default() }; assert!(matches!( builder.build(config, true).unwrap_err(), diff --git a/tests/host_tools/fcmetrics.py b/tests/host_tools/fcmetrics.py index d9e0ef7252c..b05a09e3ac5 100644 --- a/tests/host_tools/fcmetrics.py +++ b/tests/host_tools/fcmetrics.py @@ -211,6 +211,8 @@ def validate_fc_metrics(metrics): "mmds_fails", "hotplug_memory_count", "hotplug_memory_fails", + "pmem_count", + "pmem_fails", ], "put_api_requests": [ "actions_count", @@ -313,6 +315,8 @@ def validate_fc_metrics(metrics): "cfg_fails", "event_fails", "queue_event_count", + "rate_limiter_throttled_events", + "rate_limiter_event_count", ], "memory_hotplug": [ "activate_fails", diff --git a/tests/integration_tests/functional/test_api.py b/tests/integration_tests/functional/test_api.py index aa3c8b050d1..cd3b487e10e 100644 --- a/tests/integration_tests/functional/test_api.py +++ b/tests/integration_tests/functional/test_api.py @@ -619,6 +619,21 @@ def test_rate_limiters_api_config(uvm_plain, io_engine): }, ) + # Test the PMEM rate limiting API. + + # Test pmem with bw and ops rate-limiting. 
+ pmem_fs = drive_tools.FilesystemFile( + os.path.join(test_microvm.fsfiles, "pmem_rl"), size=2 + ) + test_microvm.api.pmem.put( + id="pmem_rl", + path_on_host=test_microvm.create_jailed_resource(pmem_fs.path), + rate_limiter={ + "bandwidth": {"size": 1000000, "refill_time": 100}, + "ops": {"size": 1, "refill_time": 100}, + }, + ) + def test_api_patch_pre_boot(uvm_plain, io_engine): """ @@ -669,6 +684,15 @@ def test_api_patch_pre_boot(uvm_plain, io_engine): with pytest.raises(RuntimeError, match=NOT_SUPPORTED_BEFORE_START): test_microvm.api.network.patch(iface_id=iface_id) + # Patching pmem before boot is not allowed. + # Using nonexistents pmem device is fine since the failure should happen + # at API layer + with pytest.raises(RuntimeError, match=NOT_SUPPORTED_BEFORE_START): + test_microvm.api.pmem.patch( + id="nonexistent", + rate_limiter={"ops": {"size": 1, "refill_time": 100}}, + ) + def test_negative_api_patch_post_boot(uvm_plain, io_engine): """ @@ -1151,6 +1175,105 @@ def test_pmem_api(uvm_plain_any, rootfs): vm.api.pmem.put(id="pmem") +def test_pmem_rate_limiter_api(uvm_plain_any, rootfs): + """ + Test virtio-pmem rate limiter PUT and PATCH API commands. + """ + vm = uvm_plain_any + vm.spawn() + vm.basic_config(add_root_device=False) + + pmem_size_mb = 2 + pmem_path_on_host = drive_tools.FilesystemFile( + os.path.join(vm.fsfiles, "scratch"), size=pmem_size_mb + ) + pmem_file_path = vm.create_jailed_resource(pmem_path_on_host.path) + + # PUT pmem with rate limiter at creation time. + vm.api.pmem.put( + id="pmem0", + path_on_host=pmem_file_path, + rate_limiter={ + "bandwidth": {"size": 1000000, "refill_time": 100}, + "ops": {"size": 10, "refill_time": 1000}, + }, + ) + + # Verify rate limiter is reflected in vm config. 
+ response = vm.api.vm_config.get().json() + pmem_cfg = response["pmem"][0] + assert pmem_cfg["rate_limiter"]["bandwidth"]["size"] == 1000000 + assert pmem_cfg["rate_limiter"]["ops"]["size"] == 10 + + # PUT pmem without rate limiter (overwrite). + vm.api.pmem.put( + id="pmem0", + path_on_host=pmem_file_path, + ) + response = vm.api.vm_config.get().json() + assert response["pmem"][0]["rate_limiter"] is None + + # PATCH pmem before boot is not allowed. + with pytest.raises(RuntimeError, match=NOT_SUPPORTED_BEFORE_START): + vm.api.pmem.patch( + id="pmem0", + rate_limiter={ + "bandwidth": {"size": 5000, "refill_time": 100}, + }, + ) + + # Boot with pmem as rootfs. + vm.add_pmem("rootfs", rootfs, True, True) + vm.start() + + # PUT pmem after boot is not allowed. + with pytest.raises(RuntimeError, match=NOT_SUPPORTED_AFTER_START): + vm.api.pmem.put(id="pmem0", path_on_host=pmem_file_path) + + # PATCH pmem rate limiter after boot should succeed. + vm.api.pmem.patch( + id="pmem0", + rate_limiter={ + "bandwidth": {"size": 5000, "refill_time": 100}, + "ops": {"size": 500, "refill_time": 100}, + }, + ) + + # PATCH with only bandwidth should succeed. + vm.api.pmem.patch( + id="pmem0", + rate_limiter={ + "bandwidth": {"size": 2000000, "refill_time": 200}, + }, + ) + + # PATCH with only ops should succeed. + vm.api.pmem.patch( + id="pmem0", + rate_limiter={ + "ops": {"size": 100, "refill_time": 500}, + }, + ) + + # PATCH without rate_limiter field should succeed. + vm.api.pmem.patch(id="pmem0") + + # PATCH with unknown fields should fail. + with pytest.raises(RuntimeError, match="unknown field"): + vm.api.pmem.patch( + id="pmem0", + path_on_host="foo", + rate_limiter={"ops": {"size": 1, "refill_time": 100}}, + ) + + # PATCH non-existent device should fail. 
+ with pytest.raises(RuntimeError, match="not found"): + vm.api.pmem.patch( + id="nonexistent", + rate_limiter={"ops": {"size": 1, "refill_time": 100}}, + ) + + def test_get_full_config_after_restoring_snapshot(microvm_factory, uvm_nano): """ Test the configuration of a microVM after restoring from a snapshot. @@ -1204,6 +1327,7 @@ def test_get_full_config_after_restoring_snapshot(microvm_factory, uvm_nano): "path_on_host": "/" + uvm_nano.rootfs_file.name, "root_device": False, "read_only": False, + "rate_limiter": None, } ] @@ -1339,6 +1463,7 @@ def test_get_full_config(uvm_plain): "path_on_host": "/" + test_microvm.rootfs_file.name, "root_device": False, "read_only": False, + "rate_limiter": None, } ] From f690f1ff3c7c39f95ffd0fa0470631a8d9fc5ed5 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Thu, 26 Mar 2026 16:37:18 +0000 Subject: [PATCH 4/7] doc: pmem: add a note about limiting msync with cgroupsv2 Add a section in the pmem.md file describing a way of limiting I/O usage of `msync` calls from a virtio-pmem device. Signed-off-by: Egor Lazarchuk --- docs/pmem.md | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/docs/pmem.md b/docs/pmem.md index 44731c3eca9..a539faca807 100644 --- a/docs/pmem.md +++ b/docs/pmem.md @@ -157,6 +157,100 @@ VMs, which could be exploited as a side channel by an attacker inside the microVM. Users that want to use `virtio-pmem` to share memory are encouraged to carefully evaluate the security risk according to their threat model. +### Limiting `msync` write bandwidth + +When a guest issues a flush request to the `virtio-pmem` device (via the +`VIRTIO_PMEM_REQ_TYPE_FLUSH`), Firecracker calls `msync(MS_SYNC)` on the backing +file to persist dirty pages to disk. A malicious guest can issue a high volume +of flush requests, leading to excessive host I/O usage. 
+ +There are two ways to mitigate this: + +#### Firecracker rate limiter + +The `virtio-pmem` device supports a built-in rate limiter, identical to the one +available for block devices. It throttles flush requests using two token +buckets: + +- `bandwidth` — limits the total number of bytes sent to the `msync` per refill + interval. Each flush consumes tokens equal to the **full backing file size**, + because `msync` is called over the entire mapped region. For example, with a + 256 MiB backing file and `size` set to `268435456` (256 MiB), at most one + flush is allowed per `refill_time` milliseconds. +- `ops` — limits the number of `msync` calls per refill interval (after + coalescing multiple flush requests within a single queue notification into one + call). + +The rate limiter can be configured at device creation time. The following +example allows at most 1 flush per second for a 256 MiB backing file +(`bandwidth.size` = 256 MiB = 268435456 bytes), and at most 10 `msync` +operations per second: + +```json +"pmem": [ + { + "id": "pmem0", + "path_on_host": "./backing_file_256m", + "rate_limiter": { + "bandwidth": { "size": 268435456, "refill_time": 1000 }, + "ops": { "size": 10, "refill_time": 1000 } + } + } +] +``` + +It can also be updated at runtime via the API: + +```console +curl --unix-socket $socket_location -i \ + -X PATCH 'http://localhost/pmem/pmem0' \ + -H 'Content-Type: application/json' \ + -d '{ + "id": "pmem0", + "rate_limiter": { + "bandwidth": { "size": 268435456, "refill_time": 1000 }, + "ops": { "size": 10, "refill_time": 1000 } + } + }' +``` + +> [!NOTE] +> +> Since each flush always costs exactly one op and exactly `file_size` bytes, +> the `bandwidth` and `ops` buckets are correlated: setting `bandwidth.size` to +> `file_size` with a given `refill_time` is equivalent to setting `ops.size` to +> `1` with the same `refill_time` — both allow one flush per interval. In +> practice, configuring only one of the two buckets is sufficient. 
Use `ops` for +> a simple "N flushes per interval" limit, or `bandwidth` if you want to express +> the limit in terms of I/O throughput. + +#### Cgroup v2 IO controller + +Alternatively, the **cgroup v2 IO controller** can throttle write bandwidth on +the block device that hosts the `virtio-pmem` backing file: + +```bash +# Identify the block device MAJOR:MINOR for the backing file +dev=$(stat -c '%d' /path/to/backing_file) +echo "$((dev >> 8)):$((dev & 0xff))" + +# Enable the io controller +echo "+io" | sudo tee /sys/fs/cgroup//cgroup.subtree_control + +# Limit write bandwidth (e.g. 10 MB/s) on device MAJOR:MINOR +echo "MAJOR:MINOR wbps=10485760" | sudo tee /sys/fs/cgroup//io.max +``` + +> [!NOTE] +> +> - This requires **cgroup v2** with a filesystem that supports cgroup-aware +> writeback (e.g. ext4, btrfs). +> - The limit applies to all I/O from the cgroup to that device, not only +> `msync` flushes. +> - When using the [Jailer](jailer.md), the Firecracker process is already +> placed in a cgroup. You can configure `io.max` on that cgroup before +> starting the microVM. + ## Snapshot support `virtio-pmem` works with snapshot functionality of Firecracker. Snapshot will From c3f41767ab63a63e685ea8ce6b8d2307a750f84d Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 27 Mar 2026 11:01:44 +0000 Subject: [PATCH 5/7] changelog: pmem: add a note about rate-limiter support in virtio-pmem Mention new rate-limiter API for the virtio-pmem device in the CHANGELOG.md Signed-off-by: Egor Lazarchuk --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d4430760149..fdc35c74a8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,9 @@ and this project adheres to logging resumes after suppression, a warn-level summary reports the count of suppressed messages. A new `rate_limited_log_count` metric tracks the total number of suppressed messages. 
+- [#5789](https://github.com/firecracker-microvm/firecracker/pull/5789): Add + rate-limiter support to virtio-pmem device to allow control over I/O bandwidth + generated by the FLUSH requests from the guest. ### Changed From 00a1f0ea8fc51bc0ffd0073adbd8031704442d68 Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Fri, 27 Mar 2026 12:15:13 +0000 Subject: [PATCH 6/7] doc: pmem: fix incorrect NOTE section formatting Fix incorrect NOTE section formatting in memory usage section Signed-off-by: Egor Lazarchuk --- docs/pmem.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/pmem.md b/docs/pmem.md index a539faca807..7629d9c1433 100644 --- a/docs/pmem.md +++ b/docs/pmem.md @@ -278,10 +278,11 @@ if `virtio-pmem` is used for memory sharing. ## Memory usage -> [!NOTE] `virtio-pmem` memory can be paged out by the host, because it is -> backed by a file with `MAP_SHARED` mapping type. To prevent this from -> happening, you can use `vmtouch` or similar tool to lock file pages from being -> evicted. +> [!NOTE] +> +> `virtio-pmem` memory can be paged out by the host, because it is backed by a +> file with `MAP_SHARED` mapping type. To prevent this from happening, you can +> use `vmtouch` or similar tool to lock file pages from being evicted. `virtio-pmem` resides in host memory and does increase the maximum possible memory usage of a VM since now VM can use all of its RAM and access all of the From ba905dd349190deefb1783ef8e569bba657ecdbe Mon Sep 17 00:00:00 2001 From: Egor Lazarchuk Date: Thu, 9 Apr 2026 17:38:21 +0100 Subject: [PATCH 7/7] pmem: docs: mention new rate-limiter option Update device-api.md with rate-limiter for virtio-pmem. 
Signed-off-by: Egor Lazarchuk --- docs/device-api.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/device-api.md b/docs/device-api.md index 4d209b97220..cc6ab4cba44 100644 --- a/docs/device-api.md +++ b/docs/device-api.md @@ -107,6 +107,7 @@ specification: | | path_on_host | O | O | O | O | O | O | O | **R** | O | | | root_device | O | O | O | O | O | O | O | **R** | O | | | read_only | O | O | O | O | O | O | O | **R** | O | +| | rate_limiter | O | O | O | O | O | O | O | **R** | O | | `SerialConfig` | serial_out_path | O | O | O | O | O | O | O | O | O | | | rate_limiter | O | O | O | O | O | O | O | O | O | | `MemoryHotplugConfig` | total_size_mib | O | O | O | O | O | O | O | O | **R** | @@ -118,7 +119,7 @@ specification: either virtio-block or vhost-user-block devices. \*\* The `TokenBucket` can be configured with any combination of virtio-net, -virtio-block, virtio-rng and serial devices. +virtio-block, virtio-pmem, virtio-rng and serial devices. ## Output Schema