diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b2a76e523d..69c5ffa25df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,8 +10,17 @@ and this project adheres to ### Added +- [#2046](https://github.com/firecracker-microvm/firecracker/issues/2046): The + `SendCtrlAltDel` action is now supported on aarch64. It injects a virtual + power-button press through a new PL061 GPIO controller exposed to the guest as + a `gpio-keys` power button, enabling external graceful shutdown (aarch64 + previously rejected the action with a 400). + ### Changed +- Bumped the snapshot version to 11.0.0 because the aarch64 PL061 GPIO device + adds new state to the snapshot format. + ### Deprecated ### Removed diff --git a/docs/api_requests/actions.md b/docs/api_requests/actions.md index c11f152e651..8354a85f93e 100644 --- a/docs/api_requests/actions.md +++ b/docs/api_requests/actions.md @@ -31,19 +31,29 @@ curl --unix-socket /tmp/firecracker.socket -i \ -d '{ "action_type": "FlushMetrics" }' ``` -## [Intel and AMD only] SendCtrlAltDel +## SendCtrlAltDel -This action will send the CTRL+ALT+DEL key sequence to the microVM. By +This action requests an orderly shutdown of the microVM from the host. Since +Firecracker exits when the guest powers off (CPU reset), `SendCtrlAltDel` can be +used to trigger a clean shutdown of the microVM. The mechanism differs per +architecture, but the API request is the same on both. + +On **x86_64**, this action sends the CTRL+ALT+DEL key sequence to the microVM. By convention, this sequence has been used to trigger a soft reboot and, as such, most Linux distributions perform an orderly shutdown and reset upon receiving -this keyboard input. Since Firecracker exits on CPU reset, `SendCtrlAltDel` can -be used to trigger a clean shutdown of the microVM. - -For this action, Firecracker emulates a standard AT keyboard, connected via an -i8042 controller. Driver support for both these devices needs to be present in +this keyboard input. 
Firecracker emulates a standard AT keyboard, connected via +an i8042 controller. Driver support for both these devices needs to be present in the guest OS. For Linux, that means the guest kernel needs `CONFIG_SERIO_I8042` and `CONFIG_KEYBOARD_ATKBD`. +On **aarch64**, this action injects a virtual power-button press. Firecracker +exposes a PL061 GPIO controller and describes a `gpio-keys` power button (mapped +to `KEY_POWER`) in the device tree. Driver support needs to be present in the +guest OS; for Linux that means `CONFIG_GPIOLIB`, `CONFIG_GPIO_PL061`, +`CONFIG_INPUT_KEYBOARD` and `CONFIG_KEYBOARD_GPIO`, plus a userspace consumer of +the power-key event (for example `systemd-logind` with the default +`HandlePowerKey=poweroff`). + > [!NOTE] > > At boot time, the Linux driver for i8042 spends a few tens of milliseconds diff --git a/docs/device-api.md b/docs/device-api.md index f638cc889f3..41701e23987 100644 --- a/docs/device-api.md +++ b/docs/device-api.md @@ -160,3 +160,8 @@ specification: | `FlushMetrics` | O | O | O | O | O | O | | `InstanceStart` | O | O | O | O | O | O | | `SendCtrlAltDel` | **R** | O | O | O | O | O | + +The `keyboard` requirement for `SendCtrlAltDel` applies to x86_64, which emulates +an i8042 keyboard controller. On aarch64 the action instead drives a PL061 GPIO +power button (exposed to the guest as `gpio-keys`); see +[actions](api_requests/actions.md#sendctrlaltdel). diff --git a/resources/guest_configs/ci.config b/resources/guest_configs/ci.config index 9de3b655783..e6efd84e7fd 100644 --- a/resources/guest_configs/ci.config +++ b/resources/guest_configs/ci.config @@ -13,3 +13,10 @@ CONFIG_SERIO_LIBPS2=y CONFIG_SERIO_GSCPS2=y CONFIG_KEYBOARD_ATKBD=y CONFIG_INPUT_KEYBOARD=y +# On aarch64 SendCtrlAltDel presses a gpio-keys power button wired to a PL061 +# GPIO controller; these enable the guest-side driver chain so systemd-logind +# can act on KEY_POWER. The PL061 driver is ARM AMBA only, so olddefconfig +# drops these on x86_64. 
+CONFIG_GPIOLIB=y +CONFIG_GPIO_PL061=y +CONFIG_KEYBOARD_GPIO=y diff --git a/src/firecracker/src/api_server/request/actions.rs b/src/firecracker/src/api_server/request/actions.rs index a3b3f3f3a88..1ffe0b05d5c 100644 --- a/src/firecracker/src/api_server/request/actions.rs +++ b/src/firecracker/src/api_server/request/actions.rs @@ -7,8 +7,6 @@ use vmm::rpc_interface::VmmAction; use super::super::parsed_request::{ParsedRequest, RequestError}; use super::Body; -#[cfg(target_arch = "aarch64")] -use super::StatusCode; // The names of the members from this enum must precisely correspond (as a string) to the possible // values of "action_type" from the json request body. This is useful to get a strongly typed @@ -37,17 +35,7 @@ pub(crate) fn parse_put_actions(body: &Body) -> Result Ok(ParsedRequest::new_sync(VmmAction::FlushMetrics)), ActionType::InstanceStart => Ok(ParsedRequest::new_sync(VmmAction::StartMicroVm)), - ActionType::SendCtrlAltDel => { - // SendCtrlAltDel not supported on aarch64. 
- #[cfg(target_arch = "aarch64")] - return Err(RequestError::Generic( - StatusCode::BadRequest, - "SendCtrlAltDel does not supported on aarch64.".to_string(), - )); - - #[cfg(target_arch = "x86_64")] - Ok(ParsedRequest::new_sync(VmmAction::SendCtrlAltDel)) - } + ActionType::SendCtrlAltDel => Ok(ParsedRequest::new_sync(VmmAction::SendCtrlAltDel)), } } @@ -69,7 +57,6 @@ mod tests { assert_eq!(result.unwrap(), req); } - #[cfg(target_arch = "x86_64")] { let json = r#"{ "action_type": "SendCtrlAltDel" @@ -80,16 +67,6 @@ mod tests { assert_eq!(result.unwrap(), req); } - #[cfg(target_arch = "aarch64")] - { - let json = r#"{ - "action_type": "SendCtrlAltDel" - }"#; - - let result = parse_put_actions(&Body::new(json)); - result.unwrap_err(); - } - { let json = r#"{ "action_type": "FlushMetrics" diff --git a/src/vmm/src/arch/aarch64/fdt.rs b/src/vmm/src/arch/aarch64/fdt.rs index e34d80182bf..7bf1b56ad84 100644 --- a/src/vmm/src/arch/aarch64/fdt.rs +++ b/src/vmm/src/arch/aarch64/fdt.rs @@ -32,6 +32,8 @@ const GIC_PHANDLE: u32 = 1; const CLOCK_PHANDLE: u32 = 2; // This is a value for uniquely identifying the FDT node declaring the MSI controller. const MSI_PHANDLE: u32 = 3; +// This is a value for uniquely identifying the PL061 GPIO controller node. +const GPIO_PL061_PHANDLE: u32 = 4; // You may be wondering why this big value? // This phandle is used to uniquely identify the FDT nodes containing cache information. Each cpu // can have a variable number of caches, some of these caches may be shared with other cpus. @@ -51,6 +53,9 @@ const GIC_FDT_IRQ_TYPE_PPI: u32 = 1; // From https://elixir.bootlin.com/linux/v4.9.62/source/include/dt-bindings/interrupt-controller/irq.h#L17 const IRQ_TYPE_EDGE_RISING: u32 = 1; const IRQ_TYPE_LEVEL_HI: u32 = 4; +const KEY_POWER: u32 = 116; +const POWER_BUTTON_GPIO_PIN: u32 = 0; +const GPIO_ACTIVE_HIGH: u32 = 0; /// Errors thrown while configuring the Flattened Device Tree for aarch64. 
#[derive(Debug, thiserror::Error, displaydoc::Display)] @@ -457,6 +462,55 @@ fn create_rtc_node(fdt: &mut FdtWriter, dev_info: &MMIODeviceInfo) -> Result<(), Ok(()) } +fn create_gpio_pl061_node(fdt: &mut FdtWriter, dev_info: &MMIODeviceInfo) -> Result<(), FdtError> { + let compatible = b"arm,pl061\0arm,primecell\0"; + + let gpio = fdt.begin_node(&format!("pl061@{:x}", dev_info.addr))?; + fdt.property("compatible", compatible)?; + fdt.property_array_u64("reg", &[dev_info.addr, dev_info.len])?; + fdt.property_u32("clocks", CLOCK_PHANDLE)?; + fdt.property_string("clock-names", "apb_pclk")?; + fdt.property_u32("#gpio-cells", 2)?; + fdt.property_null("gpio-controller")?; + fdt.property_u32("phandle", GPIO_PL061_PHANDLE)?; + // The PL061 interrupt is injected through a plain KVM irqfd (no resample fd), which + // models an edge-triggered line: each `trigger()` delivers a single pulse and there is + // no path to de-assert a level. Declaring it level-high would make the GIC re-fire the + // interrupt forever after the guest EOIs it (interrupt storm). Match the edge-triggered + // model used by every other Firecracker SPI (serial, vmgenid, vmclock). + fdt.property_array_u32( + "interrupts", + &[ + GIC_FDT_IRQ_TYPE_SPI, + dev_info.gsi.unwrap(), + IRQ_TYPE_EDGE_RISING, + ], + )?; + fdt.end_node(gpio)?; + + Ok(()) +} + +fn create_gpio_keys_node(fdt: &mut FdtWriter) -> Result<(), FdtError> { + let gpio_keys = fdt.begin_node("gpio-keys")?; + fdt.property_string("compatible", "gpio-keys")?; + + // A single KEY_POWER button bound to line 0 of the PL061 above (via its phandle), so the + // guest's gpio-keys driver reports a power-key event when the host asserts that line. 
+ let power_button = fdt.begin_node("poweroff")?; + fdt.property_string("label", "GPIO Key Poweroff")?; + fdt.property_u32("linux,code", KEY_POWER)?; + fdt.property_array_u32( + "gpios", + &[GPIO_PL061_PHANDLE, POWER_BUTTON_GPIO_PIN, GPIO_ACTIVE_HIGH], + )?; + fdt.end_node(power_button)?; + + fdt.end_node(gpio_keys)?; + + Ok(()) +} + fn create_devices_node( fdt: &mut FdtWriter, device_manager: &DeviceManager, @@ -469,6 +523,11 @@ fn create_devices_node( create_serial_node(fdt, serial_info)?; } + if let Some(gpio_pl061_info) = device_manager.mmio_devices.gpio_pl061_device_info() { + create_gpio_pl061_node(fdt, gpio_pl061_info)?; + create_gpio_keys_node(fdt)?; + } + let mut virtio_mmio = device_manager.mmio_devices.virtio_device_info(); // Sort out virtio devices by address from low to high and insert them into fdt table. @@ -623,7 +682,9 @@ mod tests { "psci", "rtc@40001000", "uart@40002000", - "virtio_mmio@40003000", + "pl061@40003000", + "gpio-keys", + "virtio_mmio@40004000", "vmgenid", "ptp@2149572608", ]; diff --git a/src/vmm/src/arch/aarch64/layout.rs b/src/vmm/src/arch/aarch64/layout.rs index b50f8fb40e6..609618a4a4e 100644 --- a/src/vmm/src/arch/aarch64/layout.rs +++ b/src/vmm/src/arch/aarch64/layout.rs @@ -120,9 +120,11 @@ pub const BOOT_DEVICE_MEM_START: u64 = MMIO32_MEM_START; pub const RTC_MEM_START: u64 = BOOT_DEVICE_MEM_START + MMIO_LEN; /// Memory region start for Serial device. pub const SERIAL_MEM_START: u64 = RTC_MEM_START + MMIO_LEN; +/// Memory region start for PL061 GPIO device. 
+pub const GPIO_PL061_MEM_START: u64 = SERIAL_MEM_START + MMIO_LEN; /// Beginning of memory region for device MMIO 32-bit accesses -pub const MEM_32BIT_DEVICES_START: u64 = SERIAL_MEM_START + MMIO_LEN; +pub const MEM_32BIT_DEVICES_START: u64 = GPIO_PL061_MEM_START + MMIO_LEN; /// Size of memory region for device MMIO 32-bit accesses pub const MEM_32BIT_DEVICES_SIZE: u64 = PCI_MMCONFIG_START - MEM_32BIT_DEVICES_START; diff --git a/src/vmm/src/arch/mod.rs b/src/vmm/src/arch/mod.rs index a35c61d3117..46c2c4b986c 100644 --- a/src/vmm/src/arch/mod.rs +++ b/src/vmm/src/arch/mod.rs @@ -55,6 +55,9 @@ pub enum DeviceType { Rtc, /// Device Type: BootTimer. BootTimer, + /// Device Type: PL061 GPIO. + #[cfg(target_arch = "aarch64")] + GpioPl061, } /// Default page size for the guest OS. diff --git a/src/vmm/src/device_manager/mmio.rs b/src/vmm/src/device_manager/mmio.rs index e84784a3d5f..40d5b93f33f 100644 --- a/src/vmm/src/device_manager/mmio.rs +++ b/src/vmm/src/device_manager/mmio.rs @@ -20,9 +20,9 @@ use vm_allocator::AllocPolicy; use crate::EventManager; use crate::arch::BOOT_DEVICE_MEM_START; #[cfg(target_arch = "aarch64")] -use crate::arch::{RTC_MEM_START, SERIAL_MEM_START}; +use crate::arch::{GPIO_PL061_MEM_START, RTC_MEM_START, SERIAL_MEM_START}; #[cfg(target_arch = "aarch64")] -use crate::devices::legacy::{RTCDevice, SerialDevice}; +use crate::devices::legacy::{PL061Device, RTCDevice, SerialDevice}; use crate::devices::pseudo::BootTimer; use crate::devices::virtio::device::{VirtioDevice, VirtioDeviceId, VirtioDeviceType}; use crate::devices::virtio::transport::mmio::MmioTransport; @@ -132,6 +132,9 @@ pub struct MMIODeviceManager { #[cfg(target_arch = "aarch64")] /// Serial device on Aarch64 platforms pub(crate) serial: Option>, + #[cfg(target_arch = "aarch64")] + /// PL061 GPIO controller on Aarch64 platforms + pub(crate) gpio_pl061: Option>, #[cfg(target_arch = "x86_64")] // We create the AML byte code for every VirtIO device in the order we build // it, so that we 
ensure the root block device is appears first in the DSDT. @@ -367,6 +370,47 @@ impl MMIODeviceManager { Ok(()) } + #[cfg(target_arch = "aarch64")] + /// Create and register a MMIO PL061 GPIO device at the specified MMIO configuration if + /// given as parameter, otherwise allocate new MMIO resources for it. + pub fn register_mmio_gpio_pl061( + &mut self, + vm: &KvmVm, + gpio_pl061: Arc>, + device_info_opt: Option, + ) -> Result<(), MmioError> { + let device_info = if let Some(device_info) = device_info_opt { + device_info + } else { + let gsi = vm.resource_allocator().allocate_gsi_legacy(1)?; + MMIODeviceInfo { + addr: GPIO_PL061_MEM_START, + len: MMIO_LEN, + gsi: Some(gsi[0]), + } + }; + + vm.register_irq( + &gpio_pl061.lock().expect("Poisoned lock").interrupt_evt, + device_info.gsi.unwrap(), + ) + .map_err(MmioError::RegisterIrqFd)?; + + let device = MMIODevice { + resources: device_info, + inner: gpio_pl061, + sub_id: None, + }; + + vm.common.mmio_bus.insert( + device.inner.clone(), + device.resources.addr, + device.resources.len, + )?; + self.gpio_pl061 = Some(device); + Ok(()) + } + /// Register a boot timer device. 
pub fn register_mmio_boot_timer( &mut self, @@ -443,6 +487,11 @@ impl MMIODeviceManager { pub fn serial_device_info(&self) -> Option<&MMIODeviceInfo> { self.serial.as_ref().map(|device| &device.resources) } + + #[cfg(target_arch = "aarch64")] + pub fn gpio_pl061_device_info(&self) -> Option<&MMIODeviceInfo> { + self.gpio_pl061.as_ref().map(|device| &device.resources) + } } #[cfg(test)] diff --git a/src/vmm/src/device_manager/mod.rs b/src/vmm/src/device_manager/mod.rs index a1a3f99a0aa..48c300b05a2 100644 --- a/src/vmm/src/device_manager/mod.rs +++ b/src/vmm/src/device_manager/mod.rs @@ -27,10 +27,10 @@ use crate::EventManager; use crate::device_manager::acpi::ACPIDeviceError; #[cfg(target_arch = "x86_64")] use crate::devices::legacy::I8042Device; -#[cfg(target_arch = "aarch64")] -use crate::devices::legacy::RTCDevice; use crate::devices::legacy::SerialDevice; use crate::devices::legacy::serial::{SerialOut, SerialOutInner}; +#[cfg(target_arch = "aarch64")] +use crate::devices::legacy::{PL061Device, PL061Error, RTCDevice}; use crate::devices::pseudo::BootTimer; use crate::devices::virtio::ActivateError; use crate::devices::virtio::balloon::BalloonError; @@ -99,6 +99,9 @@ pub enum AttachDeviceError { #[cfg(target_arch = "aarch64")] /// Error creating serial device: {0} CreateSerial(#[from] std::io::Error), + #[cfg(target_arch = "aarch64")] + /// Error creating PL061 GPIO device: {0} + CreateGpioPl061(#[from] PL061Error), /// Error attach PCI device: {0} PciTransport(#[from] PciManagerError), /// Operation not supported on this VM type @@ -362,6 +365,9 @@ impl DeviceManager { let rtc = Arc::new(Mutex::new(RTCDevice::new())); self.mmio_devices.register_mmio_rtc(vm, rtc, None)?; + let gpio_pl061 = Arc::new(Mutex::new(PL061Device::new()?)); + self.mmio_devices + .register_mmio_gpio_pl061(vm, gpio_pl061, None)?; Ok(()) } @@ -662,6 +668,9 @@ pub enum DevicePersistError { #[cfg(target_arch = "aarch64")] /// Legacy: {0} Legacy(#[from] std::io::Error), + #[cfg(target_arch = 
"aarch64")] + /// PL061 GPIO: {0} + GpioPl061(#[from] PL061Error), /// Net: {0} Net(#[from] NetPersistError), /// Vsock: {0} @@ -835,6 +844,7 @@ pub(crate) mod tests { let mut vmm = default_vmm(); assert!(vmm.device_manager.mmio_devices.rtc.is_none()); assert!(vmm.device_manager.mmio_devices.serial.is_none()); + assert!(vmm.device_manager.mmio_devices.gpio_pl061.is_none()); let mut cmdline = Cmdline::new(4096).unwrap(); let mut event_manager = EventManager::new().unwrap(); @@ -849,6 +859,7 @@ pub(crate) mod tests { .unwrap(); assert!(vmm.device_manager.mmio_devices.rtc.is_some()); assert!(vmm.device_manager.mmio_devices.serial.is_none()); + assert!(vmm.device_manager.mmio_devices.gpio_pl061.is_some()); let mut vmm = default_vmm(); cmdline.insert("console", "/dev/blah").unwrap(); @@ -863,6 +874,7 @@ pub(crate) mod tests { .unwrap(); assert!(vmm.device_manager.mmio_devices.rtc.is_some()); assert!(vmm.device_manager.mmio_devices.serial.is_some()); + assert!(vmm.device_manager.mmio_devices.gpio_pl061.is_some()); assert!( cmdline diff --git a/src/vmm/src/device_manager/persist.rs b/src/vmm/src/device_manager/persist.rs index 14ba16e5d81..772ea39d380 100644 --- a/src/vmm/src/device_manager/persist.rs +++ b/src/vmm/src/device_manager/persist.rs @@ -18,7 +18,7 @@ use crate::device_manager::acpi::ACPIDeviceError; use crate::devices::acpi::vmclock::{VmClock, VmClockState}; use crate::devices::acpi::vmgenid::{VMGenIDState, VmGenId}; #[cfg(target_arch = "aarch64")] -use crate::devices::legacy::RTCDevice; +use crate::devices::legacy::{PL061Device, PL061State, RTCDevice}; use crate::devices::virtio::balloon::Balloon; use crate::devices::virtio::balloon::persist::{BalloonConstructorArgs, BalloonState}; use crate::devices::virtio::block::device::Block; @@ -144,6 +144,9 @@ pub struct DeviceStates { pub pmem_devices: Vec>, /// Memory device state. pub memory_device: Option>, + /// PL061 GPIO controller register state (aarch64 only). 
+ #[cfg(target_arch = "aarch64")] + pub gpio_pl061_state: Option, } pub struct MMIODevManagerConstructorArgs<'a> { @@ -226,6 +229,15 @@ impl<'a> Persist<'a> for MMIODeviceManager { device_info: device.resources, }); } + + if let Some(device) = &self.gpio_pl061 { + states.legacy_devices.push(ConnectedLegacyState { + type_: DeviceType::GpioPl061, + device_info: device.resources, + }); + states.gpio_pl061_state = + Some(device.inner.lock().expect("Poisoned lock").state()); + } } let _: Result<(), ()> = self.for_each_virtio_mmio_device(|_, devid, device| { @@ -362,6 +374,8 @@ impl<'a> Persist<'a> for MMIODeviceManager { #[cfg(target_arch = "aarch64")] { + // Captured before the loop shadows `state` with each `ConnectedLegacyState`. + let gpio_pl061_state = state.gpio_pl061_state.as_ref(); for state in &state.legacy_devices { if state.type_ == DeviceType::Serial { let serial_state: Option = @@ -379,6 +393,17 @@ impl<'a> Persist<'a> for MMIODeviceManager { let rtc = Arc::new(Mutex::new(RTCDevice::new())); dev_manager.register_mmio_rtc(vm, rtc, Some(state.device_info))?; } + if state.type_ == DeviceType::GpioPl061 { + let gpio_pl061 = Arc::new(Mutex::new(match gpio_pl061_state { + Some(saved) => PL061Device::from_state(saved)?, + None => PL061Device::new()?, + })); + dev_manager.register_mmio_gpio_pl061( + vm, + gpio_pl061, + Some(state.device_info), + )?; + } } } @@ -614,19 +639,32 @@ impl<'a> Persist<'a> for MMIODeviceManager { #[cfg(test)] mod tests { + #[cfg(target_arch = "aarch64")] + use std::sync::Arc; + + #[cfg(target_arch = "aarch64")] + use linux_loader::cmdline::Cmdline; use vmm_sys_util::tempfile::TempFile; use super::*; + #[cfg(target_arch = "aarch64")] + use crate::arch::KvmVm; use crate::builder::tests::*; use crate::device_manager; + #[cfg(target_arch = "aarch64")] + use crate::device_manager::tests::default_device_manager; use crate::devices::virtio::block::CacheType; use crate::resources::VmmConfig; + #[cfg(target_arch = "aarch64")] + use 
crate::test_utils::{arch_mem, arch_mem_raw}; use crate::vmm_config::balloon::BalloonDeviceConfig; use crate::vmm_config::entropy::EntropyDeviceConfig; use crate::vmm_config::memory_hotplug::MemoryHotplugConfig; use crate::vmm_config::net::NetworkInterfaceConfig; use crate::vmm_config::pmem::PmemConfig; use crate::vmm_config::vsock::VsockDeviceConfig; + #[cfg(target_arch = "aarch64")] + use crate::{EventManager, Kvm}; impl PartialEq for VirtioDeviceState { fn eq(&self, other: &VirtioDeviceState) -> bool { @@ -669,6 +707,98 @@ mod tests { } } + #[cfg(target_arch = "aarch64")] + #[test] + fn test_legacy_mmio_persistence() { + let mem_raw = arch_mem_raw(crate::arch::FDT_MAX_SIZE + 0x1000); + let kvm = Kvm::new(vec![]).unwrap(); + let mut vm = KvmVm::new(kvm).unwrap(); + vm.register_dram_memory_regions(mem_raw).unwrap(); + vm.setup_irqchip(1).unwrap(); + let vm = Arc::new(vm); + + let mut event_manager = EventManager::new().unwrap(); + let mut device_manager = default_device_manager(); + let mut cmdline = Cmdline::new(4096).unwrap(); + device_manager + .attach_legacy_devices_aarch64(&vm, &mut event_manager, &mut cmdline, None, None) + .unwrap(); + + // Configure some PL061 registers so we can verify they survive a snapshot round-trip. 
+ let saved_gpio_state = { + let gpio = device_manager + .mmio_devices + .gpio_pl061 + .as_ref() + .unwrap() + .inner + .clone(); + let mut gpio = gpio.lock().unwrap(); + gpio.bus_write(0x40c, &1u32.to_le_bytes()); // interrupt event (IEV) + gpio.bus_write(0x410, &1u32.to_le_bytes()); // interrupt mask (IE) + gpio.state() + }; + + let state = device_manager.mmio_devices.save(); + assert_eq!(state.gpio_pl061_state.as_ref(), Some(&saved_gpio_state)); + assert_eq!(state.legacy_devices.len(), 2); + assert!( + state + .legacy_devices + .iter() + .any(|device| device.type_ == DeviceType::Rtc) + ); + assert!( + state + .legacy_devices + .iter() + .any(|device| device.type_ == DeviceType::GpioPl061) + ); + + let restore_mem = arch_mem(crate::arch::FDT_MAX_SIZE + 0x1000); + let restore_mem_raw = arch_mem_raw(crate::arch::FDT_MAX_SIZE + 0x1000); + let restore_kvm = Kvm::new(vec![]).unwrap(); + let mut restore_vm = KvmVm::new(restore_kvm).unwrap(); + restore_vm + .register_dram_memory_regions(restore_mem_raw) + .unwrap(); + restore_vm.setup_irqchip(1).unwrap(); + let restore_vm = Arc::new(restore_vm); + let mut restore_event_manager = EventManager::new().unwrap(); + let mut vm_resources = crate::resources::VmResources::default(); + let restored = MMIODeviceManager::restore( + MMIODevManagerConstructorArgs { + mem: &restore_mem, + vm: &restore_vm, + event_manager: &mut restore_event_manager, + vm_resources: &mut vm_resources, + instance_id: "fc-test", + serial_state: None, + }, + &state, + ) + .unwrap(); + + assert!(restored.serial.is_none()); + assert_eq!( + restored.rtc_device_info(), + device_manager.mmio_devices.rtc_device_info() + ); + assert_eq!( + restored.gpio_pl061_device_info(), + device_manager.mmio_devices.gpio_pl061_device_info() + ); + let restored_gpio_state = restored + .gpio_pl061 + .as_ref() + .unwrap() + .inner + .lock() + .unwrap() + .state(); + assert_eq!(restored_gpio_state, saved_gpio_state); + } + #[test] fn test_device_manager_persistence() { // These 
need to survive so the restored blocks find them. diff --git a/src/vmm/src/devices/legacy/gpio_pl061.rs b/src/vmm/src/devices/legacy/gpio_pl061.rs new file mode 100644 index 00000000000..03d9e761cb7 --- /dev/null +++ b/src/vmm/src/devices/legacy/gpio_pl061.rs @@ -0,0 +1,520 @@ +// Copyright 2026 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use std::sync::{Arc, Barrier}; + +use serde::{Deserialize, Serialize}; +use vm_superio::Trigger; +use vmm_sys_util::eventfd::EventFd; + +use crate::devices::legacy::EventFdTrigger; +use crate::logger::{IncMetric, SharedIncMetric, warn}; +use crate::utils::u64_to_usize; +use crate::vstate::bus::BusDevice; + +/// Size of the PL061 MMIO register window. +const PL061_REGISTER_SPACE_SIZE: u64 = 0x1000; +/// End of the data register aperture. The data registers occupy `0x000..0x3ff`; the +/// access address doubles as a per-line mask (address bits [9:2]). +const PL061_DATA_REG_END: u64 = 0x400; + +/// PL061 register offsets (see the ARM PrimeCell PL061 TRM). +const PL061_DIR: u64 = 0x400; // GPIODIR: direction, 0 = input, 1 = output +const PL061_IS: u64 = 0x404; // GPIOIS: interrupt sense, 0 = edge, 1 = level +const PL061_IBE: u64 = 0x408; // GPIOIBE: interrupt both edges +const PL061_IEV: u64 = 0x40c; // GPIOIEV: interrupt event (edge/level polarity) +const PL061_IE: u64 = 0x410; // GPIOIE: interrupt mask (enable) +const PL061_RIS: u64 = 0x414; // GPIORIS: raw interrupt status +const PL061_MIS: u64 = 0x418; // GPIOMIS: masked interrupt status +const PL061_IC: u64 = 0x41c; // GPIOIC: interrupt clear (write 1 to clear) +const PL061_AFSEL: u64 = 0x420; // GPIOAFSEL: alternate function select + +/// PrimeCell ID registers: four zero-valued words, then `PeriphID0..3` and `PrimeCellID0..3`. +const PL061_ID_REG_START: u64 = 0xfd0; +const PL061_ID_REG_END: u64 = 0x1000; + +/// Number of GPIO lines modelled by a single PL061. +const GPIO_PIN_COUNT: u8 = 8; +/// Mask covering all GPIO lines. 
+const GPIO_PIN_MASK: u8 = 0xff; +/// GPIO line the virtual power button is wired to (must match the FDT `gpio-keys` node). +const POWER_BUTTON_PIN: u8 = 0; +/// Identification register contents: PeriphID = 0x00041061 (PL061), PrimeCellID = 0xb105f00d. +/// These let the Linux AMBA bus match and probe the `pl061` driver. +const PL061_ID: [u8; 12] = [ + 0x00, 0x00, 0x00, 0x00, 0x61, 0x10, 0x04, 0x00, 0x0d, 0xf0, 0x05, 0xb1, +]; + +#[derive(Debug, thiserror::Error, displaydoc::Display)] +pub enum PL061Error { + /// Could not create EventFd: {0} + CreateEventFd(std::io::Error), + /// Could not trigger interrupt: {0} + TriggerInterrupt(std::io::Error), +} + +/// Metrics specific to the PL061 device. +#[derive(Debug, Serialize, Default)] +pub struct PL061DeviceMetrics { + /// Errors triggered while using the PL061 device. + pub error_count: SharedIncMetric, + /// Number of superfluous read intents on this device. + pub missed_read_count: SharedIncMetric, + /// Number of superfluous write intents on this device. + pub missed_write_count: SharedIncMetric, + /// Number of interrupts injected into the guest. + pub interrupt_count: SharedIncMetric, +} + +impl PL061DeviceMetrics { + pub const fn new() -> Self { + Self { + error_count: SharedIncMetric::new(), + missed_read_count: SharedIncMetric::new(), + missed_write_count: SharedIncMetric::new(), + interrupt_count: SharedIncMetric::new(), + } + } + + fn invalid_read(&self, offset: u64, len: usize) { + self.missed_read_count.inc(); + self.error_count.inc(); + warn!( + "Guest read at invalid PL061 offset/length: offset={:#x}, len={}", + offset, len + ); + } + + fn invalid_write(&self, offset: u64, len: usize) { + self.missed_write_count.inc(); + self.error_count.inc(); + warn!( + "Guest write at invalid PL061 offset/length: offset={:#x}, len={}", + offset, len + ); + } +} + +/// Stores aggregated metrics. There is only ever one PL061 device, so it accesses this directly. 
+pub static METRICS: PL061DeviceMetrics = PL061DeviceMetrics::new(); + +/// Minimal PL061 GPIO controller skeleton for aarch64 guests. +/// +/// This models the core MMIO register bank, an interrupt line, and host-side +/// input injection so higher layers can later wire a GPIO-backed power button. +#[derive(Debug)] +pub struct PL061Device { + /// Interrupt line exposed to the guest. + pub interrupt_evt: EventFdTrigger, + /// Register state (also what is captured/restored for snapshots). + state: PL061State, +} + +/// Serializable register state of a [`PL061Device`], used for snapshot save/restore. +/// +/// The interrupt eventfd is intentionally not part of the state: it is recreated and +/// re-registered with KVM during restore. +#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct PL061State { + /// Pin levels (GPIODATA). + data: u8, + /// Pin directions (GPIODIR): 0 = input, 1 = output. + direction: u8, + /// Interrupt sense (GPIOIS): 0 = edge, 1 = level. + interrupt_sense: u8, + /// Interrupt both-edges select (GPIOIBE). + interrupt_both_edges: u8, + /// Interrupt event/polarity (GPIOIEV). + interrupt_event: u8, + /// Interrupt mask/enable (GPIOIE). + interrupt_mask: u8, + /// Raw (pre-mask) interrupt status (GPIORIS). + raw_interrupt_status: u8, + /// Alternate function select (GPIOAFSEL). + alternate_function_select: u8, +} + +impl PL061Device { + pub fn new() -> Result { + Ok(Self { + interrupt_evt: EventFdTrigger::new( + EventFd::new(libc::EFD_NONBLOCK).map_err(PL061Error::CreateEventFd)?, + ), + state: PL061State::default(), + }) + } + + /// Captures the current register state for snapshotting. + pub fn state(&self) -> PL061State { + self.state.clone() + } + + /// Rebuilds a device from a previously saved register state, with a fresh interrupt eventfd. + pub fn from_state(state: &PL061State) -> Result { + Ok(Self { + state: state.clone(), + ..Self::new()? 
+ }) + } + + /// Drives the virtual power-button line high (pressed) or low (released). + pub fn trigger_power_button(&mut self, pressed: bool) -> Result<(), PL061Error> { + self.set_input_level(POWER_BUTTON_PIN, pressed) + } + + /// Sets the level of an input `line` from the host side, raising an interrupt if the + /// resulting transition is one the guest has armed. + fn set_input_level(&mut self, line: u8, high: bool) -> Result<(), PL061Error> { + assert!(line < GPIO_PIN_COUNT); + + // A line the guest has configured as an output is driven by the guest, not the host. + let mask = line_mask(line); + if self.state.direction & mask != 0 { + return Ok(()); + } + + let old_data = self.state.data; + if high { + self.state.data |= mask; + } else { + self.state.data &= !mask; + } + + self.update_interrupts(old_data) + } + + /// Reads a register, returning `None` for offsets that are not backed by a register. + fn read_reg(&mut self, offset: u64) -> Option { + let result = match offset { + // Reads in the data aperture only return the lines selected by the address mask. + 0..PL061_DATA_REG_END => self.state.data & data_mask(offset), + PL061_DIR => self.state.direction, + PL061_IS => self.state.interrupt_sense, + PL061_IBE => self.state.interrupt_both_edges, + PL061_IEV => self.state.interrupt_event, + PL061_IE => self.state.interrupt_mask, + PL061_RIS => self.state.raw_interrupt_status, + PL061_MIS => self.masked_interrupt_status(), + PL061_AFSEL => self.state.alternate_function_select, + PL061_ID_REG_START..PL061_ID_REG_END => { + let index = u64_to_usize((offset - PL061_ID_REG_START) >> 2); + PL061_ID.get(index).copied()? + } + _ => return None, + }; + Some(u32::from(result)) + } + + /// Writes a register. Returns `Ok(true)` if the offset was handled, `Ok(false)` for an + /// unknown/read-only offset, and `Err` only if raising the interrupt line failed. 
+ fn write_reg(&mut self, offset: u64, value: u8) -> Result { + match offset { + 0..PL061_DATA_REG_END => { + // Only the lines selected by the address mask, and only those configured as + // outputs, are affected by a guest write. + let mask = data_mask(offset) & self.state.direction; + self.state.data = (self.state.data & !mask) | (value & mask); + } + PL061_DIR => self.state.direction = value, + PL061_IS => self.state.interrupt_sense = value, + PL061_IBE => self.state.interrupt_both_edges = value, + PL061_IEV => self.state.interrupt_event = value, + PL061_IE => { + let had_no_pending_interrupt = self.masked_interrupt_status() == 0; + self.state.interrupt_mask = value; + self.refresh_masked_interrupt(had_no_pending_interrupt)?; + return Ok(true); + } + PL061_IC => { + let had_no_pending_interrupt = self.masked_interrupt_status() == 0; + self.state.raw_interrupt_status &= !value; + self.refresh_masked_interrupt(had_no_pending_interrupt)?; + return Ok(true); + } + PL061_AFSEL => self.state.alternate_function_select = value, + _ => return Ok(false), + } + + Ok(true) + } + + /// Handles a guest MMIO read. Only 1-byte and (4-aligned) 4-byte accesses are accepted, as + /// used by the Linux `gpio-pl061` driver; anything else is counted as a missed read. + pub fn bus_read(&mut self, offset: u64, data: &mut [u8]) { + if !(data.len() == 1 || data.len() == 4) + || offset >= PL061_REGISTER_SPACE_SIZE + || (data.len() == 4 && !offset.is_multiple_of(4)) + { + METRICS.invalid_read(offset, data.len()); + return; + } + + if let Some(value) = self.read_reg(offset) { + match data.len() { + 1 => data[0] = value.to_le_bytes()[0], + 4 => data.copy_from_slice(&value.to_le_bytes()), + _ => unreachable!(), + } + } else { + METRICS.invalid_read(offset, data.len()); + } + } + + /// Handles a guest MMIO write, with the same access-width rules as [`Self::bus_read`]. 
+    pub fn bus_write(&mut self, offset: u64, data: &[u8]) {
+        if !(data.len() == 1 || data.len() == 4)
+            || offset >= PL061_REGISTER_SPACE_SIZE
+            || (data.len() == 4 && !offset.is_multiple_of(4))
+        {
+            METRICS.invalid_write(offset, data.len());
+            return;
+        }
+
+        // The PL061 registers are all 8-bit, so only the low byte of the access is meaningful.
+        match self.write_reg(offset, data[0]) {
+            Ok(true) => {}
+            Ok(false) => METRICS.invalid_write(offset, data.len()),
+            Err(err) => {
+                METRICS.error_count.inc();
+                warn!("Failed to update PL061 state: {err}");
+            }
+        }
+    }
+
+    /// Interrupt status visible to the guest after masking (GPIOMIS).
+    fn masked_interrupt_status(&self) -> u8 {
+        self.state.raw_interrupt_status & self.state.interrupt_mask
+    }
+
+    /// Recomputes the raw interrupt status after the input lines changed from `old_data`, then
+    /// refreshes the masked line. Edge detection only considers input lines.
+    fn update_interrupts(&mut self, old_data: u8) -> Result<(), PL061Error> {
+        let had_no_pending_interrupt = self.masked_interrupt_status() == 0;
+        let changed = (old_data ^ self.state.data) & !self.state.direction;
+
+        // Edge-sensitive lines latch on the transition the guest selected.
+        if changed != 0 {
+            for line in 0..GPIO_PIN_COUNT {
+                let mask = line_mask(line);
+                // Skip lines that did not change or are configured level-sensitive (handled below).
+                if changed & mask == 0 || self.state.interrupt_sense & mask != 0 {
+                    continue;
+                }
+
+                if self.state.interrupt_both_edges & mask != 0 {
+                    self.state.raw_interrupt_status |= mask;
+                } else {
+                    // Single-edge: latch only when the new level matches the configured edge.
+                    let line_is_high = self.state.data & mask != 0;
+                    let wants_high = self.state.interrupt_event & mask != 0;
+                    if line_is_high == wants_high {
+                        self.state.raw_interrupt_status |= mask;
+                    }
+                }
+            }
+        }
+
+        // Level-sensitive lines are asserted while the level matches GPIOIEV.
+        self.state.raw_interrupt_status |=
+            !(self.state.data ^ self.state.interrupt_event) & self.state.interrupt_sense;
+
+        self.refresh_masked_interrupt(had_no_pending_interrupt)
+    }
+
+    /// Pulses the interrupt line when a masked interrupt becomes newly pending. The line is
+    /// modelled as edge-triggered (one eventfd signal per 0->pending transition) to match the
+    /// plain KVM irqfd used to deliver it; see the FDT node, which declares it edge-rising.
+    fn refresh_masked_interrupt(
+        &mut self,
+        had_no_pending_interrupt: bool,
+    ) -> Result<(), PL061Error> {
+        let has_pending_interrupt = self.masked_interrupt_status() != 0;
+        if had_no_pending_interrupt && has_pending_interrupt {
+            METRICS.interrupt_count.inc();
+            self.interrupt_evt
+                .trigger()
+                .map_err(PL061Error::TriggerInterrupt)?;
+        }
+        Ok(())
+    }
+}
+
+/// Single-bit mask for a GPIO `line`.
+fn line_mask(line: u8) -> u8 {
+    1u8 << line
+}
+
+/// Per-line mask carried by a data-aperture access address (PL061 uses address bits [9:2] as
+/// the line mask, so the masked register at `0x000..0x3ff` only touches the selected lines).
+#[allow(clippy::cast_possible_truncation)]
+fn data_mask(offset: u64) -> u8 {
+    ((offset >> 2) & u64::from(GPIO_PIN_MASK)) as u8
+}
+
+impl BusDevice for PL061Device {
+    fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) {
+        self.bus_read(offset, data);
+    }
+
+    fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
+        self.bus_write(offset, data);
+        None
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::logger::IncMetric;
+
+    /// Address of a masked access to the lines in `line_mask`, within the data register aperture
+    /// (`0x000..0x3ff`). The PL061 carries the per-line mask in address bits [9:2], so the access
+    /// to a given set of lines is made at `line_mask << 2`.
+    fn data_aperture(line_mask: u8) -> u64 {
+        u64::from(line_mask) << 2
+    }
+
+    fn read_u32(device: &mut PL061Device, offset: u64) -> u32 {
+        let mut data = [0u8; 4];
+        device.bus_read(offset, &mut data);
+        u32::from_le_bytes(data)
+    }
+
+    fn write_u32(device: &mut PL061Device, offset: u64, value: u32) {
+        device.bus_write(offset, &value.to_le_bytes());
+    }
+
+    fn read_u8(device: &mut PL061Device, offset: u64) -> u8 {
+        let mut data = [0u8; 1];
+        device.bus_read(offset, &mut data);
+        data[0]
+    }
+
+    fn write_u8(device: &mut PL061Device, offset: u64, value: u8) {
+        device.bus_write(offset, &[value]);
+    }
+
+    #[test]
+    fn test_pl061_data_and_direction_registers() {
+        let mut device = PL061Device::new().unwrap();
+
+        // Lines 0 and 1 are outputs; the rest are inputs.
+        write_u32(&mut device, PL061_DIR, 0b0000_0011);
+        // A masked write to line 0 only affects line 0 (and only because it is an output).
+        write_u32(&mut device, data_aperture(0b0000_0001), 0b1111_1111);
+
+        assert_eq!(read_u32(&mut device, PL061_DIR), 0b0000_0011);
+        assert_eq!(read_u32(&mut device, data_aperture(0b0000_0001)), 0b0000_0001);
+
+        // Drive input line 2 high from the host and read it back through its line mask.
+        device.set_input_level(2, true).unwrap();
+        assert_eq!(read_u32(&mut device, data_aperture(0b0000_0100)), 0b0000_0100);
+    }
+
+    #[test]
+    fn test_pl061_edge_interrupts() {
+        let mut device = PL061Device::new().unwrap();
+
+        write_u32(&mut device, PL061_IEV, 0b0000_0001);
+        write_u32(&mut device, PL061_IE, 0b0000_0001);
+
+        device.trigger_power_button(true).unwrap();
+
+        assert_eq!(read_u32(&mut device, PL061_RIS), 0b0000_0001);
+        assert_eq!(read_u32(&mut device, PL061_MIS), 0b0000_0001);
+        assert_eq!(device.interrupt_evt.read().unwrap(), 1);
+
+        write_u32(&mut device, PL061_IC, 0b0000_0001);
+        assert_eq!(read_u32(&mut device, PL061_RIS), 0);
+        assert_eq!(read_u32(&mut device, PL061_MIS), 0);
+    }
+
+    #[test]
+    fn test_pl061_both_edges_interrupt() {
+        // This mirrors how the Linux gpio-keys driver wires the power button: it requests the
+        // line's IRQ for both edges, so press (rising) and release (falling) must each interrupt.
+        let mut device = PL061Device::new().unwrap();
+
+        write_u32(&mut device, PL061_IBE, 0b0000_0001); // both-edge detection on line 0
+        write_u32(&mut device, PL061_IE, 0b0000_0001); // unmask line 0
+
+        // Rising edge (button press).
+        device.trigger_power_button(true).unwrap();
+        assert_eq!(read_u32(&mut device, PL061_RIS), 0b0000_0001);
+        assert_eq!(read_u32(&mut device, PL061_MIS), 0b0000_0001);
+        assert_eq!(device.interrupt_evt.read().unwrap(), 1);
+        write_u32(&mut device, PL061_IC, 0b0000_0001); // ack
+        assert_eq!(read_u32(&mut device, PL061_RIS), 0);
+
+        // Falling edge (button release) must interrupt as well.
+        device.trigger_power_button(false).unwrap();
+        assert_eq!(read_u32(&mut device, PL061_RIS), 0b0000_0001);
+        assert_eq!(read_u32(&mut device, PL061_MIS), 0b0000_0001);
+        assert_eq!(device.interrupt_evt.read().unwrap(), 1);
+    }
+
+    #[test]
+    fn test_pl061_byte_accesses_match_linux_driver_usage() {
+        let mut device = PL061Device::new().unwrap();
+
+        write_u8(&mut device, PL061_IEV, 0b0000_0001);
+        write_u8(&mut device, PL061_IE, 0b0000_0001);
+
+        device.trigger_power_button(true).unwrap();
+
+        assert_eq!(read_u8(&mut device, data_aperture(0b0000_0001)), 0b0000_0001);
+        assert_eq!(read_u8(&mut device, PL061_RIS), 0b0000_0001);
+        assert_eq!(read_u8(&mut device, PL061_MIS), 0b0000_0001);
+        assert_eq!(device.interrupt_evt.read().unwrap(), 1);
+    }
+
+    #[test]
+    fn test_pl061_id_registers() {
+        let mut device = PL061Device::new().unwrap();
+
+        for (index, expected) in PL061_ID.iter().enumerate() {
+            let offset = PL061_ID_REG_START + (index as u64) * 4;
+            assert_eq!(read_u32(&mut device, offset), u32::from(*expected));
+        }
+    }
+
+    #[test]
+    fn test_pl061_state_serialization_round_trip() {
+        let mut device = PL061Device::new().unwrap();
+
+        // Configure a representative register set, including a pending interrupt.
+        write_u32(&mut device, PL061_DIR, 0b0000_0010);
+        write_u32(&mut device, PL061_IEV, 0b0000_0001);
+        write_u32(&mut device, PL061_IE, 0b0000_0001);
+        device.set_input_level(0, true).unwrap();
+        let state = device.state();
+
+        // Round-trip through the same serializer Firecracker uses for snapshots.
+        let bytes = bitcode::serialize(&state).unwrap();
+        let decoded: PL061State = bitcode::deserialize(&bytes).unwrap();
+        assert_eq!(decoded, state);
+
+        // A device rebuilt from the restored state must expose identical registers.
+        let mut restored = PL061Device::from_state(&decoded).unwrap();
+        assert_eq!(restored.state(), state);
+        assert_eq!(read_u32(&mut restored, PL061_DIR), 0b0000_0010);
+        assert_eq!(read_u32(&mut restored, PL061_IE), 0b0000_0001);
+        assert_eq!(read_u32(&mut restored, PL061_RIS), 0b0000_0001);
+    }
+
+    #[test]
+    fn test_pl061_invalid_access_metrics() {
+        let mut device = PL061Device::new().unwrap();
+
+        let errors_before = METRICS.error_count.count();
+        let missed_reads_before = METRICS.missed_read_count.count();
+        let missed_writes_before = METRICS.missed_write_count.count();
+
+        device.bus_read(PL061_DIR, &mut [0u8; 2]);
+        device.bus_write(PL061_REGISTER_SPACE_SIZE, &[0u8; 4]);
+
+        assert_eq!(METRICS.error_count.count() - errors_before, 2);
+        assert_eq!(METRICS.missed_read_count.count() - missed_reads_before, 1);
+        assert_eq!(METRICS.missed_write_count.count() - missed_writes_before, 1);
+    }
+}
diff --git a/src/vmm/src/devices/legacy/mod.rs b/src/vmm/src/devices/legacy/mod.rs
index 9804625cf09..76239d2f9ce 100644
--- a/src/vmm/src/devices/legacy/mod.rs
+++ b/src/vmm/src/devices/legacy/mod.rs
@@ -6,6 +6,8 @@
 // found in the THIRD-PARTY file.
 
 //! Implements legacy devices (UART, RTC etc).
+#[cfg(target_arch = "aarch64")]
+pub mod gpio_pl061;
 mod i8042;
 #[cfg(target_arch = "aarch64")]
 pub mod rtc_pl031;
@@ -19,6 +21,8 @@ use serde::ser::SerializeMap;
 use vm_superio::Trigger;
 use vmm_sys_util::eventfd::EventFd;
 
+#[cfg(target_arch = "aarch64")]
+pub use self::gpio_pl061::{PL061Device, PL061Error, PL061State};
 pub use self::i8042::{I8042Device, I8042Error as I8042DeviceError};
 #[cfg(target_arch = "aarch64")]
 pub use self::rtc_pl031::RTCDevice;
@@ -67,6 +71,8 @@ pub fn flush_metrics<S: Serializer>(serializer: S) -> Result<S::Ok, S::Error> {
     let mut seq = serializer.serialize_map(Some(1))?;
     seq.serialize_entry("i8042", &i8042::METRICS)?;
     #[cfg(target_arch = "aarch64")]
+    seq.serialize_entry("gpio_pl061", &gpio_pl061::METRICS)?;
+    #[cfg(target_arch = "aarch64")]
     seq.serialize_entry("rtc", &rtc_pl031::METRICS)?;
     seq.serialize_entry("uart", &serial::METRICS)?;
     seq.end()
diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs
index e914814e7ac..b0d8d8b70ad 100644
--- a/src/vmm/src/lib.rs
+++ b/src/vmm/src/lib.rs
@@ -169,6 +169,9 @@ pub use crate::vstate::vm::{StartVcpusError, Vm};
 /// Shorthand type for the EventManager flavour used by Firecracker.
 pub type EventManager = BaseEventManager<Arc<Mutex<dyn MutEventSubscriber>>>;
 
+#[cfg(target_arch = "aarch64")]
+const POWER_BUTTON_PULSE_DURATION: Duration = Duration::from_millis(50);
+
 // Since the exit code names e.g. `SIGBUS` are most appropriate yet trigger a test error with the
 // clippy lint `upper_case_acronyms` we have disabled this lint for this enum.
 /// Vmm exit-code type.
@@ -227,6 +230,9 @@ pub enum VmmError {
     DirtyBitmap(kvm_ioctls::Error),
     /// I8042 error: {0}
     I8042Error(devices::legacy::I8042DeviceError),
+    #[cfg(target_arch = "aarch64")]
+    /// PL061 GPIO error: {0}
+    PL061Error(devices::legacy::PL061Error),
     #[cfg(target_arch = "x86_64")]
     /// Cannot add devices to the legacy I/O Bus. {0}
     LegacyIOBus(device_manager::legacy::LegacyDeviceError),
@@ -481,18 +487,52 @@ impl Vmm {
         Ok(())
     }
 
-    /// Injects CTRL+ALT+DEL keystroke combo in the i8042 device.
-    #[cfg(target_arch = "x86_64")]
+    /// Injects the external graceful-shutdown event into the guest.
     pub fn send_ctrl_alt_del(&mut self) -> Result<(), VmmError> {
-        self.device_manager
-            .legacy_devices
-            .as_ref()
-            .ok_or(VmmError::NotSupported)?
-            .i8042
-            .lock()
-            .expect("i8042 lock was poisoned")
-            .trigger_ctrl_alt_del()
-            .map_err(VmmError::I8042Error)
+        #[cfg(target_arch = "x86_64")]
+        {
+            self.device_manager
+                .legacy_devices
+                .as_ref()
+                .ok_or(VmmError::NotSupported)?
+                .i8042
+                .lock()
+                .expect("i8042 lock was poisoned")
+                .trigger_ctrl_alt_del()
+                .map_err(VmmError::I8042Error)
+        }
+
+        #[cfg(target_arch = "aarch64")]
+        {
+            let gpio = self
+                .device_manager
+                .mmio_devices
+                .gpio_pl061
+                .as_ref()
+                .ok_or(device_manager::FindDeviceError::DeviceNotFound)?
+                .inner
+                .clone();
+
+            // Drive a press/release pulse on the virtual power button. The lock is dropped
+            // during the delay so the guest can take the press interrupt and read the line as
+            // asserted before it is released. This runs synchronously (rather than from a
+            // detached thread) so the two edges stay atomic with respect to other API actions:
+            // a concurrent snapshot can therefore never capture the button stuck pressed, which
+            // would otherwise make a later SendCtrlAltDel a no-op on the restored microVM.
+            gpio.lock()
+                .expect("PL061 lock was poisoned")
+                .trigger_power_button(true)
+                .map_err(VmmError::PL061Error)?;
+
+            std::thread::sleep(POWER_BUTTON_PULSE_DURATION);
+
+            gpio.lock()
+                .expect("PL061 lock was poisoned")
+                .trigger_power_button(false)
+                .map_err(VmmError::PL061Error)?;
+
+            Ok(())
+        }
     }
 
     /// Saves the state of a paused Microvm.
diff --git a/src/vmm/src/persist.rs b/src/vmm/src/persist.rs
index feb53c0e19e..9112eef66cb 100644
--- a/src/vmm/src/persist.rs
+++ b/src/vmm/src/persist.rs
@@ -160,7 +160,7 @@ pub enum CreateSnapshotError {
 }
 
 /// Snapshot version
-pub const SNAPSHOT_VERSION: Version = Version::new(10, 0, 0);
+pub const SNAPSHOT_VERSION: Version = Version::new(11, 0, 0);
 
 /// Creates a Microvm snapshot.
 pub fn create_snapshot(
diff --git a/src/vmm/src/rpc_interface.rs b/src/vmm/src/rpc_interface.rs
index 7693b305f65..1024f005b48 100644
--- a/src/vmm/src/rpc_interface.rs
+++ b/src/vmm/src/rpc_interface.rs
@@ -129,9 +129,10 @@ pub enum VmmAction {
     UpdateMemoryHotplugSize(MemoryHotplugSizeUpdate),
     /// Launch the microVM. This action can only be called before the microVM has booted.
     StartMicroVm,
-    /// Send CTRL+ALT+DEL to the microVM, using the i8042 keyboard function. If an AT-keyboard
-    /// driver is listening on the guest end, this can be used to shut down the microVM gracefully.
-    #[cfg(target_arch = "x86_64")]
+    /// Send an external graceful shutdown input to the microVM.
+    ///
+    /// On x86_64 this injects CTRL+ALT+DEL through the i8042 keyboard device. On aarch64 this
+    /// injects a virtual power-button press through the PL061 GPIO device.
     SendCtrlAltDel,
     /// Update the balloon size, after microVM start.
     UpdateBalloon(BalloonUpdateConfig),
@@ -515,9 +516,8 @@ impl<'a> PrebootApiController<'a> {
             | StartFreePageHinting(_)
             | GetFreePageHintingStatus
             | StopFreePageHinting
-            | HotUnplugDevice(_) => Err(VmmActionError::OperationNotSupportedPreBoot),
-            #[cfg(target_arch = "x86_64")]
-            SendCtrlAltDel => Err(VmmActionError::OperationNotSupportedPreBoot),
+            | HotUnplugDevice(_)
+            | SendCtrlAltDel => Err(VmmActionError::OperationNotSupportedPreBoot),
         }
     }
 
@@ -797,7 +797,6 @@ impl RuntimeApiController {
                 value,
             ),
             Resume => self.resume(),
-            #[cfg(target_arch = "x86_64")]
             SendCtrlAltDel => self.send_ctrl_alt_del(),
             UpdateBalloon(balloon_update) => self
                 .vmm
@@ -906,8 +905,7 @@ impl RuntimeApiController {
             .map_err(VmmActionError::InternalVmm)
     }
 
-    /// Injects CTRL+ALT+DEL keystroke combo to the inner Vmm (if present).
-    #[cfg(target_arch = "x86_64")]
+    /// Injects the external graceful-shutdown event to the inner Vmm (if present).
     fn send_ctrl_alt_del(&mut self) -> Result<VmmData, VmmActionError> {
         self.vmm
             .lock()
@@ -1242,7 +1240,6 @@ mod tests {
                 mem_file_path: PathBuf::new(),
             },
         )));
-        #[cfg(target_arch = "x86_64")]
         check_unsupported(preboot_request(VmmAction::SendCtrlAltDel));
         check_unsupported(preboot_request(VmmAction::UpdateMemoryHotplugSize(
             MemoryHotplugSizeUpdate {
@@ -1258,6 +1255,57 @@ mod tests {
         runtime.handle_request(request, &mut event_manager)
     }
 
+    #[cfg(target_arch = "aarch64")]
+    #[test]
+    fn test_runtime_send_ctrl_alt_del() {
+        use crate::builder::tests::default_kernel_cmdline;
+
+        let mut vmm = default_vmm();
+        let mut cmdline = default_kernel_cmdline();
+        let mut event_manager = EventManager::new().unwrap();
+        vmm.device_manager
+            .attach_legacy_devices_aarch64(
+                vmm.vm.as_kvm().unwrap(),
+                &mut event_manager,
+                &mut cmdline,
+                None,
+                None,
+            )
+            .unwrap();
+
+        let gpio = vmm
+            .device_manager
+            .mmio_devices
+            .gpio_pl061
+            .as_ref()
+            .unwrap()
+            .inner
+            .clone();
+        {
+            let mut gpio = gpio.lock().unwrap();
+            gpio.bus_write(0x40c, &1u32.to_le_bytes()); // GPIOIEV: line 0 fires on rising edge
+            gpio.bus_write(0x410, &1u32.to_le_bytes()); // GPIOIE: unmask line 0
+        }
+
+        let vmm = Arc::new(Mutex::new(vmm));
+        let mut runtime = RuntimeApiController::new(vmm);
+
+        assert_eq!(
+            runtime
+                .handle_request(VmmAction::SendCtrlAltDel, &mut event_manager)
+                .unwrap(),
+            VmmData::Empty
+        );
+
+        // The action drives a synchronous press/release pulse, so by the time it returns the
+        // button line is back low and exactly one interrupt (the rising/press edge) has fired.
+        let mut data = [0u8; 4];
+        let mut gpio = gpio.lock().unwrap();
+        gpio.bus_read(0x004, &mut data); // GPIODATA aperture masked to line 0
+        assert_eq!(u32::from_le_bytes(data), 0);
+        assert_eq!(gpio.interrupt_evt.read().unwrap(), 1);
+    }
+
     #[test]
     fn test_runtime_get_vm_config() {
         assert_eq!(
diff --git a/tests/integration_tests/functional/test_api.py b/tests/integration_tests/functional/test_api.py
index 5fc32105231..dda29ebdab8 100644
--- a/tests/integration_tests/functional/test_api.py
+++ b/tests/integration_tests/functional/test_api.py
@@ -774,7 +774,13 @@ def test_drive_patch(uvm, io_engine):
 
 
 @pytest.mark.skipif(
-    platform.machine() != "x86_64", reason="not yet implemented on aarch64"
+    platform.machine() != "x86_64",
+    reason="On x86 CTRL+ALT+DEL triggers a kernel-level reboot via the i8042 device. The "
+    "aarch64 SendCtrlAltDel path is implemented (PL061 GPIO power button) and the required "
+    "guest kernel config (CONFIG_GPIOLIB/GPIO_PL061/INPUT_KEYBOARD/KEYBOARD_GPIO) is now in "
+    "resources/guest_configs/ci.config, with systemd-logind already present in the CI rootfs. "
+    "This can be un-skipped on aarch64 once the CI guest-kernel artifacts are rebuilt with that "
+    "config so gpio-keys is available.",
 )
 def test_send_ctrl_alt_del(uvm):
     """