diff --git a/src/vmm/src/device_manager/pci_mngr.rs b/src/vmm/src/device_manager/pci_mngr.rs index 832d12b9195..1658fac948d 100644 --- a/src/vmm/src/device_manager/pci_mngr.rs +++ b/src/vmm/src/device_manager/pci_mngr.rs @@ -93,11 +93,12 @@ impl PciDevices { debug!( "Inserting MMIO BAR region: {:#x}:{:#x}", - virtio_device_locked.bar_address, CAPABILITY_BAR_SIZE + virtio_device_locked.config_bar_addr(), + CAPABILITY_BAR_SIZE ); vm.common.mmio_bus.insert( virtio_device.clone(), - virtio_device_locked.bar_address, + virtio_device_locked.config_bar_addr(), CAPABILITY_BAR_SIZE, )?; diff --git a/src/vmm/src/devices/virtio/transport/pci/device.rs b/src/vmm/src/devices/virtio/transport/pci/device.rs index 8a8d500678d..3fee33b9f54 100644 --- a/src/vmm/src/devices/virtio/transport/pci/device.rs +++ b/src/vmm/src/devices/virtio/transport/pci/device.rs @@ -21,6 +21,7 @@ use vm_allocator::{AddressAllocator, AllocPolicy, RangeInclusive}; use vm_memory::{Address, ByteValued, GuestAddress, Le32}; use vmm_sys_util::errno; use vmm_sys_util::eventfd::EventFd; +use zerocopy::IntoBytes; use crate::Vm; use crate::devices::virtio::device::{VirtioDevice, VirtioDeviceType}; @@ -32,7 +33,9 @@ use crate::devices::virtio::transport::pci::common_config::{ use crate::devices::virtio::transport::pci::device_status::*; use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType}; use crate::logger::{debug, error, warn}; -use crate::pci::configuration::{PciCapability, PciConfiguration, PciConfigurationState}; +use crate::pci::configuration::{ + BAR0_REG_IDX, Bars, NUM_BAR_REGS, PciCapability, PciConfiguration, PciConfigurationState, +}; use crate::pci::msix::{MsixCap, MsixConfig, MsixConfigState}; use crate::pci::{ BarReprogrammingParams, DeviceRelocationError, PciCapabilityId, PciClassCode, PciDevice, @@ -213,8 +216,6 @@ const MSIX_PBA_BAR_OFFSET: u32 = 0x48000; const MSIX_PBA_SIZE: u32 = 0x800; /// The BAR size must be a power of 2. pub const CAPABILITY_BAR_SIZE: u64 = 0x80000; -const VIRTIO_COMMON_BAR_INDEX: u8 = 0; -const VIRTIO_SHM_BAR_INDEX: usize = 2; const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address. @@ -230,7 +231,8 @@ pub struct VirtioPciDeviceState { pub pci_configuration_state: PciConfigurationState, pub pci_dev_state: VirtioPciCommonConfigState, pub msix_state: MsixConfigState, - pub bar_address: u64, + pub bars: Bars, + pub msix_config_cap_offset: u16, } #[derive(Debug, thiserror::Error, displaydoc::Display)] @@ -274,8 +276,10 @@ pub struct VirtioPciDevice { // a device. cap_pci_cfg_info: VirtioPciCfgCapInfo, - // Allocated address for the BAR - pub bar_address: u64, + // BARs region for the device + pub bars: Bars, + pub msix_config_cap_offset: u16, + pub msix_config: Arc>, } impl Debug for VirtioPciDevice { @@ -315,7 +319,6 @@ impl VirtioPciDevice { subclass, VIRTIO_PCI_VENDOR_ID, pci_device_id, - Some(msix_config.clone()), ) } @@ -338,16 +341,14 @@ impl VirtioPciDevice { ) .unwrap() .start(); - - self.configuration.add_pci_bar( - VIRTIO_COMMON_BAR_INDEX, + // Virtio BAR is 64bit BAR without prefetchable bit since it is an MMIO region. + self.bars.set_bar_64( + VIRTIO_BAR_INDEX, virtio_pci_bar_addr, CAPABILITY_BAR_SIZE, + false, ); - - // Once the BARs are allocated, the capabilities can be added to the PCI configuration. self.add_pci_capabilities(); - self.bar_address = virtio_pci_bar_addr; } /// Constructs a new PCI transport for the given virtio device. @@ -393,7 +394,9 @@ impl VirtioPciDevice { virtio_interrupt: Some(interrupt), memory, cap_pci_cfg_info: VirtioPciCfgCapInfo::default(), - bar_address: 0, + bars: Bars::default(), + msix_config, + msix_config_cap_offset: 0, }; Ok(virtio_pci_device) @@ -409,10 +412,7 @@ impl VirtioPciDevice { let vectors = msix_config.vectors.clone(); let msix_config = Arc::new(Mutex::new(msix_config)); - let pci_config = PciConfiguration::type0_from_state( - state.pci_configuration_state, - Some(msix_config.clone()), - ); + let pci_config = PciConfiguration::type0_from_state(state.pci_configuration_state); let virtio_common_config = VirtioPciCommonConfig::new(state.pci_dev_state); let cap_pci_cfg_info = VirtioPciCfgCapInfo { offset: state.cap_pci_cfg_offset, @@ -437,7 +437,9 @@ impl VirtioPciDevice { virtio_interrupt: Some(interrupt), memory: vm.guest_memory().clone(), cap_pci_cfg_info, - bar_address: state.bar_address, + bars: state.bars, + msix_config, + msix_config_cap_offset: state.msix_config_cap_offset, }; if state.device_activated { @@ -465,7 +467,7 @@ impl VirtioPciDevice { } pub fn config_bar_addr(&self) -> u64 { - self.configuration.get_bar_addr(VIRTIO_BAR_INDEX) + self.bars.get_bar_addr_64(VIRTIO_BAR_INDEX) } fn add_pci_capabilities(&mut self) { @@ -519,7 +521,10 @@ impl VirtioPciDevice { VIRTIO_BAR_INDEX, MSIX_PBA_BAR_OFFSET, ); - self.configuration.add_capability(&msix_cap); + // The whole Configuration region is 4K, so u16 can address it all + #[allow(clippy::cast_possible_truncation)] + let offset = self.configuration.add_capability(&msix_cap) as u16; + self.msix_config_cap_offset = offset; } } @@ -621,7 +626,8 @@ impl VirtioPciDevice { .lock() .expect("Poisoned lock") .state(), - bar_address: self.bar_address, + bars: self.bars, + msix_config_cap_offset: self.msix_config_cap_offset, } } } @@ -725,15 +731,33 @@ impl PciDevice for VirtioPciDevice { offset: u8, data: &[u8], ) -> Option> { - // Handle the special case where the capability VIRTIO_PCI_CAP_PCI_CFG - // is accessed. This capability has a special meaning as it allows the - // guest to access other capabilities without mapping the PCI BAR. - let base = reg_idx as usize * 4; - if base + offset as usize >= self.cap_pci_cfg_info.offset as usize - && base + offset as usize + data.len() - <= self.cap_pci_cfg_info.offset as usize + self.cap_pci_cfg_info.cap.bytes().len() - { - let offset = base + offset as usize - self.cap_pci_cfg_info.offset as usize; + let in_bars = BAR0_REG_IDX <= reg_idx && reg_idx < BAR0_REG_IDX + u16::from(NUM_BAR_REGS); + let in_msix_config = reg_idx * 4 == self.msix_config_cap_offset; + let in_pci_cfg = { + let base = reg_idx * 4; + let cap_start = self.cap_pci_cfg_info.offset; + let cap_end = + self.cap_pci_cfg_info.offset as usize + self.cap_pci_cfg_info.cap.bytes().len(); + let write_start = base + u16::from(offset); + let write_end = (base + u16::from(offset)) as usize + data.len(); + cap_start <= write_start && write_end <= cap_end + }; + if in_bars { + // reg_idx is in [BAR0_REG_IDX, BAR0_REG_IDX+NUM_BAR_REGS), so the difference is 0..5. + #[allow(clippy::cast_possible_truncation)] + let bar_idx = (reg_idx - BAR0_REG_IDX) as u8; + self.bars.write(bar_idx, offset, data); + None + } else if in_msix_config { + self.msix_config + .lock() + .unwrap() + .write_msg_ctl_register(offset, data); + self.configuration + .write_config_register(reg_idx, offset, data); + None + } else if in_pci_cfg { + let offset = (reg_idx * 4 + u16::from(offset) - self.cap_pci_cfg_info.offset) as usize; self.write_cap_pci_cfg(offset, data) } else { self.configuration @@ -743,15 +767,29 @@ impl PciDevice for VirtioPciDevice { } fn read_config_register(&mut self, reg_idx: u16) -> u32 { - // Handle the special case where the capability VIRTIO_PCI_CAP_PCI_CFG - // is accessed. This capability has a special meaning as it allows the - // guest to access other capabilities without mapping the PCI BAR. - let base = reg_idx as usize * 4; - if base >= self.cap_pci_cfg_info.offset as usize - && base + 4 - <= self.cap_pci_cfg_info.offset as usize + self.cap_pci_cfg_info.cap.bytes().len() - { - let offset = base - self.cap_pci_cfg_info.offset as usize; + let in_bars = BAR0_REG_IDX <= reg_idx && reg_idx < BAR0_REG_IDX + u16::from(NUM_BAR_REGS); + let in_pci_cfg = { + let base = reg_idx * 4; + let cap_start = self.cap_pci_cfg_info.offset; + let cap_end = + self.cap_pci_cfg_info.offset as usize + self.cap_pci_cfg_info.cap.bytes().len(); + let write_start = base; + let write_end = (base + 4) as usize; + cap_start <= write_start && write_end <= cap_end + }; + + if in_bars { + // reg_idx is in [BAR0_REG_IDX, BAR0_REG_IDX+NUM_BAR_REGS), so the difference is 0..5. + #[allow(clippy::cast_possible_truncation)] + let bar_idx = (reg_idx - BAR0_REG_IDX) as u8; + let mut value: u32 = 0; + self.bars.read(bar_idx, 0, value.as_mut_bytes()); + value + } else if in_pci_cfg { + // Handle the special case where the capability VIRTIO_PCI_CAP_PCI_CFG + // is accessed. This capability has a special meaning as it allows the + // guest to access other capabilities without mapping the PCI BAR. + let offset = (reg_idx * 4 - self.cap_pci_cfg_info.offset) as usize; let mut data = [0u8; 4]; let len = u32::from(self.cap_pci_cfg_info.cap.cap.length) as usize; if len <= 4 { @@ -770,16 +808,10 @@ impl PciDevice for VirtioPciDevice { reg_idx: u16, data: &[u8], ) -> Option { - self.configuration.detect_bar_reprogramming(reg_idx, data) + None } fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), DeviceRelocationError> { - // We only update our idea of the bar in order to support free_bars() above. - // The majority of the reallocation is done inside DeviceManager. - if self.bar_address == old_base { - self.bar_address = new_base; - } - Ok(()) } diff --git a/src/vmm/src/pci/bus.rs b/src/vmm/src/pci/bus.rs index 08490715c4d..3f756a67e07 100644 --- a/src/vmm/src/pci/bus.rs +++ b/src/vmm/src/pci/bus.rs @@ -51,7 +51,6 @@ impl PciRoot { PciBridgeSubclass::HostBridge as u8, 0, 0, - None, ), } } @@ -492,12 +491,6 @@ mod tests { reloc_cnt: AtomicUsize, } - impl RelocationMock { - fn cnt(&self) -> usize { - self.reloc_cnt.load(std::sync::atomic::Ordering::SeqCst) - } - } - impl DeviceRelocation for RelocationMock { fn move_bar( &self, @@ -516,7 +509,7 @@ mod tests { impl PciDevMock { fn new() -> Self { - let mut config = PciConfiguration::new_type0( + let config = PciConfiguration::new_type0( 0x42, 0x0, 0x0, @@ -524,11 +517,7 @@ mod tests { PciMassStorageSubclass::SerialScsiController as u8, 0x13, 0x12, - None, ); - - config.add_pci_bar(0, 0x1000, 0x1000); - PciDevMock(config) } } @@ -550,10 +539,10 @@ mod tests { fn detect_bar_reprogramming( &mut self, - reg_idx: u16, - data: &[u8], + _reg_idx: u16, + _data: &[u8], ) -> Option { - self.0.detect_bar_reprogramming(reg_idx, data) + None } } @@ -936,56 +925,4 @@ mod tests { read_mmio_config(&mut mmio_config, 0, 0, 0, 15, 0, &mut buffer); assert_eq!(buffer[0], 0x42); } - - #[test] - fn test_bar_reprogramming() { - let (mut mmio_config, _, mock) = initialize_bus(); - let mut buffer = [0u8; 4]; - assert_eq!(mock.cnt(), 0); - - read_mmio_config(&mut mmio_config, 0, 1, 0, 0x4, 0, &mut buffer); - let old_addr = u32::from_le_bytes(buffer) & 0xffff_fff0; - assert_eq!(old_addr, 0x1000); - - // Writing the lower 32bits first should not trigger any reprogramming - write_mmio_config( - &mut mmio_config, - 0, - 1, - 0, - 0x4, - 0, - &u32::to_le_bytes(0x1312_0000), - ); - - read_mmio_config(&mut mmio_config, 0, 1, 0, 0x4, 0, &mut buffer); - let new_addr = u32::from_le_bytes(buffer) & 0xffff_fff0; - assert_eq!(new_addr, 0x1312_0000); - assert_eq!(mock.cnt(), 0); - - // Writing the upper 32bits first should now trigger the reprogramming logic - write_mmio_config(&mut mmio_config, 0, 1, 0, 0x5, 0, &u32::to_le_bytes(0x1110)); - read_mmio_config(&mut mmio_config, 0, 1, 0, 0x5, 0, &mut buffer); - let new_addr = u32::from_le_bytes(buffer); - assert_eq!(new_addr, 0x1110); - assert_eq!(mock.cnt(), 1); - - // BAR2 should not be used, so reading its address should return all 0s - read_mmio_config(&mut mmio_config, 0, 1, 0, 0x6, 0, &mut buffer); - assert_eq!(buffer, [0x0, 0x0, 0x0, 0x0]); - - // and reprogramming shouldn't have any effect - write_mmio_config( - &mut mmio_config, - 0, - 1, - 0, - 0x5, - 0, - &u32::to_le_bytes(0x1312_1110), - ); - - read_mmio_config(&mut mmio_config, 0, 1, 0, 0x6, 0, &mut buffer); - assert_eq!(buffer, [0x0, 0x0, 0x0, 0x0]); - } } diff --git a/src/vmm/src/pci/configuration.rs b/src/vmm/src/pci/configuration.rs index e6d56a8e26f..615b9b8e8f6 100644 --- a/src/vmm/src/pci/configuration.rs +++ b/src/vmm/src/pci/configuration.rs @@ -5,14 +5,11 @@ // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause -use std::sync::{Arc, Mutex}; - use byteorder::{ByteOrder, LittleEndian}; use serde::{Deserialize, Serialize}; +use zerocopy::{FromBytes, IntoBytes}; -use super::BarReprogrammingParams; -use super::msix::MsixConfig; -use crate::logger::{info, warn}; +use crate::logger::warn; use crate::pci::{PciCapabilityId, PciClassCode}; // The number of 32bit registers in the config space, 4096 bytes. @@ -20,17 +17,120 @@ const NUM_CONFIGURATION_REGISTERS: usize = 1024; const STATUS_REG: usize = 1; const STATUS_REG_CAPABILITIES_USED_MASK: u32 = 0x0010_0000; -const BAR0_REG: u16 = 4; const ROM_BAR_REG: u16 = 12; -const BAR_MEM_ADDR_MASK: u32 = 0xffff_fff0; const ROM_BAR_ADDR_MASK: u32 = 0xffff_f800; const MSI_CAPABILITY_REGISTER_MASK: u32 = 0x0071_0000; const MSIX_CAPABILITY_REGISTER_MASK: u32 = 0xc000_0000; -const NUM_BAR_REGS: usize = 6; const CAPABILITY_LIST_HEAD_OFFSET: u8 = 0x34; const FIRST_CAPABILITY_OFFSET: u8 = 0x40; const CAPABILITY_MAX_OFFSET: u16 = 192; +/// First register in the BARs region +pub const BAR0_REG_IDX: u16 = 4; +/// Number of BAR registers +pub const NUM_BAR_REGS: u8 = 6; + +/// Type representing information about single BAR register +/// 31 4 3 2 1 0 +/// +---------------------------------------------------+----+---+----+ +/// | | | | | +/// | Base Address (28 bits) |Pref|Type| 0 | +/// | | | | | +/// +---------------------------------------------------+----+---+----+ +/// \___________________________________________________/ \__/ \_/ \_/ +/// Base Address Pre- Type Memory +/// (16-byte aligned minimum) fetch Space +/// able Indicator +/// +/// Bit 0 : Memory Space Indicator (hardwired to 0) +/// Bits 2:1 : Type - 00 = 32-bit address space +/// 10 = 64-bit address space +/// (01, 11 = reserved) +/// Bit 3 : Prefetchable - 0 = non-prefetchable +/// 1 = prefetchable +/// Bits 31:4: Base Address (read/write, writable bits depend on size) +#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)] +pub struct Bar { + /// Encoded address value of the register (lower bits might carry information) + pub encoded_addr: u32, + /// Encoded size value of the register (according to PCI rules of size encoding). + pub encoded_size: u32, + /// Indicator if the register was prepared to be read as the `size` instead of `addr` + pub about_to_be_read: bool, +} + +/// Type to handle basic interactions with BARs region +#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)] +pub struct Bars { + /// BARs + pub bars: [Bar; NUM_BAR_REGS as usize], +} +impl Bars { + /// Set 2 consecutive BAR slots as a single 64bit bar + pub fn set_bar_64(&mut self, bar_idx: u8, addr: u64, size: u64, prefetchable: bool) { + assert_ne!(size, 0); + assert!(size.is_power_of_two()); + assert!(addr & 0b1111 == 0); + addr.checked_add(size - 1).unwrap(); + assert!(bar_idx < NUM_BAR_REGS - 1); + + // Unused BARs will have address and size of 0 + assert_eq!(self.bars[bar_idx as usize].encoded_addr, 0); + assert_eq!(self.bars[bar_idx as usize].encoded_size, 0); + assert_eq!(self.bars[bar_idx as usize + 1].encoded_addr, 0); + assert_eq!(self.bars[bar_idx as usize + 1].encoded_size, 0); + + let (size_hi, size_lo) = encode_64_bits_bar_size(size); + // Add prefetchable bit and 64bit bit + self.bars[bar_idx as usize].encoded_addr = + (addr & 0xfffffff0) as u32 | (prefetchable as u32) << 3 | 0b100; + self.bars[bar_idx as usize].encoded_size = size_lo; + self.bars[(bar_idx + 1) as usize].encoded_addr = (addr >> 32) as u32; + self.bars[(bar_idx + 1) as usize].encoded_size = size_hi; + } + /// Get the address of the 64bit bar + pub fn get_bar_addr_64(&self, bar_idx: u8) -> u64 { + assert!(bar_idx < NUM_BAR_REGS - 1); + let addr_hi = self.bars[(bar_idx + 1) as usize].encoded_addr; + let addr_lo = self.bars[bar_idx as usize].encoded_addr & !0b1111; + (addr_hi as u64) << 32 | (addr_lo as u64) + } + /// Writes into a given BAR register at the given offset + pub fn write(&mut self, bar_idx: u8, offset: u8, data: &[u8]) { + assert!(bar_idx < NUM_BAR_REGS); + if let Ok(value) = u32::read_from_bytes(data) + && value == 0xffff_ffff + { + assert!(offset == 0); + self.bars[bar_idx as usize].about_to_be_read = true; + } else { + // There is no BAR relocation support as of right now. + // PCI specification does not provide a way for a device to + // tell the driver that it does not support BAR relocation, but + // linux kernel does check this at: + // https://elixir.bootlin.com/linux/v6.19.8/source/drivers/pci/setup-res.c#L107 + } + } + /// Reads from a given BAR register at the given offset + pub fn read(&mut self, bar_idx: u8, offset: u8, data: &mut [u8]) { + assert!(bar_idx < NUM_BAR_REGS); + let bar = &mut self.bars[bar_idx as usize]; + if data.len() == 4 { + if bar.about_to_be_read { + data.copy_from_slice(bar.encoded_size.as_bytes()); + bar.about_to_be_read = false; + } else { + data.copy_from_slice(bar.encoded_addr.as_bytes()); + } + } else { + assert!(offset < 4); + assert!(data.len() < 4); + assert!(!bar.about_to_be_read); + data.copy_from_slice(&bar.encoded_addr.as_bytes()[offset as usize..][0..data.len()]); + } + } +} + /// A PCI capability list. Devices can optionally specify capabilities in their configuration space. pub trait PciCapability { /// Bytes of the PCI capability @@ -49,29 +149,12 @@ fn encode_64_bits_bar_size(bar_size: u64) -> (u32, u32) { (result_hi, result_lo) } -// This decoes the BAR size from the value stored in the BAR registers. -fn decode_64_bits_bar_size(bar_size_hi: u32, bar_size_lo: u32) -> u64 { - let bar_size: u64 = ((bar_size_hi as u64) << 32) | (bar_size_lo as u64); - let size = !bar_size + 1; - assert_ne!(size, 0); - size -} - -#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)] -struct PciBar { - addr: u32, - size: u32, - used: bool, -} - /// PCI configuration space state for (de)serialization #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PciConfigurationState { registers: Vec, writable_bits: Vec, - bars: Vec, last_capability: Option<(u8, u8)>, - msix_cap_reg_idx: Option, } #[derive(Debug)] @@ -82,11 +165,8 @@ pub struct PciConfigurationState { pub struct PciConfiguration { registers: [u32; NUM_CONFIGURATION_REGISTERS], writable_bits: [u32; NUM_CONFIGURATION_REGISTERS], // writable bits for each register. - bars: [PciBar; NUM_BAR_REGS], // Contains the byte offset and size of the last capability. last_capability: Option<(u8, u8)>, - msix_cap_reg_idx: Option, - msix_config: Option>>, } impl PciConfiguration { @@ -100,7 +180,6 @@ impl PciConfiguration { subclass: u8, subsystem_vendor_id: u16, subsystem_id: u16, - msix_config: Option>>, ) -> Self { let mut registers = [0u32; NUM_CONFIGURATION_REGISTERS]; let mut writable_bits = [0u32; NUM_CONFIGURATION_REGISTERS]; @@ -118,25 +197,16 @@ impl PciConfiguration { PciConfiguration { registers, writable_bits, - bars: [PciBar::default(); NUM_BAR_REGS], last_capability: None, - msix_cap_reg_idx: None, - msix_config, } } /// Create a type 0 PCI configuration from snapshot state - pub fn type0_from_state( - state: PciConfigurationState, - msix_config: Option>>, - ) -> Self { + pub fn type0_from_state(state: PciConfigurationState) -> Self { PciConfiguration { registers: state.registers.try_into().unwrap(), writable_bits: state.writable_bits.try_into().unwrap(), - bars: state.bars.try_into().unwrap(), last_capability: state.last_capability, - msix_cap_reg_idx: state.msix_cap_reg_idx, - msix_config, } } @@ -145,9 +215,7 @@ impl PciConfiguration { PciConfigurationState { registers: self.registers.to_vec(), writable_bits: self.writable_bits.to_vec(), - bars: self.bars.to_vec(), last_capability: self.last_capability, - msix_cap_reg_idx: self.msix_cap_reg_idx, } } @@ -160,13 +228,7 @@ impl PciConfiguration { pub fn write_reg(&mut self, reg_idx: u16, value: u32) { let mut mask = self.writable_bits[reg_idx as usize]; - if (BAR0_REG as usize..BAR0_REG as usize + NUM_BAR_REGS).contains(&(reg_idx as usize)) { - // Handle very specific case where the BAR is being written with - // all 1's to retrieve the BAR size during next BAR reading. - if value == 0xffff_ffff { - mask &= self.bars[(reg_idx - BAR0_REG) as usize].size; - } - } else if reg_idx == ROM_BAR_REG { + if reg_idx == ROM_BAR_REG { // Handle very specific case where the BAR is being written with // all 1's on bits 31-11 to retrieve the BAR size during next BAR // reading. @@ -228,68 +290,6 @@ impl PciConfiguration { } } - /// Add the [addr, addr + size) BAR region. - /// - /// Configures the specified BAR to report this region and size to the guest kernel. - /// Enforces a few constraints (i.e, region size must be power of two, register not already - /// used). - pub fn add_pci_bar(&mut self, bar_idx: u8, addr: u64, size: u64) { - let reg_idx = (BAR0_REG + u16::from(bar_idx)) as usize; - - // These are a few constraints that are imposed due to the fact - // that only VirtIO devices are actually allocating a BAR. Moreover, this is - // a single 64-bit BAR. Not conforming to these requirements is an internal - // Firecracker bug. - - // We are only using BAR 0 - assert_eq!(bar_idx, 0); - // We shouldn't be trying to use the same BAR twice - assert!(!self.bars[0].used); - assert!(!self.bars[1].used); - // We can't have a size of 0 - assert_ne!(size, 0); - // BAR size needs to be a power of two - assert!(size.is_power_of_two()); - // We should not be overflowing the address space - addr.checked_add(size - 1).unwrap(); - - // Encode the BAR size as expected by the software running in - // the guest. - let (bar_size_hi, bar_size_lo) = encode_64_bits_bar_size(size); - - self.registers[reg_idx + 1] = (addr >> 32) as u32; - self.writable_bits[reg_idx + 1] = 0xffff_ffff; - self.bars[bar_idx as usize + 1].addr = self.registers[reg_idx + 1]; - self.bars[bar_idx as usize].size = bar_size_lo; - self.bars[bar_idx as usize + 1].size = bar_size_hi; - self.bars[bar_idx as usize + 1].used = true; - - // Addresses of memory BARs are 16-byte aligned so the lower 4 bits are always 0. Within - // the register we use this 4 bits to encode extra information about the BAR. The meaning - // of these bits is: - // - // | Bit 3 | Bits 2-1 | Bit 0 | - // | Prefetchable | type | Always 0 | - // - // Non-prefetchable, 64 bits BAR region - self.registers[reg_idx] = (((addr & 0xffff_ffff) as u32) & BAR_MEM_ADDR_MASK) | 4u32; - self.writable_bits[reg_idx] = BAR_MEM_ADDR_MASK; - self.bars[bar_idx as usize].addr = self.registers[reg_idx]; - self.bars[bar_idx as usize].used = true; - } - - /// Returns the address of the given BAR region. - /// - /// This assumes that `bar_idx` is a valid BAR register. - pub fn get_bar_addr(&self, bar_idx: u8) -> u64 { - assert!((bar_idx as usize) < NUM_BAR_REGS); - - let reg_idx = (BAR0_REG + u16::from(bar_idx)) as usize; - - (u64::from(self.bars[bar_idx as usize].addr & self.writable_bits[reg_idx])) - | (u64::from(self.bars[bar_idx as usize + 1].addr) << 32) - } - /// Adds the capability `cap_data` to the list of capabilities. /// /// `cap_data` should not include the two-byte PCI capability header (type, next). @@ -329,9 +329,7 @@ impl PciConfiguration { self.writable_bits[(cap_offset / 4) as usize] = MSI_CAPABILITY_REGISTER_MASK; } PciCapabilityId::MsiX => { - self.msix_cap_reg_idx = Some(u16::from(cap_offset) / 4); - self.writable_bits[self.msix_cap_reg_idx.unwrap() as usize] = - MSIX_CAPABILITY_REGISTER_MASK; + self.writable_bits[(cap_offset / 4) as usize] = MSIX_CAPABILITY_REGISTER_MASK; } _ => {} } @@ -355,26 +353,6 @@ impl PciConfiguration { return; } - // Handle potential write to MSI-X message control register - if let Some(msix_cap_reg_idx) = self.msix_cap_reg_idx - && let Some(msix_config) = &self.msix_config - { - if msix_cap_reg_idx == reg_idx && offset == 2 && data.len() == 2 { - // 2-bytes write in the Message Control field - msix_config - .lock() - .unwrap() - .set_msg_ctl(LittleEndian::read_u16(data)); - } else if msix_cap_reg_idx == reg_idx && offset == 0 && data.len() == 4 { - // 4 bytes write at the beginning. Ignore the first 2 bytes which are the - // capability id and next capability pointer - msix_config - .lock() - .unwrap() - .set_msg_ctl((LittleEndian::read_u32(data) >> 16) as u16); - } - } - match data.len() { 1 => self.write_byte(reg_idx * 4 + offset as u16, data[0]), 2 => self.write_word( @@ -385,74 +363,6 @@ impl PciConfiguration { _ => (), } } - - /// Detect whether the guest wants to reprogram the address of a BAR - pub fn detect_bar_reprogramming( - &mut self, - reg_idx: u16, - data: &[u8], - ) -> Option { - if data.len() != 4 { - return None; - } - - let value = LittleEndian::read_u32(data); - - let mask = self.writable_bits[reg_idx as usize]; - if !(BAR0_REG as usize..BAR0_REG as usize + NUM_BAR_REGS).contains(&(reg_idx as usize)) { - return None; - } - - // Ignore the case where the BAR size is being asked for. - if value == 0xffff_ffff { - return None; - } - - let bar_idx = (reg_idx - BAR0_REG) as usize; - - // Do not reprogram BARs we are not using - if !self.bars[bar_idx].used { - return None; - } - - // We are always using 64bit BARs, so two BAR registers. We don't do anything until - // the upper BAR is modified, otherwise we would be moving the BAR to a wrong - // location in memory. - if bar_idx == 0 { - return None; - } - - // The lower BAR (of this 64bit BAR) has been reprogrammed to a different value - // than it used to be - let reg_idx = reg_idx as usize; - if (self.registers[reg_idx - 1] & self.writable_bits[reg_idx - 1]) - != (self.bars[bar_idx - 1].addr & self.writable_bits[reg_idx - 1]) || - // Or the lower BAR hasn't been changed but the upper one is being reprogrammed - // now to a different value - (value & mask) != (self.bars[bar_idx].addr & mask) - { - info!( - "Detected BAR reprogramming: (BAR {}) 0x{:x}->0x{:x}", - reg_idx, self.registers[reg_idx], value - ); - let old_base = (u64::from(self.bars[bar_idx].addr & mask) << 32) - | u64::from(self.bars[bar_idx - 1].addr & self.writable_bits[reg_idx - 1]); - let new_base = (u64::from(value & mask) << 32) - | u64::from(self.registers[reg_idx - 1] & self.writable_bits[reg_idx - 1]); - let len = decode_64_bits_bar_size(self.bars[bar_idx].size, self.bars[bar_idx - 1].size); - - self.bars[bar_idx].addr = value; - self.bars[bar_idx - 1].addr = self.registers[reg_idx - 1]; - - return Some(BarReprogrammingParams { - old_base, - new_base, - len, - }); - } - - None - } } #[cfg(test)] @@ -660,7 +570,6 @@ mod tests { PciMultimediaSubclass::AudioController as u8, 0xABCD, 0x2468, - None, ) } @@ -676,104 +585,6 @@ mod tests { assert_eq!(prog_if, 0x0); } - #[test] - #[should_panic] - fn test_encode_zero_sized_bar() { - encode_64_bits_bar_size(0); - } - - #[test] - #[should_panic] - fn test_decode_zero_sized_bar() { - decode_64_bits_bar_size(0, 0); - } - - #[test] - fn test_bar_size_encoding() { - // According to OSDev wiki (https://wiki.osdev.org/PCI#Address_and_size_of_the_BAR): - // - // > To determine the amount of address space needed by a PCI device, you must save the - // > original value of the BAR, write a value of all 1's to the register, then read it back. - // > The amount of memory can then be determined by masking the information bits, performing - // > a bitwise NOT ('~' in C), and incrementing the value by 1. The original value of the - // BAR > should then be restored. The BAR register is naturally aligned and as such you can - // only > modify the bits that are set. For example, if a device utilizes 16 MB it will - // have BAR0 > filled with 0xFF000000 (0x1000000 after decoding) and you can only modify - // the upper > 8-bits. - // - // So, we encode a 64 bits size and then store it as a 2 32bit addresses (we use - // two BARs). - let (hi, lo) = encode_64_bits_bar_size(0xffff_ffff_ffff_fff0); - assert_eq!(hi, 0); - assert_eq!(lo, 0x0000_0010); - assert_eq!(decode_64_bits_bar_size(hi, lo), 0xffff_ffff_ffff_fff0); - } - - #[test] - #[should_panic] - fn test_bar_size_no_power_of_two() { - let mut pci_config = default_pci_config(); - pci_config.add_pci_bar(0, 0x1000, 0x1001); - } - - #[test] - #[should_panic] - fn test_bad_bar_index() { - let mut pci_config = default_pci_config(); - pci_config.add_pci_bar(NUM_BAR_REGS as u8, 0x1000, 0x1000); - } - - #[test] - #[should_panic] - fn test_bad_64bit_bar_index() { - let mut pci_config = default_pci_config(); - pci_config.add_pci_bar((NUM_BAR_REGS - 1) as u8, 0x1000, 0x1000); - } - - #[test] - #[should_panic] - fn test_bar_size_overflows() { - let mut pci_config = default_pci_config(); - pci_config.add_pci_bar(0, u64::MAX, 0x2); - } - - #[test] - #[should_panic] - fn test_lower_bar_free_upper_used() { - let mut pci_config = default_pci_config(); - pci_config.add_pci_bar(1, 0x1000, 0x1000); - pci_config.add_pci_bar(0, 0x1000, 0x1000); - } - - #[test] - #[should_panic] - fn test_lower_bar_used() { - let mut pci_config = default_pci_config(); - pci_config.add_pci_bar(0, 0x1000, 0x1000); - pci_config.add_pci_bar(0, 0x1000, 0x1000); - } - - #[test] - #[should_panic] - fn test_upper_bar_used() { - let mut pci_config = default_pci_config(); - pci_config.add_pci_bar(0, 0x1000, 0x1000); - pci_config.add_pci_bar(1, 0x1000, 0x1000); - } - - #[test] - fn test_add_pci_bar() { - let mut pci_config = default_pci_config(); - - pci_config.add_pci_bar(0, 0x1_0000_0000, 0x1000); - - assert_eq!(pci_config.get_bar_addr(0), 0x1_0000_0000); - assert_eq!(pci_config.read_reg(BAR0_REG) & 0xffff_fff0, 0x0); - assert!(pci_config.bars[0].used); - assert_eq!(pci_config.read_reg(BAR0_REG + 1), 1); - assert!(pci_config.bars[0].used); - } - #[test] fn test_access_invalid_reg() { let mut pci_config = default_pci_config(); @@ -821,105 +632,86 @@ mod tests { } #[test] - fn test_detect_bar_reprogramming() { + fn test_rom_bar() { let mut pci_config = default_pci_config(); - // Trying to reprogram with something less than 4 bytes (length of the address) should fail - assert!( - pci_config - .detect_bar_reprogramming(BAR0_REG, &[0x13]) - .is_none() - ); - assert!( - pci_config - .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12]) - .is_none() - ); - assert!( - pci_config - .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12]) - .is_none() - ); - assert!( - pci_config - .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12, 0x16]) - .is_none() - ); + // ROM BAR address should always be 0 and writes to it shouldn't do anything + assert_eq!(pci_config.read_reg(ROM_BAR_REG), 0); + pci_config.write_reg(ROM_BAR_REG, 0x42); + assert_eq!(pci_config.read_reg(ROM_BAR_REG), 0); - // Writing all 1s is a special case where we're actually asking for the size of the BAR - assert!( - pci_config - .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0xffff_ffff)) - .is_none() - ); + // Reading the size of the BAR should always return 0 as well + pci_config.write_reg(ROM_BAR_REG, 0xffff_ffff); + assert_eq!(pci_config.read_reg(ROM_BAR_REG), 0); + } - // Trying to reprogram a BAR that hasn't be initialized does nothing - for reg_idx in BAR0_REG..BAR0_REG + NUM_BAR_REGS as u16 { - assert!( - pci_config - .detect_bar_reprogramming(reg_idx, &u32::to_le_bytes(0x1312_4243)) - .is_none() - ); - } + #[test] + #[should_panic] + fn test_encode_zero_sized_bar() { + encode_64_bits_bar_size(0); + } - // Reprogramming of a 64bit BAR - pci_config.add_pci_bar(0, 0x13_1200_0000, 0x8000); + #[test] + #[should_panic] + fn test_bars_size_no_power_of_two() { + let mut bars = Bars::default(); + bars.set_bar_64(0, 0x1000, 0x1001, false); + } - // First we write the lower 32 bits and this shouldn't cause any reprogramming - assert!( - pci_config - .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x4200_0000)) - .is_none() - ); - pci_config.write_config_register(BAR0_REG, 0, &u32::to_le_bytes(0x4200_0000)); + #[test] + #[should_panic] + fn test_bars_bad_bar_index() { + let mut bars = Bars::default(); + bars.set_bar_64(NUM_BAR_REGS, 0x1000, 0x1000, false); + } - // Writing the upper 32 bits should trigger the reprogramming - assert_eq!( - pci_config.detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x84)), - Some(BarReprogrammingParams { - old_base: 0x13_1200_0000, - new_base: 0x84_4200_0000, - len: 0x8000, - }) - ); - pci_config.write_config_register(BAR0_REG + 1, 0, &u32::to_le_bytes(0x84)); + #[test] + #[should_panic] + fn test_bars_bad_64bit_bar_index() { + let mut bars = Bars::default(); + bars.set_bar_64(NUM_BAR_REGS - 1, 0x1000, 0x1000, false); + } - // Trying to reprogram the upper bits directly (without first touching the lower bits) - // should trigger a reprogramming - assert_eq!( - pci_config.detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x1312)), - Some(BarReprogrammingParams { - old_base: 0x84_4200_0000, - new_base: 0x1312_4200_0000, - len: 0x8000, - }) - ); - pci_config.write_config_register(BAR0_REG + 1, 0, &u32::to_le_bytes(0x1312)); + #[test] + #[should_panic] + fn test_bars_bar_size_overflows() { + let mut bars = Bars::default(); + bars.set_bar_64(0, u64::MAX, 0x2, false); + } - // Attempting to reprogram the BAR with the same address should not have any effect - assert!( - pci_config - .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x4200_0000)) - .is_none() - ); - assert!( - pci_config - .detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x1312)) - .is_none() - ); + #[test] + #[should_panic] + fn test_bars_lower_bar_free_upper_used() { + let mut bars = Bars::default(); + bars.set_bar_64(1, 0x1000, 0x1000, false); + bars.set_bar_64(0, 0x1000, 0x1000, false); } #[test] - fn test_rom_bar() { - let mut pci_config = default_pci_config(); + #[should_panic] + fn test_bars_lower_bar_used() { + let mut bars = Bars::default(); + bars.set_bar_64(0, 0x1000, 0x1000, false); + bars.set_bar_64(0, 0x1000, 0x1000, false); + } - // ROM BAR address should always be 0 and writes to it shouldn't do anything - assert_eq!(pci_config.read_reg(ROM_BAR_REG), 0); - pci_config.write_reg(ROM_BAR_REG, 0x42); - assert_eq!(pci_config.read_reg(ROM_BAR_REG), 0); + #[test] + #[should_panic] + fn test_bars_upper_bar_used() { + let mut bars = Bars::default(); + bars.set_bar_64(0, 0x1000, 0x1000, false); + bars.set_bar_64(1, 0x1000, 0x1000, false); + } - // Reading the size of the BAR should always return 0 as well - pci_config.write_reg(ROM_BAR_REG, 0xffff_ffff); - assert_eq!(pci_config.read_reg(ROM_BAR_REG), 0); + #[test] + fn test_bars_add_pci_bar() { + let mut bars = Bars::default(); + bars.set_bar_64(0, 0x1_0000_0000, 0x1000, false); + assert_eq!(bars.get_bar_addr_64(0), 0x1_0000_0000); + let mut v: u32 = 0; + bars.read(0, 0, v.as_mut_bytes()); + assert_eq!(v & 0xffff_fff0, 0x0); + bars.read(1, 0, v.as_mut_bytes()); + assert_eq!(v, 1); } } diff --git a/src/vmm/src/pci/msix.rs b/src/vmm/src/pci/msix.rs index 17ef9abfec7..e046cfb4ad9 100644 --- a/src/vmm/src/pci/msix.rs +++ b/src/vmm/src/pci/msix.rs @@ -9,6 +9,7 @@ use std::sync::Arc; use byteorder::{ByteOrder, LittleEndian}; use serde::{Deserialize, Serialize}; use vm_memory::ByteValued; +use zerocopy::FromBytes; use crate::Vm; use crate::logger::{debug, error, warn}; @@ -209,6 +210,18 @@ impl MsixConfig { } } + /// Write to the Message Control register + pub fn write_msg_ctl_register(&mut self, offset: u8, data: &[u8]) { + if offset == 2 && data.len() == 2 { + // 2-bytes write in the Message Control field + self.set_msg_ctl(u16::read_from_bytes(data).unwrap()); + } else if offset == 0 && data.len() == 4 { + // 4 bytes write at the beginning. Ignore the first 2 bytes which are the + // capability id and next capability pointer + self.set_msg_ctl((u32::read_from_bytes(data).unwrap() >> 16) as u16); + } + } + /// Read an MSI-X table entry pub fn read_table(&self, offset: u64, data: &mut [u8]) { assert!(data.len() <= 8);