Skip to content

Commit b5ac3a6

Browse files
committed
fix(aarch64): override fabricated CLIDR_EL1 to match host cache topology
Since host kernel 6.3 (commit 7af0c2534f4c), KVM fabricates CLIDR_EL1 instead of passing through the host's real value. On hosts with IDC=1 and DIC=0 (e.g. Neoverse V1), the fabricated CLIDR exposes only L1=Unified when the host actually has separate L1d+L1i, L2, and L3. Guest kernels >= 6.1.156 backported init_of_cache_level() which counts cache leaves from the DT, while populate_cache_leaves() uses CLIDR_EL1. When the DT (built from host sysfs) describes more cache entries than CLIDR_EL1, the mismatch causes cache sysfs entries to not be created, breaking /sys/devices/system/cpu/cpu*/cache/* in the guest. Fix this by reading the current CLIDR_EL1 from vCPU 0, merging in the ctype and LoC fields derived from the host's sysfs cache topology, and writing the result back to each vCPU via KVM_SET_ONE_REG. Fields that cannot be derived from sysfs (LoUU, LoUIS, ICB, Ttype) are preserved from the original CLIDR_EL1. This makes CLIDR_EL1 consistent with the FDT, which already describes the real host caches. On pre-6.3 kernels, KVM passes through the real host CLIDR rather than fabricating one. Since the sysfs cache topology already matches the real CLIDR, the merge produces the same value, the write is skipped, and the override is effectively a no-op. This approach preserves the full host cache information for the guest rather than stripping the FDT to match the fabricated CLIDR. Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
1 parent dc84e40 commit b5ac3a6

4 files changed

Lines changed: 280 additions & 3 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ and this project adheres to
3232
HID (Hardware ID) of VMGenID device so that it aligns with the upstream Linux
3333
kernel. This caused the driver not to be bound correctly to the device prior
3434
to Linux kernel 6.10.
35+
- [#5780](https://github.com/firecracker-microvm/firecracker/pull/5780): Fixed
36+
missing `/sys/devices/system/cpu/cpu*/cache/*` in aarch64 guests when running
37+
on host kernels >= 6.3 with guest kernels >= 6.1.156.
3538

3639
## [1.15.0]
3740

src/vmm/src/arch/aarch64/cache_info.rs

Lines changed: 199 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use crate::logger::warn;
1010
const MAX_CACHE_LEVEL: u8 = 7;
1111

1212
#[derive(Debug, thiserror::Error, displaydoc::Display)]
13-
pub(crate) enum CacheInfoError {
13+
pub enum CacheInfoError {
1414
/// Failed to read cache information: {0}
1515
FailedToReadCacheInfo(#[from] io::Error),
1616
/// Invalid cache configuration found for {0}: {1}
@@ -32,7 +32,7 @@ trait CacheStore: std::fmt::Debug {
3232
}
3333

3434
#[derive(Debug)]
35-
pub(crate) struct CacheEntry {
35+
pub struct CacheEntry {
3636
// Cache Level: 1, 2, 3..
3737
pub level: u8,
3838
// Type of cache: Unified, Data, Instruction.
@@ -154,7 +154,7 @@ impl Default for CacheEntry {
154154

155155
#[derive(Debug)]
156156
// Based on https://elixir.free-electrons.com/linux/v4.9.62/source/include/linux/cacheinfo.h#L11.
157-
pub(crate) enum CacheType {
157+
pub enum CacheType {
158158
Instruction,
159159
Data,
160160
Unified,
@@ -314,6 +314,105 @@ pub(crate) fn read_cache_config(
314314
Ok(())
315315
}
316316

317+
// CLIDR_EL1 field layout (bit positions within the 64-bit register value).
318+
// https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/CLIDR-EL1--Cache-Level-ID-Register
319+
const CLIDR_CTYPE_SHIFT: u8 = 3; // Width of one Ctype field in bits; Ctype<n> starts at bit 3 * (n - 1)
320+
const CLIDR_LOC_SHIFT: u8 = 24; // First bit of the LoC (Level of Coherence) field, bits [26:24]
321+
322+
// CLIDR_EL1 Ctype field values: what kind of cache exists at a given level.
323+
const CLIDR_CTYPE_NO_CACHE: u64 = 0; // No cache at this level
324+
const CLIDR_CTYPE_INSTRUCTION: u64 = 1; // Instruction cache only
325+
const CLIDR_CTYPE_DATA: u64 = 2; // Data cache only
326+
const CLIDR_CTYPE_SEPARATE: u64 = 3; // Separate instruction and data caches
327+
const CLIDR_CTYPE_UNIFIED: u64 = 4; // Unified cache
328+
329+
/// Classify a set of cache entries at the same level into a CLIDR Ctype value.
330+
fn ctype_for_entries<'a>(entries: impl Iterator<Item = &'a CacheEntry>) -> u64 {
331+
let (mut has_data, mut has_inst, mut has_unified) = (false, false, false);
332+
let mut any = false;
333+
for c in entries {
334+
any = true;
335+
match c.type_ {
336+
CacheType::Data => has_data = true,
337+
CacheType::Instruction => has_inst = true,
338+
CacheType::Unified => has_unified = true,
339+
}
340+
}
341+
if !any {
342+
return CLIDR_CTYPE_NO_CACHE;
343+
}
344+
if has_unified {
345+
CLIDR_CTYPE_UNIFIED
346+
} else if has_data && has_inst {
347+
CLIDR_CTYPE_SEPARATE
348+
} else if has_data {
349+
CLIDR_CTYPE_DATA
350+
} else if has_inst {
351+
CLIDR_CTYPE_INSTRUCTION
352+
} else {
353+
CLIDR_CTYPE_NO_CACHE
354+
}
355+
}
356+
357+
/// Build a CLIDR_EL1 value from the host's cache topology read from sysfs.
358+
///
359+
/// Since host kernel 6.3 (commit 7af0c2534f4c), KVM fabricates CLIDR_EL1 to
360+
/// expose a different cache topology than the host. Guest kernels >= 6.1.156
361+
/// backported `init_of_cache_level()` which counts cache leaves from the DT,
362+
/// while `populate_cache_leaves()` uses CLIDR_EL1. If the DT (built from
363+
/// sysfs) describes different cache entries than CLIDR_EL1, the mismatch
364+
/// causes cache sysfs entries to not be created in the guest.
365+
///
366+
/// This function builds a CLIDR_EL1 value that matches the host's real cache
367+
/// topology so it can be written to each vCPU, making CLIDR_EL1 consistent
368+
/// with the FDT.
369+
pub(crate) fn build_clidr_from_caches(
370+
l1_caches: &[CacheEntry],
371+
non_l1_caches: &[CacheEntry],
372+
) -> u64 {
373+
let mut clidr: u64 = 0;
374+
let mut max_level: u8 = 0;
375+
376+
let l1_ctype = ctype_for_entries(l1_caches.iter());
377+
if l1_ctype != CLIDR_CTYPE_NO_CACHE {
378+
clidr |= l1_ctype;
379+
max_level = 1;
380+
}
381+
382+
for level in 2..=MAX_CACHE_LEVEL {
383+
let ctype = ctype_for_entries(non_l1_caches.iter().filter(|c| c.level == level));
384+
if ctype == CLIDR_CTYPE_NO_CACHE {
385+
break;
386+
}
387+
388+
let shift = CLIDR_CTYPE_SHIFT * (level - 1);
389+
clidr |= ctype << shift;
390+
max_level = level;
391+
}
392+
393+
// Set LoC (Level of Coherence) to the highest cache level
394+
clidr |= u64::from(max_level) << CLIDR_LOC_SHIFT;
395+
396+
clidr
397+
}
398+
399+
/// Merge sysfs-derived ctype/LoC fields into an existing CLIDR_EL1 value,
400+
/// preserving LoUU, LoUIS, ICB, and Ttype fields from the original.
401+
///
402+
/// This ensures that on pre-6.3 kernels (where CLIDR already matches sysfs),
403+
/// the write is effectively a no-op, and fields we can't derive from sysfs
404+
/// (like LoUU, LoUIS, ICB) are never clobbered.
405+
pub(crate) fn merge_clidr(current: u64, sysfs: u64) -> u64 {
406+
// Ctype fields: bits [20:0] (7 levels × 3 bits each = 21 bits)
407+
// LoC field: bits [26:24]
408+
// We replace only these fields from sysfs, preserving LoUIS [23:21],
409+
// LoUU [29:27], ICB [32:30], and Ttype [46:33] from the original.
410+
const CTYPE_MASK: u64 = 0x001F_FFFF; // bits [20:0]
411+
const LOC_MASK: u64 = 0x0700_0000; // bits [26:24]
412+
const REPLACE_MASK: u64 = CTYPE_MASK | LOC_MASK;
413+
(current & !REPLACE_MASK) | (sysfs & REPLACE_MASK)
414+
}
415+
317416
#[cfg(test)]
318417
mod tests {
319418
use std::collections::HashMap;
@@ -576,4 +675,101 @@ mod tests {
576675
assert_eq!(l1_caches.len(), 2);
577676
assert_eq!(l1_caches.len(), 2);
578677
}
678+
679+
#[test]
fn test_build_clidr_from_caches() {
    // Builds an entry where only level and type matter for the test.
    let entry = |level: u8, type_: CacheType| CacheEntry {
        level,
        type_,
        ..CacheEntry::default()
    };
    // Reads the 3-bit CLIDR field that starts at bit `shift`.
    let field = |clidr: u64, shift: u32| (clidr >> shift) & 0x7;

    // Split L1 (Data + Instruction) with unified L2 and L3:
    // expect ctype1=3, ctype2=4, ctype3=4 and LoC=3.
    let l1 = [entry(1, CacheType::Data), entry(1, CacheType::Instruction)];
    let non_l1 = [entry(2, CacheType::Unified), entry(3, CacheType::Unified)];
    let clidr = build_clidr_from_caches(&l1, &non_l1);
    assert_eq!(field(clidr, 0), 3, "L1 should be Separate");
    assert_eq!(field(clidr, 3), 4, "L2 should be Unified");
    assert_eq!(field(clidr, 6), 4, "L3 should be Unified");
    assert_eq!(field(clidr, 24), 3, "LoC should be 3");

    // A single unified L1 and nothing above it.
    let l1_unified = [entry(1, CacheType::Unified)];
    let clidr = build_clidr_from_caches(&l1_unified, &[]);
    assert_eq!(field(clidr, 0), 4, "L1 should be Unified");
    assert_eq!(field(clidr, 3), 0, "L2 should be NoCache");
    assert_eq!(field(clidr, 24), 1, "LoC should be 1");

    // Nothing reported at any level.
    let clidr = build_clidr_from_caches(&[], &[]);
    assert_eq!(clidr, 0, "Empty caches should produce CLIDR=0");

    // Topology served by the mock store: L1 Data + L1 Instruction + L2 Unified.
    let mut l1_mock: Vec<CacheEntry> = Vec::new();
    let mut non_l1_mock: Vec<CacheEntry> = Vec::new();
    read_cache_config(&mut l1_mock, &mut non_l1_mock).unwrap();
    let clidr = build_clidr_from_caches(&l1_mock, &non_l1_mock);
    assert_eq!(field(clidr, 0), 3, "Mock L1 should be Separate");
    assert_eq!(field(clidr, 3), 4, "Mock L2 should be Unified");
    assert_eq!(field(clidr, 24), 2, "Mock LoC should be 2");
}
737+
738+
#[test]
fn test_merge_clidr() {
    // CLIDR_EL1 fields touched by this test:
    //   [20:0]  Ctype1..Ctype7 (7 x 3 bits)
    //   [23:21] LoUIS
    //   [26:24] LoC
    //   [29:27] LoUU
    //   [32:30] ICB
    // merge_clidr must take Ctype and LoC from `sysfs` and leave the rest
    // of `current` untouched.
    const LOUIS: u32 = 21;
    const LOC: u32 = 24;
    const LOUU: u32 = 27;
    const ICB: u32 = 30;
    const CTYPE_BITS: u64 = 0x001F_FFFF;
    let field = |value: u64, shift: u32| (value >> shift) & 0x7;

    // current: ICB=1, LoUU=2, LoC=1, LoUIS=1, Ctype1=Unified(4)
    let current: u64 = (1 << ICB) | (2 << LOUU) | (1 << LOC) | (1 << LOUIS) | 4;
    // sysfs: LoC=3, Ctype3=Unified(4), Ctype2=Unified(4), Ctype1=Separate(3)
    let sysfs: u64 = (3 << LOC) | (4 << 6) | (4 << 3) | 3;
    let merged = merge_clidr(current, sysfs);

    // Replaced fields follow sysfs.
    assert_eq!(merged & CTYPE_BITS, sysfs & CTYPE_BITS, "Ctype mismatch");
    assert_eq!(field(merged, LOC), 3, "LoC should be 3 from sysfs");
    // Everything else follows current.
    assert_eq!(field(merged, LOUIS), 1, "LoUIS should be preserved");
    assert_eq!(field(merged, LOUU), 2, "LoUU should be preserved");
    assert_eq!(field(merged, ICB), 1, "ICB should be preserved");

    // Identical inputs come back unchanged.
    let unchanged = 0x0000_0000_0300_0123_u64;
    assert_eq!(merge_clidr(unchanged, unchanged), unchanged);
}
579775
}

src/vmm/src/arch/aarch64/mod.rs

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ use crate::arch::{BootProtocol, EntryPoint, arch_memory_regions_with_gap};
2828
use crate::cpu_config::aarch64::{CpuConfiguration, CpuConfigurationError};
2929
use crate::cpu_config::templates::CustomCpuTemplate;
3030
use crate::initrd::InitrdConfig;
31+
use zerocopy::IntoBytes;
32+
33+
use crate::logger::warn;
3134
use crate::utils::{align_up, u64_to_usize, usize_to_u64};
3235
use crate::vmm_config::machine_config::MachineConfig;
3336
use crate::vstate::memory::{
@@ -51,6 +54,8 @@ pub enum ConfigurationError {
5154
VcpuConfig(#[from] CpuConfigurationError),
5255
/// Error configuring the vcpu: {0}
5356
VcpuConfigure(#[from] KvmVcpuError),
57+
/// Failed to read host cache information: {0}
58+
CacheInfo(#[from] cache_info::CacheInfoError),
5459
}
5560

5661
/// Returns a Vec of the valid memory addresses for aarch64.
@@ -118,6 +123,11 @@ pub fn configure_system_for_boot(
118123
&optional_capabilities,
119124
)?;
120125
}
126+
127+
// Override CLIDR_EL1 ctype/LoC fields on each vCPU to match the host's
128+
// real cache topology. See `override_clidr` for details.
129+
override_clidr(vcpus)?;
130+
121131
let vcpu_mpidr = vcpus
122132
.iter_mut()
123133
.map(|cpu| cpu.kvm_vcpu.get_mpidr())
@@ -142,6 +152,70 @@ pub fn configure_system_for_boot(
142152
Ok(())
143153
}
144154

155+
/// Override CLIDR_EL1 ctype/LoC fields on each vCPU to match the host's real
156+
/// cache topology.
157+
///
158+
/// Since host kernel 6.3 (commit 7af0c2534f4c), KVM fabricates CLIDR_EL1
159+
/// instead of passing through the host's real value. This can cause the guest
160+
/// to see fewer cache levels than actually exist. Guest kernels >= 6.1.156
161+
/// backported `init_of_cache_level()` which counts cache leaves from the DT,
162+
/// while `populate_cache_leaves()` uses CLIDR_EL1. If the DT (built from host
163+
/// sysfs) describes different cache entries than CLIDR_EL1, the mismatch
164+
/// causes cache sysfs entries to not be created.
165+
///
166+
/// We read the current (possibly fabricated) CLIDR_EL1, replace only the ctype
167+
/// and LoC fields with values derived from sysfs, and preserve all other fields
168+
/// (LoUU, LoUIS, ICB, Ttype). This is safe on pre-6.3 kernels where CLIDR
169+
/// already matches sysfs — the write is skipped as a no-op.
170+
fn override_clidr(vcpus: &[Vcpu]) -> Result<(), ConfigurationError> {
171+
let mut l1_caches = Vec::new();
172+
let mut non_l1_caches = Vec::new();
173+
cache_info::read_cache_config(&mut l1_caches, &mut non_l1_caches)?;
174+
175+
// If sysfs reports no L1 caches, we cannot build a meaningful CLIDR.
176+
// Writing an all-zero CLIDR would tell the guest there are no caches,
177+
// which is worse than whatever KVM fabricated. Leave it alone.
178+
if l1_caches.is_empty() {
179+
warn!("No L1 caches found in sysfs, skipping CLIDR override");
180+
return Ok(());
181+
}
182+
183+
let sysfs_clidr = cache_info::build_clidr_from_caches(&l1_caches, &non_l1_caches);
184+
185+
let mut cur_clidr: u64 = 0;
186+
// Reading/writing CLIDR_EL1 via KVM_SET_ONE_REG may not be supported on
187+
// older kernels (pre-6.3). In that case KVM passes through the real host
188+
// CLIDR and the override is unnecessary, so we warn and continue.
189+
if let Err(e) = vcpus[0]
190+
.kvm_vcpu
191+
.fd
192+
.get_one_reg(regs::CLIDR_EL1, cur_clidr.as_mut_bytes())
193+
{
194+
warn!("Failed to read CLIDR_EL1, skipping override: {e}");
195+
return Ok(());
196+
}
197+
198+
let new_clidr = cache_info::merge_clidr(cur_clidr, sysfs_clidr);
199+
200+
if new_clidr != cur_clidr {
201+
for vcpu in vcpus.iter() {
202+
if let Err(e) = vcpu
203+
.kvm_vcpu
204+
.fd
205+
.set_one_reg(regs::CLIDR_EL1, new_clidr.as_bytes())
206+
{
207+
warn!(
208+
"Failed to set CLIDR_EL1 to {:#x} on vCPU {}, skipping override: {e}",
209+
new_clidr, vcpu.kvm_vcpu.index
210+
);
211+
return Ok(());
212+
}
213+
}
214+
}
215+
216+
Ok(())
217+
}
218+
145219
/// Returns the memory address where the kernel could be loaded.
146220
pub fn get_kernel_start() -> u64 {
147221
layout::SYSTEM_MEM_START + layout::SYSTEM_MEM_SIZE

src/vmm/src/arch/aarch64/regs.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ arm64_sys_reg!(ID_AA64ISAR0_EL1, 3, 0, 0, 6, 0);
9090
arm64_sys_reg!(ID_AA64ISAR1_EL1, 3, 0, 0, 6, 1);
9191
arm64_sys_reg!(ID_AA64MMFR2_EL1, 3, 0, 0, 7, 2);
9292

93+
// Cache Level ID Register
94+
// https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/CLIDR-EL1--Cache-Level-ID-Register
95+
arm64_sys_reg!(CLIDR_EL1, 3, 1, 0, 0, 1);
96+
9397
// Counter-timer Virtual Timer CompareValue register.
9498
// https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/CNTV-CVAL-EL0--Counter-timer-Virtual-Timer-CompareValue-register
9599
// https://elixir.bootlin.com/linux/v6.8/source/arch/arm64/include/asm/sysreg.h#L468

0 commit comments

Comments
 (0)