Skip to content

Commit b212069

Browse files
committed
feat(devices): rate-limit log callsites in device paths
Migrate error!, warn!, and info! log callsites in device and VMM paths to their rate-limited equivalents. debug! calls are left unchanged as they are disabled in release Firecracker builds and the log_enabled! check in the rate-limited macros already ensures zero overhead for disabled levels. Covers all virtio devices (balloon, net, block, rng, vsock, pmem, mem), transport layers (MMIO, PCI), vCPU exit handling, memory management, and the I/O rate limiter. Each callsite independently rate-limits to 10 messages per 5-second refill period. Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
1 parent 6967a1a commit b212069

31 files changed

Lines changed: 373 additions & 300 deletions

src/vmm/src/arch/aarch64/vcpu.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use crate::arch::aarch64::kvm::OptionalCapabilities;
2121
use crate::arch::aarch64::regs::{Aarch64RegisterVec, KVM_REG_ARM64_SVE_VLS};
2222
use crate::cpu_config::aarch64::custom_cpu_template::VcpuFeatures;
2323
use crate::cpu_config::templates::CpuConfiguration;
24-
use crate::logger::{IncMetric, METRICS, error};
24+
use crate::logger::{IncMetric, METRICS, error_rate_limited};
2525
use crate::vcpu::{VcpuConfig, VcpuError};
2626
use crate::vstate::bus::Bus;
2727
use crate::vstate::memory::{Address, GuestMemoryMmap};
@@ -503,7 +503,7 @@ impl Peripherals {
503503
METRICS.vcpu.failures.inc();
504504
// TODO: Are we sure we want to finish running a vcpu upon
505505
// receiving a vm exit that is not necessarily an error?
506-
error!("Unexpected exit reason on vcpu run: {:?}", exit);
506+
error_rate_limited!("Unexpected exit reason on vcpu run: {:?}", exit);
507507
Err(VcpuError::UnhandledKvmExit(format!("{:?}", exit)))
508508
}
509509
}

src/vmm/src/arch/x86_64/vcpu.rs

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ use kvm_bindings::{
1414
kvm_mp_state, kvm_regs, kvm_sregs, kvm_vcpu_events, kvm_xcrs, kvm_xsave, kvm_xsave2,
1515
};
1616
use kvm_ioctls::{VcpuExit, VcpuFd};
17-
use log::{error, warn};
1817
use serde::{Deserialize, Serialize};
1918
use vmm_sys_util::fam::{self, FamStruct};
2019

@@ -24,7 +23,7 @@ use crate::arch::x86_64::interrupts;
2423
use crate::arch::x86_64::msr::{MsrError, create_boot_msr_entries};
2524
use crate::arch::x86_64::regs::{SetupFpuError, SetupRegistersError, SetupSpecialRegistersError};
2625
use crate::cpu_config::x86_64::{CpuConfiguration, cpuid};
27-
use crate::logger::{IncMetric, METRICS};
26+
use crate::logger::{IncMetric, METRICS, error_rate_limited, warn_rate_limited};
2827
use crate::vstate::bus::Bus;
2928
use crate::vstate::memory::GuestMemoryMmap;
3029
use crate::vstate::vcpu::{VcpuConfig, VcpuEmulation, VcpuError};
@@ -281,7 +280,7 @@ impl KvmVcpu {
281280
// https://elixir.bootlin.com/linux/v6.17.5/source/arch/x86/kvm/x86.c#L5736-L5737
282281
if let Err(err) = self.fd.kvmclock_ctrl() {
283282
METRICS.vcpu.kvmclock_ctrl_fails.inc();
284-
warn!("KVM_KVMCLOCK_CTRL call failed {}", err);
283+
warn_rate_limited!("KVM_KVMCLOCK_CTRL call failed {}", err);
285284
}
286285
}
287286

@@ -403,7 +402,7 @@ impl KvmVcpu {
403402
.flat_map(|msrs| msrs.as_mut_slice())
404403
.filter(|msr| msr.index == MSR_IA32_TSC_DEADLINE && msr.data == 0)
405404
.for_each(|msr| {
406-
warn!(
405+
warn_rate_limited!(
407406
"MSR_IA32_TSC_DEADLINE is 0, replacing with {:#x}.",
408407
tsc_value
409408
);
@@ -430,9 +429,10 @@ impl KvmVcpu {
430429
deferred_msrs
431430
.push(*msr)
432431
.inspect_err(|err| {
433-
error!(
432+
error_rate_limited!(
434433
"Failed to move MSR {} into later chunk: {:?}",
435-
msr.index, err
434+
msr.index,
435+
err
436436
)
437437
})
438438
.is_err()
@@ -589,7 +589,9 @@ impl KvmVcpu {
589589
// v0.25 and newer snapshots without TSC will only work on
590590
// the same CPU model as the host on which they were taken.
591591
// TODO: Add negative test for this warning failure.
592-
warn!("TSC freq not available. Snapshot cannot be loaded on a different CPU model.");
592+
warn_rate_limited!(
593+
"TSC freq not available. Snapshot cannot be loaded on a different CPU model."
594+
);
593595
None
594596
});
595597
let cpuid = self.get_cpuid()?;
@@ -729,7 +731,10 @@ impl Peripherals {
729731
if let Some(pio_bus) = &self.pio_bus {
730732
let _metric = METRICS.vcpu.exit_io_in_agg.record_latency_metrics();
731733
if let Err(err) = pio_bus.read(u64::from(addr), data) {
732-
warn!("vcpu: IO read @ {addr:#x}:{:#x} failed: {err}", data.len());
734+
warn_rate_limited!(
735+
"vcpu: IO read @ {addr:#x}:{:#x} failed: {err}",
736+
data.len()
737+
);
733738
}
734739
METRICS.vcpu.exit_io_in.inc();
735740
}
@@ -739,15 +744,18 @@ impl Peripherals {
739744
if let Some(pio_bus) = &self.pio_bus {
740745
let _metric = METRICS.vcpu.exit_io_out_agg.record_latency_metrics();
741746
if let Err(err) = pio_bus.write(u64::from(addr), data) {
742-
warn!("vcpu: IO write @ {addr:#x}:{:#x} failed: {err}", data.len());
747+
warn_rate_limited!(
748+
"vcpu: IO write @ {addr:#x}:{:#x} failed: {err}",
749+
data.len()
750+
);
743751
}
744752
METRICS.vcpu.exit_io_out.inc();
745753
}
746754
Ok(VcpuEmulation::Handled)
747755
}
748756
unexpected_exit => {
749757
METRICS.vcpu.failures.inc();
750-
error!("Unexpected exit reason on vcpu run: {:?}", unexpected_exit);
758+
error_rate_limited!("Unexpected exit reason on vcpu run: {:?}", unexpected_exit);
751759
Err(VcpuError::UnhandledKvmExit(format!(
752760
"{:?}",
753761
unexpected_exit

src/vmm/src/devices/virtio/balloon/device.rs

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ use std::ops::Deref;
55
use std::sync::Arc;
66
use std::time::Duration;
77

8-
use log::{debug, error, info, warn};
98
use serde::{Deserialize, Serialize};
109
use utils::time::TimerFd;
1110
use vmm_sys_util::eventfd::EventFd;
@@ -32,7 +31,10 @@ use crate::devices::virtio::device::{ActiveState, VirtioDeviceType};
3231
use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1;
3332
use crate::devices::virtio::queue::InvalidAvailIdx;
3433
use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType};
35-
use crate::logger::{IncMetric, log_dev_preview_warning};
34+
use crate::logger::{
35+
IncMetric, debug, error_rate_limited, info_rate_limited, log_dev_preview_warning,
36+
warn_rate_limited,
37+
};
3638
use crate::utils::u64_to_usize;
3739
use crate::vstate::memory::{
3840
Address, ByteValued, Bytes, GuestAddress, GuestMemoryExtension, GuestMemoryMmap,
@@ -404,7 +406,7 @@ impl Balloon {
404406
if !head.is_write_only() && len.is_multiple_of(SIZE_OF_U32) {
405407
// Check descriptor pfn count.
406408
if len > max_len {
407-
error!(
409+
error_rate_limited!(
408410
"Inflate descriptor has bogus page count {} > {}, skipping.",
409411
len / SIZE_OF_U32,
410412
MAX_PAGES_IN_DESC
@@ -455,7 +457,7 @@ impl Balloon {
455457
guest_addr,
456458
usize::try_from(range_len).unwrap() << VIRTIO_BALLOON_PFN_SHIFT,
457459
) {
458-
error!("Error removing memory range: {:?}", err);
460+
error_rate_limited!("Error removing memory range: {:?}", err);
459461
}
460462
}
461463
}
@@ -496,7 +498,9 @@ impl Balloon {
496498
if let Some(prev_stats_desc) = self.stats_desc_index {
497499
// We shouldn't ever have an extra buffer if the driver follows
498500
// the protocol, but return it if we find one.
499-
error!("balloon: driver is not compliant, more than one stats buffer received");
501+
error_rate_limited!(
502+
"balloon: driver is not compliant, more than one stats buffer received"
503+
);
500504
self.queues[STATS_INDEX].add_used(prev_stats_desc, 0)?;
501505
self.queues[STATS_INDEX].advance_used_ring_idx();
502506
self.signal_used_queue(STATS_INDEX)?;
@@ -508,9 +512,10 @@ impl Balloon {
508512
// so that the stats request/response protocol is preserved and
509513
// trigger_stats_update can return it to the guest later.
510514
if head.len > MAX_STATS_DESC_LEN {
511-
warn!(
515+
warn_rate_limited!(
512516
"balloon: stats descriptor too large: {} > {}, skipping",
513-
head.len, MAX_STATS_DESC_LEN
517+
head.len,
518+
MAX_STATS_DESC_LEN
514519
);
515520
self.stats_desc_index = Some(head.index);
516521
continue;
@@ -571,7 +576,7 @@ impl Balloon {
571576

572577
// We don't expect this from the driver, but lets treat as a stop
573578
if cmd == FREE_PAGE_HINT_DONE {
574-
warn!("balloon hinting: Unexpected cmd from guest: {cmd}");
579+
warn_rate_limited!("balloon hinting: Unexpected cmd from guest: {cmd}");
575580
complete = true;
576581
}
577582

@@ -584,19 +589,21 @@ impl Balloon {
584589
}
585590

586591
let Some(chain_cmd) = self.hinting_state.guest_cmd else {
587-
warn!("balloon hinting: received range with no command id.");
592+
warn_rate_limited!("balloon hinting: received range with no command id.");
588593
continue;
589594
};
590595

591596
if chain_cmd != host_cmd {
592-
info!("balloon hinting: Received chain from previous command ignoring.");
597+
info_rate_limited!(
598+
"balloon hinting: Received chain from previous command ignoring."
599+
);
593600
continue;
594601
}
595602

596603
METRICS.free_page_hint_count.inc();
597604
if let Err(err) = mem.discard_range(desc.addr, desc.len as usize) {
598605
METRICS.free_page_hint_fails.inc();
599-
error!("balloon hinting: failed to remove range: {err:?}");
606+
error_rate_limited!("balloon hinting: failed to remove range: {err:?}");
600607
} else {
601608
METRICS.free_page_hint_freed.add(desc.len as u64);
602609
}
@@ -638,7 +645,7 @@ impl Balloon {
638645
METRICS.free_page_report_count.inc();
639646
if let Err(err) = mem.discard_range(desc.addr, desc.len as usize) {
640647
METRICS.free_page_report_fails.inc();
641-
error!("balloon: failed to remove range: {err:?}");
648+
error_rate_limited!("balloon: failed to remove range: {err:?}");
642649
} else {
643650
METRICS.free_page_report_freed.add(desc.len as u64);
644651
}
@@ -709,7 +716,7 @@ impl Balloon {
709716
self.queues[STATS_INDEX].advance_used_ring_idx();
710717
self.signal_used_queue(STATS_INDEX)
711718
} else {
712-
error!("Failed to update balloon stats, missing descriptor.");
719+
error_rate_limited!("Failed to update balloon stats, missing descriptor.");
713720
Ok(())
714721
}
715722
}
@@ -933,7 +940,7 @@ impl VirtioDevice for Balloon {
933940
let len = config_space_bytes.len().min(data.len());
934941
data[..len].copy_from_slice(&config_space_bytes[..len]);
935942
} else {
936-
error!("Failed to read config space");
943+
error_rate_limited!("Failed to read config space");
937944
}
938945
}
939946

@@ -945,7 +952,7 @@ impl VirtioDevice for Balloon {
945952
.zip(end)
946953
.and_then(|(start, end)| config_space_bytes.get_mut(start..end))
947954
else {
948-
error!("Failed to write config space");
955+
error_rate_limited!("Failed to write config space");
949956
return;
950957
};
951958

@@ -983,7 +990,7 @@ impl VirtioDevice for Balloon {
983990
fn kick(&mut self) {
984991
if self.is_activated() {
985992
if self.free_page_hinting() {
986-
info!(
993+
info_rate_limited!(
987994
"[{:?}:{}] resetting free page hinting to DONE",
988995
self.device_type(),
989996
self.id()

src/vmm/src/devices/virtio/balloon/event_handler.rs

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use vmm_sys_util::epoll::EventSet;
77
use super::{DEFLATE_INDEX, INFLATE_INDEX, STATS_INDEX, report_balloon_event_fail};
88
use crate::devices::virtio::balloon::device::Balloon;
99
use crate::devices::virtio::device::VirtioDevice;
10-
use crate::logger::{error, warn};
10+
use crate::logger::{error_rate_limited, warn_rate_limited};
1111

1212
impl Balloon {
1313
const PROCESS_ACTIVATE: u32 = 0;
@@ -24,29 +24,29 @@ impl Balloon {
2424
Self::PROCESS_VIRTQ_INFLATE,
2525
EventSet::IN,
2626
)) {
27-
error!("Failed to register inflate queue event: {}", err);
27+
error_rate_limited!("Failed to register inflate queue event: {}", err);
2828
}
2929
if let Err(err) = ops.add(Events::with_data(
3030
&self.queue_evts[DEFLATE_INDEX],
3131
Self::PROCESS_VIRTQ_DEFLATE,
3232
EventSet::IN,
3333
)) {
34-
error!("Failed to register deflate queue event: {}", err);
34+
error_rate_limited!("Failed to register deflate queue event: {}", err);
3535
}
3636
if self.stats_enabled() {
3737
if let Err(err) = ops.add(Events::with_data(
3838
&self.queue_evts[STATS_INDEX],
3939
Self::PROCESS_VIRTQ_STATS,
4040
EventSet::IN,
4141
)) {
42-
error!("Failed to register stats queue event: {}", err);
42+
error_rate_limited!("Failed to register stats queue event: {}", err);
4343
}
4444
if let Err(err) = ops.add(Events::with_data(
4545
&self.stats_timer,
4646
Self::PROCESS_STATS_TIMER,
4747
EventSet::IN,
4848
)) {
49-
error!("Failed to register stats timerfd event: {}", err);
49+
error_rate_limited!("Failed to register stats timerfd event: {}", err);
5050
}
5151
}
5252

@@ -57,7 +57,7 @@ impl Balloon {
5757
EventSet::IN,
5858
))
5959
{
60-
error!("Failed to register free page hinting queue event: {}", err);
60+
error_rate_limited!("Failed to register free page hinting queue event: {}", err);
6161
}
6262

6363
if self.free_page_reporting()
@@ -67,7 +67,7 @@ impl Balloon {
6767
EventSet::IN,
6868
))
6969
{
70-
error!(
70+
error_rate_limited!(
7171
"Failed to register free page reporting queue event: {}",
7272
err
7373
);
@@ -80,21 +80,21 @@ impl Balloon {
8080
Self::PROCESS_ACTIVATE,
8181
EventSet::IN,
8282
)) {
83-
error!("Failed to register activate event: {}", err);
83+
error_rate_limited!("Failed to register activate event: {}", err);
8484
}
8585
}
8686

8787
fn process_activate_event(&self, ops: &mut EventOps) {
8888
if let Err(err) = self.activate_evt.read() {
89-
error!("Failed to consume balloon activate event: {:?}", err);
89+
error_rate_limited!("Failed to consume balloon activate event: {:?}", err);
9090
}
9191
self.register_runtime_events(ops);
9292
if let Err(err) = ops.remove(Events::with_data(
9393
&self.activate_evt,
9494
Self::PROCESS_ACTIVATE,
9595
EventSet::IN,
9696
)) {
97-
error!("Failed to un-register activate event: {}", err);
97+
error_rate_limited!("Failed to un-register activate event: {}", err);
9898
}
9999
}
100100
}
@@ -106,9 +106,10 @@ impl MutEventSubscriber for Balloon {
106106
let supported_events = EventSet::IN;
107107

108108
if !supported_events.contains(event_set) {
109-
warn!(
109+
warn_rate_limited!(
110110
"Received unknown event: {:?} from source: {:?}",
111-
event_set, source
111+
event_set,
112+
source
112113
);
113114
return;
114115
}
@@ -135,11 +136,11 @@ impl MutEventSubscriber for Balloon {
135136
.process_free_page_reporting_queue_event()
136137
.unwrap_or_else(report_balloon_event_fail),
137138
_ => {
138-
warn!("Balloon: Spurious event received: {:?}", source);
139+
warn_rate_limited!("Balloon: Spurious event received: {:?}", source);
139140
}
140141
};
141142
} else {
142-
warn!(
143+
warn_rate_limited!(
143144
"Balloon: The device is not yet activated. Spurious event received: {:?}",
144145
source
145146
);

src/vmm/src/devices/virtio/balloon/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ pub mod persist;
1010
pub mod test_utils;
1111
mod util;
1212

13-
use log::error;
13+
use crate::logger::error_rate_limited;
1414

1515
pub use self::device::{Balloon, BalloonConfig, BalloonStats};
1616
use super::queue::{InvalidAvailIdx, QueueError};
@@ -120,6 +120,6 @@ pub(super) fn report_balloon_event_fail(err: BalloonError) {
120120
if let BalloonError::InvalidAvailIdx(err) = err {
121121
panic!("{}", err);
122122
}
123-
error!("{:?}", err);
123+
error_rate_limited!("{:?}", err);
124124
METRICS.event_fails.inc();
125125
}

src/vmm/src/devices/virtio/balloon/util.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
use std::io;
55

66
use super::{MAX_PAGE_COMPACT_BUFFER, RemoveRegionError};
7-
use crate::logger::error;
7+
use crate::logger::error_rate_limited;
88
use crate::utils::u64_to_usize;
99
use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryMmap, GuestMemoryRegion};
1010

@@ -36,7 +36,7 @@ pub(crate) fn compact_page_frame_numbers(v: &mut [u32]) -> Vec<(u32, u32)> {
3636
// Skip duplicate pages. This will ensure we only consider
3737
// distinct PFNs.
3838
if page_frame_number == v[pfn_index - 1] {
39-
error!("Skipping duplicate PFN {}.", page_frame_number);
39+
error_rate_limited!("Skipping duplicate PFN {}.", page_frame_number);
4040
continue;
4141
}
4242

0 commit comments

Comments
 (0)