Skip to content

Commit 602c7a0

Browse files
committed
feat: switch to a per-device metrics
vsock and rng devices used a global variable which prevented running vmm tests in parallel Signed-off-by: aerosouund <aerosound161@gmail.com>
1 parent 395a0ee commit 602c7a0

8 files changed

Lines changed: 398 additions & 162 deletions

File tree

src/vmm/src/devices/virtio/net/tap.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ pub mod tests {
224224
use super::*;
225225
use crate::devices::virtio::net::generated;
226226
use crate::devices::virtio::net::test_utils::{TapTrafficSimulator, enable, if_index};
227+
use regex::Regex;
227228

228229
// Redefine `IoVecBufferMut` with specific length. Otherwise
229230
// Rust will not know what to do.
@@ -241,10 +242,16 @@ pub mod tests {
241242
generated::ifreq__bindgen_ty_1::default().ifrn_name.len()
242243
});
243244

244-
// Empty name - The tap should be named "tap0" by default
245+
// Empty name - The tap should be named by the kernel (e.g., "tap0", "tap1", etc.)
245246
let tap = Tap::open_named("").unwrap();
246-
assert_eq!(b"tap0\0\0\0\0\0\0\0\0\0\0\0\0", &tap.if_name);
247-
assert_eq!("tap0", tap.if_name_as_str());
247+
let tap_name_str = tap.if_name_as_str();
248+
249+
// Check that it starts with "tap" and the remainder is numeric.
250+
assert!(
251+
Regex::new(r"^tap\d+$").unwrap().is_match(tap_name_str),
252+
"Generated tap name '{}' does not match expected pattern",
253+
tap_name_str
254+
);
248255

249256
// Test using '%d' to have the kernel assign an unused name,
250257
// and that we correctly copy back that generated name

src/vmm/src/devices/virtio/rng/device.rs

Lines changed: 99 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
use std::io;
55
use std::ops::Deref;
6+
use std::rc::Rc;
67
use std::sync::Arc;
78

89
use aws_lc_rs::rand;
@@ -19,7 +20,9 @@ use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1;
1920
use crate::devices::virtio::iov_deque::IovDequeError;
2021
use crate::devices::virtio::iovec::IoVecBufferMut;
2122
use crate::devices::virtio::queue::{FIRECRACKER_MAX_QUEUE_SIZE, InvalidAvailIdx, Queue};
23+
use crate::devices::virtio::rng::metrics::EntropyDeviceMetrics;
2224
use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType};
25+
use crate::devices::virtio::vsock::metrics::VsockDeviceMetrics;
2326
use crate::impl_device_type;
2427
use crate::logger::{IncMetric, debug, error};
2528
use crate::rate_limiter::{RateLimiter, TokenType};
@@ -55,6 +58,7 @@ pub struct Entropy {
5558
rate_limiter: RateLimiter,
5659

5760
buffer: IoVecBufferMut,
61+
metrics: Arc<EntropyDeviceMetrics>,
5862
}
5963

6064
impl Entropy {
@@ -71,6 +75,7 @@ impl Entropy {
7175
let queue_events = (0..RNG_NUM_QUEUES)
7276
.map(|_| EventFd::new(libc::EFD_NONBLOCK))
7377
.collect::<Result<Vec<EventFd>, io::Error>>()?;
78+
let metrics = METRICS.get_or_init(|| Arc::new(EntropyDeviceMetrics::default()));
7479

7580
Ok(Self {
7681
avail_features: 1 << VIRTIO_F_VERSION_1,
@@ -81,6 +86,7 @@ impl Entropy {
8186
queue_events,
8287
rate_limiter,
8388
buffer: IoVecBufferMut::new()?,
89+
metrics: metrics.clone(),
8490
})
8591
}
8692

@@ -116,7 +122,7 @@ impl Entropy {
116122

117123
let mut rand_bytes = vec![0; self.buffer.len() as usize];
118124
rand::fill(&mut rand_bytes).inspect_err(|_| {
119-
METRICS.host_rng_fails.inc();
125+
self.metrics.host_rng_fails.inc();
120126
})?;
121127

122128
// It is ok to unwrap here. We are writing `iovec.len()` bytes at offset 0.
@@ -130,7 +136,7 @@ impl Entropy {
130136
// This is safe since we checked in the event handler that the device is activated.
131137
let mem = &self.device_state.active_state().unwrap().mem;
132138
let index = desc.index;
133-
METRICS.entropy_event_count.inc();
139+
self.metrics.entropy_event_count.inc();
134140

135141
// SAFETY: This descriptor chain points to a single `DescriptorChain` memory buffer,
136142
// no other `IoVecBufferMut` object points to the same `DescriptorChain` at the same
@@ -147,33 +153,33 @@ impl Entropy {
147153
// to handle once we do have budget.
148154
if !self.rate_limit_request(u64::from(self.buffer.len())) {
149155
debug!("entropy: throttling entropy queue");
150-
METRICS.entropy_rate_limiter_throttled.inc();
156+
self.metrics.entropy_rate_limiter_throttled.inc();
151157
self.queues[RNG_QUEUE].undo_pop();
152158
break;
153159
}
154160

155161
self.handle_one().unwrap_or_else(|err| {
156162
error!("entropy: {err}");
157-
METRICS.entropy_event_fails.inc();
163+
self.metrics.entropy_event_fails.inc();
158164
0
159165
})
160166
}
161167
Err(err) => {
162168
error!("entropy: Could not parse descriptor chain: {err}");
163-
METRICS.entropy_event_fails.inc();
169+
self.metrics.entropy_event_fails.inc();
164170
0
165171
}
166172
};
167173

168174
match self.queues[RNG_QUEUE].add_used(index, bytes) {
169175
Ok(_) => {
170176
used_any = true;
171-
METRICS.entropy_bytes.add(bytes.into());
177+
self.metrics.entropy_bytes.add(bytes.into());
172178
}
173179
Err(err) => {
174180
error!("entropy: Could not add used descriptor to queue: {err}");
175181
Self::rate_limit_replenish_request(&mut self.rate_limiter, bytes.into());
176-
METRICS.entropy_event_fails.inc();
182+
self.metrics.entropy_event_fails.inc();
177183
// If we are not able to add a buffer to the used queue, something
178184
// is probably seriously wrong, so just stop processing additional
179185
// buffers
@@ -186,7 +192,7 @@ impl Entropy {
186192
if used_any {
187193
self.signal_used_queue().unwrap_or_else(|err| {
188194
error!("entropy: {err:?}");
189-
METRICS.entropy_event_fails.inc()
195+
self.metrics.entropy_event_fails.inc()
190196
});
191197
}
192198

@@ -196,25 +202,25 @@ impl Entropy {
196202
pub(crate) fn process_entropy_queue_event(&mut self) {
197203
if let Err(err) = self.queue_events[RNG_QUEUE].read() {
198204
error!("Failed to read entropy queue event: {err}");
199-
METRICS.entropy_event_fails.inc();
205+
self.metrics.entropy_event_fails.inc();
200206
} else if !self.rate_limiter.is_blocked() {
201207
// We are not throttled, handle the entropy queue
202208
self.process_entropy_queue().unwrap()
203209
} else {
204-
METRICS.rate_limiter_event_count.inc();
210+
self.metrics.rate_limiter_event_count.inc();
205211
}
206212
}
207213

208214
pub(crate) fn process_rate_limiter_event(&mut self) {
209-
METRICS.rate_limiter_event_count.inc();
215+
self.metrics.rate_limiter_event_count.inc();
210216
match self.rate_limiter.event_handler() {
211217
Ok(_) => {
212218
// There might be enough budget now to process entropy requests.
213219
self.process_entropy_queue().unwrap()
214220
}
215221
Err(err) => {
216222
error!("entropy: Failed to handle rate-limiter event: {err:?}");
217-
METRICS.entropy_event_fails.inc();
223+
self.metrics.entropy_event_fails.inc();
218224
}
219225
}
220226
}
@@ -306,7 +312,7 @@ impl VirtioDevice for Entropy {
306312
}
307313

308314
self.activate_event.write(1).map_err(|_| {
309-
METRICS.activate_fails.inc();
315+
self.metrics.activate_fails.inc();
310316
ActivateError::EventFd
311317
})?;
312318
self.device_state = DeviceState::Activated(ActiveState { mem, interrupt });
@@ -320,7 +326,7 @@ mod tests {
320326

321327
use super::*;
322328
use crate::check_metric_after_block;
323-
use crate::devices::virtio::device::{VirtioDevice, VirtioDeviceType};
329+
use crate::devices::virtio::device::VirtioDevice;
324330
use crate::devices::virtio::queue::VIRTQ_DESC_F_WRITE;
325331
use crate::devices::virtio::test_utils::test::{
326332
VirtioTestDevice, VirtioTestHelper, create_virtio_mem,
@@ -358,7 +364,7 @@ mod tests {
358364
#[test]
359365
fn test_device_type() {
360366
let entropy_dev = default_entropy();
361-
assert_eq!(entropy_dev.device_type(), VirtioDeviceType::Rng);
367+
assert_eq!(entropy_dev.device_type(), VIRTIO_ID_RNG);
362368
}
363369

364370
#[test]
@@ -446,29 +452,44 @@ mod tests {
446452
// Add a read-only descriptor (this should fail)
447453
th.add_desc_chain(RNG_QUEUE, 0, &[(0, 64, 0)]);
448454

449-
let entropy_event_fails = METRICS.entropy_event_fails.count();
450-
let entropy_event_count = METRICS.entropy_event_count.count();
451-
let entropy_bytes = METRICS.entropy_bytes.count();
452-
let host_rng_fails = METRICS.host_rng_fails.count();
455+
let entropy_event_fails = th.device().metrics.entropy_event_fails.count();
456+
let entropy_event_count = th.device().metrics.entropy_event_count.count();
457+
let entropy_bytes = th.device().metrics.entropy_bytes.count();
458+
let host_rng_fails = th.device().metrics.host_rng_fails.count();
453459
assert_eq!(th.emulate_for_msec(100).unwrap(), 1);
454-
assert_eq!(METRICS.entropy_event_fails.count(), entropy_event_fails + 1);
455-
assert_eq!(METRICS.entropy_event_count.count(), entropy_event_count + 1);
456-
assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes);
457-
assert_eq!(METRICS.host_rng_fails.count(), host_rng_fails);
460+
assert_eq!(
461+
th.device().metrics.entropy_event_fails.count(),
462+
entropy_event_fails + 1
463+
);
464+
assert_eq!(
465+
th.device().metrics.entropy_event_count.count(),
466+
entropy_event_count + 1
467+
);
468+
assert_eq!(th.device().metrics.entropy_bytes.count(), entropy_bytes);
469+
assert_eq!(th.device().metrics.host_rng_fails.count(), host_rng_fails);
458470

459471
// Add two good descriptors
460472
th.add_desc_chain(RNG_QUEUE, 0, &[(1, 10, VIRTQ_DESC_F_WRITE)]);
461473
th.add_desc_chain(RNG_QUEUE, 100, &[(2, 20, VIRTQ_DESC_F_WRITE)]);
462474

463-
let entropy_event_fails = METRICS.entropy_event_fails.count();
464-
let entropy_event_count = METRICS.entropy_event_count.count();
465-
let entropy_bytes = METRICS.entropy_bytes.count();
466-
let host_rng_fails = METRICS.host_rng_fails.count();
475+
let entropy_event_fails = th.device().metrics.entropy_event_fails.count();
476+
let entropy_event_count = th.device().metrics.entropy_event_count.count();
477+
let entropy_bytes = th.device().metrics.entropy_bytes.count();
478+
let host_rng_fails = th.device().metrics.host_rng_fails.count();
467479
assert_eq!(th.emulate_for_msec(100).unwrap(), 1);
468-
assert_eq!(METRICS.entropy_event_fails.count(), entropy_event_fails);
469-
assert_eq!(METRICS.entropy_event_count.count(), entropy_event_count + 2);
470-
assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes + 30);
471-
assert_eq!(METRICS.host_rng_fails.count(), host_rng_fails);
480+
assert_eq!(
481+
th.device().metrics.entropy_event_fails.count(),
482+
entropy_event_fails
483+
);
484+
assert_eq!(
485+
th.device().metrics.entropy_event_count.count(),
486+
entropy_event_count + 2
487+
);
488+
assert_eq!(
489+
th.device().metrics.entropy_bytes.count(),
490+
entropy_bytes + 30
491+
);
492+
assert_eq!(th.device().metrics.host_rng_fails.count(), host_rng_fails);
472493

473494
th.add_desc_chain(
474495
RNG_QUEUE,
@@ -480,15 +501,24 @@ mod tests {
480501
],
481502
);
482503

483-
let entropy_event_fails = METRICS.entropy_event_fails.count();
484-
let entropy_event_count = METRICS.entropy_event_count.count();
485-
let entropy_bytes = METRICS.entropy_bytes.count();
486-
let host_rng_fails = METRICS.host_rng_fails.count();
504+
let entropy_event_fails = th.device().metrics.entropy_event_fails.count();
505+
let entropy_event_count = th.device().metrics.entropy_event_count.count();
506+
let entropy_bytes = th.device().metrics.entropy_bytes.count();
507+
let host_rng_fails = th.device().metrics.host_rng_fails.count();
487508
assert_eq!(th.emulate_for_msec(100).unwrap(), 1);
488-
assert_eq!(METRICS.entropy_event_fails.count(), entropy_event_fails);
489-
assert_eq!(METRICS.entropy_event_count.count(), entropy_event_count + 1);
490-
assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes + 512);
491-
assert_eq!(METRICS.host_rng_fails.count(), host_rng_fails);
509+
assert_eq!(
510+
th.device().metrics.entropy_event_fails.count(),
511+
entropy_event_fails
512+
);
513+
assert_eq!(
514+
th.device().metrics.entropy_event_count.count(),
515+
entropy_event_count + 1
516+
);
517+
assert_eq!(
518+
th.device().metrics.entropy_bytes.count(),
519+
entropy_bytes + 512
520+
);
521+
assert_eq!(th.device().metrics.host_rng_fails.count(), host_rng_fails);
492522
}
493523

494524
#[test]
@@ -500,7 +530,7 @@ mod tests {
500530
let mut dev = th.device();
501531

502532
check_metric_after_block!(
503-
&METRICS.entropy_event_fails,
533+
&dev.metrics.entropy_event_fails,
504534
1,
505535
dev.process_rate_limiter_event()
506536
);
@@ -519,7 +549,7 @@ mod tests {
519549
// buffer should be processed normally
520550
th.add_desc_chain(RNG_QUEUE, 0, &[(0, 4000, VIRTQ_DESC_F_WRITE)]);
521551
check_metric_after_block!(
522-
METRICS.entropy_bytes,
552+
th.device().metrics.entropy_bytes,
523553
4000,
524554
th.device().process_entropy_queue()
525555
);
@@ -535,12 +565,12 @@ mod tests {
535565
th.add_desc_chain(RNG_QUEUE, 0, &[(0, 4000, VIRTQ_DESC_F_WRITE)]);
536566
th.add_desc_chain(RNG_QUEUE, 1, &[(1, 1000, VIRTQ_DESC_F_WRITE)]);
537567
check_metric_after_block!(
538-
METRICS.entropy_bytes,
568+
th.device().metrics.entropy_bytes,
539569
4000,
540570
th.device().process_entropy_queue()
541571
);
542572
check_metric_after_block!(
543-
METRICS.entropy_rate_limiter_throttled,
573+
th.device().metrics.entropy_rate_limiter_throttled,
544574
1,
545575
th.device().process_entropy_queue()
546576
);
@@ -549,7 +579,11 @@ mod tests {
549579
// 250 msec should give enough time for replenishing 1000 bytes worth of tokens.
550580
// Give it an extra 100 ms just to be sure the timer event reaches us from the kernel.
551581
std::thread::sleep(Duration::from_millis(350));
552-
check_metric_after_block!(METRICS.entropy_bytes, 1000, th.emulate_for_msec(100));
582+
check_metric_after_block!(
583+
th.device().metrics.entropy_bytes,
584+
1000,
585+
th.emulate_for_msec(100)
586+
);
553587
assert!(!th.device().rate_limiter().is_blocked());
554588
}
555589

@@ -567,7 +601,7 @@ mod tests {
567601
// so this should succeed.
568602
th.add_desc_chain(RNG_QUEUE, 0, &[(0, 4000, VIRTQ_DESC_F_WRITE)]);
569603
check_metric_after_block!(
570-
METRICS.entropy_bytes,
604+
th.device().metrics.entropy_bytes,
571605
4000,
572606
th.device().process_entropy_queue()
573607
);
@@ -577,30 +611,43 @@ mod tests {
577611
std::thread::sleep(Duration::from_millis(1000));
578612

579613
// First one should succeed
580-
let entropy_bytes = METRICS.entropy_bytes.count();
614+
let entropy_bytes = th.device().metrics.entropy_bytes.count();
581615
th.add_desc_chain(RNG_QUEUE, 0, &[(0, 64, VIRTQ_DESC_F_WRITE)]);
582-
check_metric_after_block!(METRICS.entropy_bytes, 64, th.emulate_for_msec(100));
583-
assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes + 64);
616+
check_metric_after_block!(
617+
th.device().metrics.entropy_bytes,
618+
64,
619+
th.emulate_for_msec(100)
620+
);
621+
assert_eq!(
622+
th.device().metrics.entropy_bytes.count(),
623+
entropy_bytes + 64
624+
);
584625
// The rate limiter is not blocked yet.
585626
assert!(!th.device().rate_limiter().is_blocked());
586627
// But immediately asking another operation should block it because we have 1 op every 100
587628
// msec.
588629
th.add_desc_chain(RNG_QUEUE, 0, &[(0, 64, VIRTQ_DESC_F_WRITE)]);
589630
check_metric_after_block!(
590-
METRICS.entropy_rate_limiter_throttled,
631+
th.device().metrics.entropy_rate_limiter_throttled,
591632
1,
592633
th.emulate_for_msec(50)
593634
);
594635
// Entropy bytes count should not have increased.
595-
assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes + 64);
636+
assert_eq!(
637+
th.device().metrics.entropy_bytes.count(),
638+
entropy_bytes + 64
639+
);
596640
// After 100 msec (plus 50 msec for ensuring the event reaches us from the kernel), the
597641
// timer of the rate limiter should fire saying that there's now more tokens available
598642
check_metric_after_block!(
599-
METRICS.rate_limiter_event_count,
643+
th.device().metrics.rate_limiter_event_count,
600644
1,
601645
th.emulate_for_msec(150)
602646
);
603647
// The rate limiter event should have processed the pending buffer as well
604-
assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes + 128);
648+
assert_eq!(
649+
th.device().metrics.entropy_bytes.count(),
650+
entropy_bytes + 128
651+
);
605652
}
606653
}

0 commit comments

Comments
 (0)