Skip to content

Commit d59ac2d

Browse files
viona: support (enough) VIRTIO_NET_F_CTRL_RX to leave all-multicast mode (#1125)
* viona: support (enough) `VIRTIO_NET_F_CTRL_RX` to remove promisc Closes #1122. * Actually get it all working. Well, that was an adventure. Closes 1127. * Actually move filter tables and associated state during migration. * Review feedback. * Clippy.. * Debugging... * Got it. * Ensure we unset the control queue if we re-init without CTRL_VQ * do reset is_control with the rest of the queue having `is_control` survive across reset is benign but might be confusing for debugging: after a device reset the previous control queue(s[^1]) remains "is_control: true" all the way until a VirtIO driver negotiates features and has prompted us to set new control queues. In the case of viona, if we tried to manage (say, pause) queues before feature negotiation was complete, we've left a more complicated situation to think through! instead, we can clear `is_control` on reset, but doing so exposes a bug. reset_queues_locked() would drive the queue state change *after* clearing queue state, but queue_change() itself uses queue state to operate correctly. if queue_change() wanted to tear down Propolis-side bookkeeping (or debugging, logging, ...) we'll have removed necessary data before giving it a chance to do its job. so, `VirtQueue::reset` should come *after* a successful queue_change(). this has the mild benefit(?) of if we've failed to reset the queue we'll leave it as it was. might be helpful for debugging the erroneous case? * drivers which request their own MAC and others do need promisc * `needed_promisc` should treat an empty unicast list as non-promisc. * Missed a multiqueue bug... * Fixup iopair calculation during import. * Test that `num_queues` is static, device control queue is correct --------- Co-authored-by: iximeow <iximeow@oxide.computer>
1 parent fa39721 commit d59ac2d

4 files changed

Lines changed: 694 additions & 146 deletions

File tree

lib/propolis/src/hw/virtio/bits.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ pub const VIRTIO_NET_F_STATUS: u64 = 1 << 16;
2121
pub const VIRTIO_NET_F_CTRL_VQ: u64 = 1 << 17;
2222
pub const VIRTIO_NET_F_CTRL_RX: u64 = 1 << 18;
2323
pub const VIRTIO_NET_F_CTRL_VLAN: u64 = 1 << 19;
24+
pub const VIRTIO_NET_F_CTRL_RX_EXTRA: u64 = 1 << 20;
25+
pub const VIRTIO_NET_F_GUEST_ANNOUNCE: u64 = 1 << 21;
2426
pub const VIRTIO_NET_F_MQ: u64 = 1 << 22;
2527

2628
// virtio-block feature bits

lib/propolis/src/hw/virtio/pci.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -530,7 +530,14 @@ impl PciVirtioState {
530530
ro.write_u16(state.msix_cfg_vec);
531531
}
532532
CommonConfigReg::NumQueues => {
533-
ro.write_u16(self.queues.count().get());
533+
// This is the maximum count of *non-administration* virtqueues
534+
// supported by the device. As we do not support
535+
// `VIRTIO_F_ADMIN_VQ`, this is all of the virtqueues we have
536+
// allocated.
537+
ro.write_u16(
538+
u16::try_from(self.queues.max_capacity())
539+
.expect("must have fewer than u16::MAX queues"),
540+
);
534541
}
535542
CommonConfigReg::DeviceStatus => {
536543
let state = self.state.lock().unwrap();
@@ -1161,10 +1168,13 @@ impl PciVirtioState {
11611168
state: &mut MutexGuard<VirtioState>,
11621169
) {
11631170
for queue in self.queues.iter_all() {
1164-
queue.reset();
1171+
// We must queue_change() before the reset: the device may use
1172+
// information from the queue in handling the state transition, so
1173+
// give it an opportunity to use that data as part of a reset.
11651174
if dev.queue_change(queue, VqChange::Reset).is_err() {
11661175
self.needs_reset_locked(dev, state);
11671176
}
1177+
queue.reset();
11681178
}
11691179
}
11701180

lib/propolis/src/hw/virtio/queue.rs

Lines changed: 97 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// License, v. 2.0. If a copy of the MPL was not distributed with this
33
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
44

5+
use std::io::Error as IoError;
56
use std::mem;
67
use std::num::{NonZeroU16, Wrapping};
78
use std::sync::atomic::{fence, AtomicBool, AtomicUsize, Ordering};
@@ -192,10 +193,11 @@ impl VqUsed {
192193
mem.write(self.gpa_flags, &value);
193194
}
194195

195-
/// Disables notifications on this queue; returns the previous state.
196+
/// Disables notifications on this queue; returns whether notfications were
197+
/// enabled before.
196198
fn disable_notify(&self, mem: &MemCtx) -> bool {
197199
let flags = self.flags(mem);
198-
let current = flags.contains(UsedFlags::NO_NOTIFY);
200+
let current = !flags.contains(UsedFlags::NO_NOTIFY);
199201
self.set_flags(flags | UsedFlags::NO_NOTIFY, mem);
200202
current
201203
}
@@ -326,6 +328,7 @@ impl VirtQueue {
326328
used.reset();
327329
self.live.store(false, Ordering::Release);
328330
self.enabled.store(false, Ordering::Release);
331+
self.is_control.store(false, Ordering::Release);
329332
}
330333

331334
pub(super) fn enable(&self) {
@@ -348,8 +351,8 @@ impl VirtQueue {
348351
self.is_control.load(Ordering::Acquire)
349352
}
350353

351-
pub(super) fn set_control(&self) {
352-
self.is_control.store(true, Ordering::Release);
354+
fn set_control(&self, val: bool) {
355+
self.is_control.store(val, Ordering::Release);
353356
}
354357

355358
#[inline(always)]
@@ -564,7 +567,9 @@ impl VirtQueue {
564567
used.interrupt.as_ref().map(|x| x.read())
565568
}
566569

567-
/// Disables interrupts (notifications) on the `Used` ring
570+
/// Disables interrupts (notifications) on the `Used` ring.
571+
///
572+
/// Returns `true` if notifications were previously enabled.
568573
pub(super) fn disable_intr(&self, mem: &MemCtx) -> bool {
569574
let used = self.used.lock().unwrap();
570575
used.disable_notify(mem)
@@ -786,6 +791,66 @@ impl Chain {
786791
});
787792
total == item_sz
788793
}
794+
795+
pub fn read_many_owned<T: Copy + FromBytes>(
796+
&mut self,
797+
mem: &MemCtx,
798+
n_elements: usize,
799+
) -> Result<Vec<T>, IoError> {
800+
let required_sz = mem::size_of::<T>() * n_elements;
801+
if (self.read_stat.bytes_remain as usize) < required_sz {
802+
return Err(IoError::new(
803+
std::io::ErrorKind::InvalidData,
804+
"Buffer too small",
805+
));
806+
}
807+
808+
let mut vec = Vec::with_capacity(n_elements);
809+
810+
if n_elements == 0 {
811+
return Ok(vec);
812+
}
813+
814+
// SAFETY: a `u8` has no alignment requirement so it is safe to
815+
// construct a byte slice from another Vec. These bytes will not be read
816+
// before initialisation.
817+
let raw = unsafe {
818+
std::slice::from_raw_parts_mut(
819+
vec.spare_capacity_mut().as_ptr() as *mut u8,
820+
required_sz,
821+
)
822+
};
823+
824+
let mut done = 0;
825+
let total = self.for_remaining_type(true, |addr, len| {
826+
let mut remain = GuestData::from(&mut raw[done..]);
827+
if let Some(copied) = mem.read_into(addr, &mut remain, len) {
828+
let need_more = copied != remain.len();
829+
830+
done += copied;
831+
(copied, need_more)
832+
} else {
833+
// Copy failed, so do not attempt anything else
834+
(0, false)
835+
}
836+
});
837+
838+
if total == required_sz {
839+
// SAFETY: read_many() was successful and just initialized all
840+
// `n_elements` of the vector.
841+
unsafe {
842+
vec.set_len(n_elements);
843+
}
844+
845+
Ok(vec)
846+
} else {
847+
Err(IoError::new(
848+
std::io::ErrorKind::UnexpectedEof,
849+
"Insufficient bytes to complete read",
850+
))
851+
}
852+
}
853+
789854
/// Fetch a string of readable guest regions from the chain, provided there
790855
/// are enough to cover a specified length.
791856
pub fn readable_bufs(&mut self, len: usize) -> Option<Vec<GuestRegion>> {
@@ -808,6 +873,7 @@ impl Chain {
808873
assert_eq!(remain, 0);
809874
Some(bufs)
810875
}
876+
811877
pub fn write<T: Copy + IntoBytes>(
812878
&mut self,
813879
item: &T,
@@ -1030,8 +1096,19 @@ impl VirtQueues {
10301096
Ok(())
10311097
}
10321098

1099+
pub fn set_ctl_queues(&self, indices: &[u16]) -> Result<(), ()> {
1100+
for vq in &self.queues {
1101+
vq.set_control(false);
1102+
}
1103+
for i in indices {
1104+
let vq = self.queues.get(usize::from(*i)).ok_or(())?;
1105+
vq.set_control(true);
1106+
}
1107+
Ok(())
1108+
}
1109+
10331110
pub fn count(&self) -> NonZeroU16 {
1034-
NonZeroU16::try_from(self.len() as u16)
1111+
NonZeroU16::try_from(self.iter().count() as u16)
10351112
.expect("queue count already validated")
10361113
}
10371114

@@ -1055,34 +1132,31 @@ impl VirtQueues {
10551132
pub fn get(&self, qid: u16) -> Option<&Arc<VirtQueue>> {
10561133
let len = self.len();
10571134
let qid = usize::from(qid);
1058-
// XXX: This special case is for the virtio network device, which always
1059-
// puts the control queue at the end of queue vector (see VirtIO 1.2
1060-
// section 5.1.2). None of the other devices currently handle queues
1061-
// specially in this way, but we should come up with some better
1062-
// mechanism here.
1063-
if qid + 1 == len {
1064-
Some(self.get_control())
1065-
} else {
1066-
self.queues[..len].get(qid)
1067-
}
1068-
}
10691135

1070-
fn get_control(&self) -> &Arc<VirtQueue> {
1071-
&self.queues[self.max_capacity() - 1]
1136+
// Control queues may be placed almost arbitrarily depending on the
1137+
// device type -- they may be mixed in with other queues, or placed at
1138+
// the very end after preallocated but unused queues.
1139+
self.queues.get(qid).filter(|v| v.is_control() || qid < len)
10721140
}
10731141

10741142
pub fn iter(&self) -> impl std::iter::Iterator<Item = &Arc<VirtQueue>> {
1075-
let len = self.len() - 1;
1076-
self.queues[..len].iter().chain([self.get_control()])
1143+
let len = self.len();
1144+
self.queues
1145+
.iter()
1146+
.enumerate()
1147+
.filter_map(move |(i, v)| (i < len || v.is_control()).then_some(v))
10771148
}
10781149

10791150
/// Iterate all queues the device may have used; the current number of
10801151
/// VirtQueues may be lower than a previous high watermark, but in cases
10811152
/// like device reset and teardown we must manage all viona rings
10821153
/// corresponding to ever-active VirtQueues.
10831154
pub fn iter_all(&self) -> impl std::iter::Iterator<Item = &Arc<VirtQueue>> {
1084-
let peak = self.peak() - 1;
1085-
self.queues[..peak].iter().chain([self.get_control()])
1155+
let peak = self.peak();
1156+
self.queues
1157+
.iter()
1158+
.enumerate()
1159+
.filter_map(move |(i, v)| (i < peak || v.is_control()).then_some(v))
10861160
}
10871161

10881162
pub fn export(&self) -> migrate::VirtQueuesV1 {

0 commit comments

Comments
 (0)