Skip to content

Commit a28f6cf

Browse files
[review] Address feedback, nexthop is now the selected switch
Changes: - Remove global eip_gateways map from PortManagerInner, as the VPC route manager RPW activates after instance start - Refactor member reconciler methods to take &MemberReconcileCtx - Change forwarding next hop from member sleds to a single switch zone IP - Add resolver to MulticastSledClient for switch zone address lookup
1 parent 18943d0 commit a28f6cf

8 files changed

Lines changed: 153 additions & 337 deletions

File tree

.github/buildomat/jobs/deploy.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,6 @@ source .github/buildomat/ci-env.sh
190190
# swap them in. The deploy target is a ramdisk image without pkg(5), so we
191191
# use rem_drv/add_drv instead of the p5p approach used by install_opte.sh
192192
# and releng.
193-
# shellcheck source=/dev/null
194193
source tools/opte_version_override
195194
if [[ "x$OPTE_COMMIT" != "x" ]]; then
196195
curl -sSfOL "https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/module/$OPTE_COMMIT/xde"

illumos-utils/src/opte/port_manager.rs

Lines changed: 10 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -109,14 +109,6 @@ struct RouteSet {
109109
///
110110
/// The only lock nesting is:
111111
/// - `routes` then `ports` in `vpc_routes_ensure`
112-
/// - `ports` then `eip_gateways` in `create_port`
113-
///
114-
/// `set_eip_gateways` acquires each lock separately (global map first,
115-
/// then ports), so there is no nesting. A concurrent `create_port`
116-
/// between the two reads the already-updated global map. The subsequent
117-
/// port iteration is redundant but idempotent. Neither path is hot:
118-
/// `set_eip_gateways` runs once per background task pass and
119-
/// `create_port` runs at instance boot.
120112
///
121113
/// Note: `release_inner` acquires `ports` then `routes` sequentially
122114
/// (dropping each before acquiring the next).
@@ -137,11 +129,6 @@ struct PortManagerInner {
137129

138130
/// Map of all current resolved routes.
139131
routes: Mutex<HashMap<RouterId, RouteSet>>,
140-
141-
/// Most recent EIP gateway mappings, keyed by NIC ID. We store this here so
142-
/// that ports created after `set_eip_gateways` can seed their initial
143-
/// gateway state.
144-
eip_gateways: Mutex<HashMap<Uuid, HashMap<IpAddr, HashSet<Uuid>>>>,
145132
}
146133

147134
/// Mutable per-port state tracked alongside the immutable `Port`.
@@ -403,7 +390,6 @@ impl PortManager {
403390
underlay_ip,
404391
ports: Mutex::new(BTreeMap::new()),
405392
routes: Mutex::new(Default::default()),
406-
eip_gateways: Mutex::new(HashMap::new()),
407393
});
408394

409395
Self { inner }
@@ -479,17 +465,7 @@ impl PortManager {
479465
vni,
480466
gateway,
481467
});
482-
let mut new_port_state = PortState::new(port.clone());
483-
484-
// Seed gateway mappings from the global map so that a port
485-
// created after set_eip_gateways has the correct state
486-
// immediately. Lock order: ports then eip_gateways.
487-
if let Some(gw) =
488-
self.inner.eip_gateways.lock().unwrap().get(&nic.id).cloned()
489-
{
490-
new_port_state.eip_gateways = gw;
491-
}
492-
468+
let new_port_state = PortState::new(port.clone());
493469
let old = ports.insert((nic.id, nic.kind), new_port_state);
494470
assert!(
495471
old.is_none(),
@@ -761,24 +737,17 @@ impl PortManager {
761737
///
762738
/// Returns whether the internal mappings were changed.
763739
pub fn set_eip_gateways(&self, mappings: ExternalIpGatewayMap) -> bool {
764-
// Update global map (single lock). A concurrent create_port
765-
// between these two locks will read the updated global map and
766-
// seed correctly; the port iteration below is then a redundant
767-
// but idempotent overwrite.
768-
let mut global_gw = self.inner.eip_gateways.lock().unwrap();
769-
let changed = &*global_gw != &mappings.mappings;
770-
*global_gw = mappings.mappings.clone();
771-
drop(global_gw);
772-
773-
// Push into existing ports.
774740
let mut ports = self.inner.ports.lock().unwrap();
775-
for ((nic_id, _), port_state) in ports.iter_mut() {
741+
ports.iter_mut().fold(false, |changed, ((nic_id, _), port_state)| {
776742
let new_gw =
777743
mappings.mappings.get(nic_id).cloned().unwrap_or_default();
778-
port_state.eip_gateways = new_gw;
779-
}
780-
781-
changed
744+
if port_state.eip_gateways != new_gw {
745+
port_state.eip_gateways = new_gw;
746+
true
747+
} else {
748+
changed
749+
}
750+
})
782751
}
783752

784753
/// Lookup an OPTE port, and ensure its external IP config is up to date.
@@ -1627,7 +1596,7 @@ mod tests {
16271596
const SERVICES_VPC_VNI: Vni = Vni::SERVICES_VNI;
16281597

16291598
let handle = Handle::new().unwrap();
1630-
handle.set_xde_underlay("underlay0", "underlay1").unwrap();
1599+
handle.set_xde_underlay("foo0", "foo1").unwrap();
16311600

16321601
// First, create a port for a service.
16331602
//

nexus/src/app/background/tasks/multicast/groups.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -973,8 +973,8 @@ impl MulticastGroupReconciler {
973973

974974
// Clear M2P/forwarding from all sleds before DPD cleanup.
975975
// This must succeed before deleting DB records, otherwise
976-
// stale OPTE state would persist on failed sleds with no
977-
// source of truth to drive a later cleanup pass.
976+
// stale OPTE state would persist on sleds where the clear
977+
// failed, with no DB record to drive a retry on a later pass.
978978
sled_client
979979
.clear_m2p_and_forwarding(opctx, group)
980980
.await

0 commit comments

Comments
 (0)