Skip to content

Commit 1eb01e2

Browse files
committed
fix(site-explorer): enforce the declared DPU mode on fallback-serial matches
The per-host matching loop now runs the same NIC/DPU mode check on a DPU paired through `fallback_dpu_serial_numbers` as it does on a DPU the host reports over PCIe: a BlueField in the wrong mode gets `set_nic_mode` and the host is power-cycled to apply it, instead of being trusted as already configured. That reset now fires even when the host BMC never enumerated the DPU over PCIe -- the usual reason we end up on the fallback path -- so the queued flip can actually take effect.

Until now the fallback path attached a matched DPU with no mode check. On a host the operator declared `nic_mode`, that DPU would then be dropped (a NIC-mode host has no managed DPUs), so the host registered as zero-DPU with the flip never issued -- the database read "NIC-mode host" while the BlueField stayed in DPU mode. Incomplete PCIe enumeration (a GB200 dropping a DPU from its inventory, say) is exactly what pushes a DPU-to-NIC migration onto this path, so this was the common case, not an edge case.

Adds a regression test -- a NIC-mode host whose DPU is paired only by fallback serial and still reporting DPU mode -- that fails on the old code (the host registers zero-DPU with no `set_nic_mode`) and passes now.

Signed-off-by: Chet Nichols III <chetn@nvidia.com>
1 parent b6d77d8 commit 1eb01e2

2 files changed

Lines changed: 185 additions & 30 deletions

File tree

crates/site-explorer/src/lib.rs

Lines changed: 59 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,7 @@ impl SiteExplorer {
11451145
let DpuExplorationState {
11461146
reported_total: host_reported_dpus_total,
11471147
running_as_nic_total: mut host_reported_dpus_nic_mode_total,
1148-
all_configured: all_dpus_configured_properly_in_host,
1148+
all_configured: mut all_dpus_configured_properly_in_host,
11491149
running_as_dpu: mut dpus_explored_for_host,
11501150
} = dpu_exploration;
11511151

@@ -1162,30 +1162,52 @@ impl SiteExplorer {
11621162
{
11631163
for dpu_sn in &expected_machine.data.fallback_dpu_serial_numbers {
11641164
if let Some(dpu_ep) = dpu_sn_to_endpoint.remove(dpu_sn.as_str()) {
1165-
// We do not want to attach bluefields that are in NIC mode as DPUs to the host
1166-
if is_dpu_in_nic_mode(&dpu_ep, &ep)
1167-
&& host_reported_dpus_total
1168-
.saturating_sub(host_reported_dpus_nic_mode_total)
1169-
> 0
1170-
{
1171-
host_reported_dpus_nic_mode_total += 1;
1172-
continue;
1173-
}
1165+
// Enforce the host's declared DPU mode on a fallback-serial
1166+
// match the same way the host-reported path does, rather than
1167+
// trusting it as already-configured. A DPU still in the wrong
1168+
// mode gets a `set_nic_mode` here and has to wait for the host
1169+
// reset to apply it; without this, a DPU-mode BlueField on a
1170+
// `NicMode` host would be attached and then dropped to zero-DPU
1171+
// (the `NicMode` arm further down), leaving the database reading
1172+
// "NIC-mode host" while the hardware stayed in DPU mode.
1173+
let mode_check = Some(
1174+
self.check_and_configure_dpu_mode(
1175+
&dpu_ep,
1176+
dpu_ep.report.model().unwrap_or_default(),
1177+
host_dpu_mode,
1178+
)
1179+
.await,
1180+
);
11741181

1175-
// we found at least one DPU from expected machines for this host
1176-
// assume that the expected machines is the source of truth. Clear the
1177-
// contents of dpus_explored_for_host to discard the previous results of
1178-
// iterating over the hosts pcie devices.
1179-
if !dpu_added {
1180-
dpus_explored_for_host.clear();
1182+
match classify_matched_dpu(&dpu_ep, &ep, mode_check) {
1183+
DiscoveredDpu::RunningAsDpu(dpu) => {
1184+
// The expected-machine fallback list is the source of
1185+
// truth here, so discard whatever the PCIe scan found
1186+
// on the first confirmed match.
1187+
if !dpu_added {
1188+
dpus_explored_for_host.clear();
1189+
}
1190+
dpu_added = true;
1191+
dpus_explored_for_host.push(dpu);
1192+
}
1193+
DiscoveredDpu::RunningAsNic => {
1194+
host_reported_dpus_nic_mode_total += 1;
1195+
}
1196+
DiscoveredDpu::NeedsReconfig => {
1197+
// `set_nic_mode` was just issued; the host needs a
1198+
// reset before this DPU re-reports in the new mode, so
1199+
// mark it not-yet-configured and let the reset path
1200+
// below run.
1201+
all_dpus_configured_properly_in_host = false;
1202+
}
1203+
DiscoveredDpu::ModeCheckFailed(err) => {
1204+
tracing::warn!(
1205+
dpu = %dpu_ep.address,
1206+
error = %err,
1207+
"failed to check fallback-matched DPU mode; skipping this device this pass",
1208+
);
1209+
}
11811210
}
1182-
1183-
dpu_added = true;
1184-
dpus_explored_for_host.push(ExploredDpu {
1185-
bmc_ip: dpu_ep.address,
1186-
host_pf_mac_address: get_host_pf_mac_address(&dpu_ep),
1187-
report: dpu_ep.report.into(),
1188-
});
11891211
}
11901212
}
11911213
}
@@ -1199,13 +1221,20 @@ impl SiteExplorer {
11991221
// confirmed to be running as plain NICs.
12001222
let expected_managed_dpus_total =
12011223
host_reported_dpus_total.saturating_sub(host_reported_dpus_nic_mode_total);
1202-
if expected_managed_dpus_total > 0 {
1203-
tracing::warn!(
1204-
address = %ep.address,
1205-
exploration_report = ?ep,
1206-
"cannot identify managed host because the site explorer has only discovered {} out of the {} attached DPUs (all_dpus_configured_properly_in_host={all_dpus_configured_properly_in_host}):\n{:#?}",
1207-
dpus_explored_for_host.len(), expected_managed_dpus_total, dpus_explored_for_host
1208-
);
1224+
// Enter the reset/wait path when DPUs are still expected to pair, or
1225+
// when a `set_nic_mode` was just issued -- a fallback-serial match can
1226+
// queue a flip even on a host whose BMC reports no DPU over PCIe
1227+
// (`expected_managed_dpus_total == 0`), which is the usual reason we are
1228+
// on the fallback path at all.
1229+
if expected_managed_dpus_total > 0 || !all_dpus_configured_properly_in_host {
1230+
if expected_managed_dpus_total > 0 {
1231+
tracing::warn!(
1232+
address = %ep.address,
1233+
exploration_report = ?ep,
1234+
"cannot identify managed host because the site explorer has only discovered {} out of the {} attached DPUs (all_dpus_configured_properly_in_host={all_dpus_configured_properly_in_host}):\n{:#?}",
1235+
dpus_explored_for_host.len(), expected_managed_dpus_total, dpus_explored_for_host
1236+
);
1237+
}
12091238

12101239
if !all_dpus_configured_properly_in_host {
12111240
// A queued `set_nic_mode` only takes effect after a host

crates/site-explorer/tests/site_explorer.rs

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2604,6 +2604,132 @@ async fn test_site_explorer_power_cycles_non_dell_host_to_apply_nic_mode(
26042604
Ok(())
26052605
}
26062606

2607+
/// Regression guard for the fallback-serial path (#2631): a DPU paired only
2608+
/// through `fallback_dpu_serial_numbers` must get the same NIC-mode enforcement
2609+
/// as a host-reported one. The host BMC here enumerates no DPU over PCIe -- the
2610+
/// usual reason the fallback exists (e.g. a GB200 that drops a DPU from its
2611+
/// inventory) -- so the only link is the operator-listed serial, and the DPU is
2612+
/// still reporting DPU mode against a `NicMode` host.
2613+
///
2614+
/// Before the fix the fallback path trusted the match as already-configured: it
2615+
/// attached the DPU without a mode check, then dropped it to zero-DPU, so the
2616+
/// host registered as a NIC-mode host while the BlueField stayed in DPU mode and
2617+
/// `set_nic_mode` was never issued. Now the flip is issued, the host is
2618+
/// power-cycled to apply it, and the host waits instead of settling this pass.
2619+
#[sqlx_test]
2620+
async fn test_site_explorer_enforces_nic_mode_on_fallback_serial_match(
2621+
pool: PgPool,
2622+
) -> Result<(), Box<dyn std::error::Error>> {
2623+
use model::expected_machine::{DpuMode, ExpectedMachine, ExpectedMachineData};
2624+
use model::site_explorer::NicMode;
2625+
2626+
let env = Env::new(pool).await;
2627+
2628+
const FALLBACK_DPU_SERIAL: &str = "fallback-only-dpu-serial";
2629+
// DPU reports DPU mode; the host report carries no DPU device, so the
2630+
// serial is the only thing that can pair them.
2631+
let dpu_config = DpuConfig {
2632+
nic_mode: Some(NicMode::Dpu),
2633+
serial: FALLBACK_DPU_SERIAL.to_string(),
2634+
..DpuConfig::default()
2635+
};
2636+
let mock_host = ManagedHostConfig::default();
2637+
let host_bmc_mac = mock_host.bmc_mac_address;
2638+
2639+
// Operator declares the host NIC mode and lists the DPU's serial as a
2640+
// pairing fallback.
2641+
let mut txn = env.pool.begin().await?;
2642+
db::expected_machine::create(
2643+
&mut txn,
2644+
ExpectedMachine {
2645+
id: None,
2646+
bmc_mac_address: host_bmc_mac,
2647+
data: ExpectedMachineData {
2648+
bmc_username: "ADMIN".to_string(),
2649+
bmc_password: "PASS".to_string(),
2650+
serial_number: "EM-2631-FALLBACK-NIC".to_string(),
2651+
metadata: model::metadata::Metadata::new_with_default_name(),
2652+
dpu_mode: DpuMode::NicMode,
2653+
fallback_dpu_serial_numbers: vec![FALLBACK_DPU_SERIAL.to_string()],
2654+
..Default::default()
2655+
},
2656+
},
2657+
)
2658+
.await?;
2659+
txn.commit().await?;
2660+
2661+
let mut host_bmc = env.new_machine(&host_bmc_mac.to_string(), "SomeVendor");
2662+
let mut dpu_bmc = env.new_machine(&dpu_config.bmc_mac_address.to_string(), "NVIDIA/BF/BMC");
2663+
host_bmc.discover_dhcp(env.api()).await?;
2664+
dpu_bmc.discover_dhcp(env.api()).await?;
2665+
2666+
let explorer_config = SiteExplorerConfig {
2667+
enabled: Arc::new(true.into()),
2668+
retained_boot_interface_window: None,
2669+
explorations_per_run: 10,
2670+
concurrent_explorations: 1,
2671+
run_interval: std::time::Duration::from_secs(1),
2672+
create_machines: Arc::new(true.into()),
2673+
..Default::default()
2674+
};
2675+
let explorer = env.test_site_explorer(explorer_config);
2676+
explorer.insert_endpoint_results(vec![
2677+
(dpu_bmc.ip.parse().unwrap(), Ok(dpu_config.clone().into())),
2678+
(host_bmc.ip.parse().unwrap(), Ok(mock_host.into())),
2679+
]);
2680+
2681+
// First iteration: initial endpoint exploration.
2682+
explorer.run_single_iteration().await.unwrap();
2683+
let mut txn = env.pool.begin().await?;
2684+
for ip in [host_bmc.ip.parse()?, dpu_bmc.ip.parse()?] {
2685+
db::explored_endpoints::set_preingestion_complete(ip, &mut txn).await?;
2686+
}
2687+
txn.commit().await?;
2688+
// Second iteration: per-host matching falls through to the fallback-serial
2689+
// path, which must enforce the declared NIC mode.
2690+
explorer.run_single_iteration().await.unwrap();
2691+
2692+
{
2693+
let calls = explorer
2694+
.endpoint_explorer()
2695+
.set_nic_mode_calls
2696+
.lock()
2697+
.unwrap();
2698+
assert!(
2699+
calls.iter().any(|(_, mode)| *mode == NicMode::Nic),
2700+
"fallback-matched DPU on a NicMode host should get set_nic_mode(Nic); calls so far: {calls:?}"
2701+
);
2702+
}
2703+
2704+
// The host must not settle as a zero-DPU managed host until the flip has
2705+
// applied -- otherwise the database reads "NIC-mode host" while the
2706+
// BlueField is still physically in DPU mode.
2707+
let explored_managed_hosts = db::explored_managed_host::find_all(&env.pool).await?;
2708+
assert!(
2709+
explored_managed_hosts.is_empty(),
2710+
"host should wait for the queued NIC-mode flip to apply, not register as zero-DPU this pass"
2711+
);
2712+
2713+
// The reset path fires even though the host BMC never enumerated the DPU
2714+
// over PCIe (`expected_managed_dpus_total == 0`), so the queued flip can
2715+
// actually apply.
2716+
{
2717+
let power_calls = explorer
2718+
.endpoint_explorer()
2719+
.redfish_power_control_calls
2720+
.lock()
2721+
.unwrap();
2722+
assert!(
2723+
power_calls
2724+
.iter()
2725+
.any(|(_, action)| matches!(action, libredfish::SystemPowerControl::PowerCycle)),
2726+
"host should be power-cycled to apply the queued NIC-mode flip; power calls so far: {power_calls:?}"
2727+
);
2728+
}
2729+
2730+
Ok(())
2731+
}
2732+
26072733
/// A managed host's DPU-facing `machine_interface` is created (via DHCP) with
26082734
/// just a MAC and no `boot_interface_id`. The exploration that ingests the host
26092735
/// then backfills the vendor-specific Redfish interface id onto that row, matched

0 commit comments

Comments
 (0)