Skip to content

Commit 042f398

Browse files
[internal-dns] register and publish ddmd in the switch zone
DDMD has always run in the switch zone alongside Dendrite, MGS, and MGD, but it was never registered in internal DNS, leaving no path for a cross-host consumer to discover it. This adds `ServiceName::Ddm`, plumbs `ddm_port` through `DnsConfigBuilder::host_zone_switch` and its callers (the RSS plan and reconfigurator DNS execution), threads an `Overridables::ddm_ports` map for the test suite, and lands a `DdmInstance` dropshot sim in test utils so that the test harness registers a real DDM port in DNS the same way it does for the other switch-zone services. We also drop the duplicate `DDMD_PORT` const in `ddm-admin-client` in favor of the canonical `omicron_common::address::DDMD_PORT`. Same-host callers continue to use `Client::localhost()`. This change was extracted from the multicast PR (zl/multicast-mgd-ddm), in which Nexus becomes the first cross-host consumer to resolve ddmd through DNS.
1 parent 61a6d60 commit 042f398

13 files changed

Lines changed: 222 additions & 14 deletions

File tree

clients/ddm-admin-client/src/lib.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ pub use ddm_admin_client::types;
1313

1414
use ddm_admin_client::Client as InnerClient;
1515
use either::Either;
16+
use omicron_common::address::DDMD_PORT;
1617
use oxnet::Ipv6Net;
1718
use sled_hardware_types::underlay::BOOTSTRAP_MASK;
1819
use sled_hardware_types::underlay::BOOTSTRAP_PREFIX;
@@ -26,9 +27,6 @@ use thiserror::Error;
2627

2728
use crate::types::EnableStatsRequest;
2829

29-
// TODO-cleanup Is it okay to hardcode this port number here?
30-
const DDMD_PORT: u16 = 8000;
31-
3230
#[derive(Debug, Error, SlogInlineError)]
3331
pub enum DdmError {
3432
#[error("Failed to construct an HTTP client:")]

internal-dns/types/src/config.rs

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,7 @@ impl DnsConfigBuilder {
399399
dendrite_port: u16,
400400
mgs_port: u16,
401401
mgd_port: u16,
402+
ddm_port: u16,
402403
) -> anyhow::Result<()> {
403404
let zone = self.host_dendrite(sled_id, switch_zone_ip)?;
404405
self.service_backend_zone(ServiceName::Dendrite, &zone, dendrite_port)?;
@@ -407,7 +408,8 @@ impl DnsConfigBuilder {
407408
&zone,
408409
mgs_port,
409410
)?;
410-
self.service_backend_zone(ServiceName::Mgd, &zone, mgd_port)
411+
self.service_backend_zone(ServiceName::Mgd, &zone, mgd_port)?;
412+
self.service_backend_zone(ServiceName::Ddm, &zone, ddm_port)
411413
}
412414

413415
/// Higher-level shorthand for adding a Nexus zone with both its internal
@@ -731,7 +733,7 @@ impl DnsConfigBuilder {
731733

732734
#[cfg(test)]
733735
mod test {
734-
use super::{DnsConfigBuilder, Host, ServiceName};
736+
use super::{DnsConfigBuilder, DnsRecord, Host, ServiceName};
735737
use crate::{config::Zone, names::DNS_ZONE};
736738
use omicron_common::api::external::Generation;
737739
use omicron_uuid_kinds::{OmicronZoneUuid, SledUuid};
@@ -779,6 +781,8 @@ mod test {
779781
"_oximeter-reader._tcp",
780782
);
781783
assert_eq!(ServiceName::Dendrite.dns_name(), "_dendrite._tcp",);
784+
assert_eq!(ServiceName::Mgd.dns_name(), "_mgd._tcp",);
785+
assert_eq!(ServiceName::Ddm.dns_name(), "_ddm._tcp",);
782786
assert_eq!(
783787
ServiceName::CruciblePantry.dns_name(),
784788
"_crucible-pantry._tcp",
@@ -796,6 +800,69 @@ mod test {
796800
);
797801
}
798802

803+
#[test]
804+
fn host_zone_switch_publishes_all_services() {
805+
let sled_uuid: SledUuid =
806+
"001de000-51ed-4000-8000-000000000001".parse().unwrap();
807+
let switch_zone_ip = Ipv6Addr::new(0, 0, 0, 0, 0, 0, 0, 1);
808+
809+
// Use distinct port numbers so an arg-order swap in `host_zone_switch`
810+
// surfaces as a port mismatch on the affected service.
811+
let dendrite_port = 11;
812+
let mgs_port = 13;
813+
let mgd_port = 17;
814+
let ddm_port = 19;
815+
816+
let mut builder = DnsConfigBuilder::new();
817+
builder
818+
.host_zone_switch(
819+
sled_uuid,
820+
switch_zone_ip,
821+
dendrite_port,
822+
mgs_port,
823+
mgd_port,
824+
ddm_port,
825+
)
826+
.unwrap();
827+
828+
let config = builder.build_full_config_for_initial_generation();
829+
830+
let mut by_name: BTreeMap<&str, &[DnsRecord]> = BTreeMap::new();
831+
for zone in &config.zones {
832+
for (name, records) in &zone.records {
833+
by_name.insert(name.as_str(), records.as_slice());
834+
}
835+
}
836+
837+
for (expected_name, expected_port) in [
838+
("_dendrite._tcp", dendrite_port),
839+
("_mgs._tcp", mgs_port),
840+
("_mgd._tcp", mgd_port),
841+
("_ddm._tcp", ddm_port),
842+
] {
843+
let records = by_name.get(expected_name).unwrap_or_else(|| {
844+
panic!(
845+
"expected {expected_name} in published switch-zone \
846+
services; got {by_name:?}"
847+
)
848+
});
849+
let srv_port = records
850+
.iter()
851+
.find_map(|r| match r {
852+
DnsRecord::Srv(s) => Some(s.port),
853+
_ => None,
854+
})
855+
.unwrap_or_else(|| {
856+
panic!("no SRV record for {expected_name}: {records:?}")
857+
});
858+
859+
assert_eq!(
860+
srv_port, expected_port,
861+
"wrong SRV port for {expected_name}"
862+
);
863+
}
864+
}
865+
799866
#[test]
800867
fn display_hosts() {
801868
let sled_uuid = SledUuid::nil();

internal-dns/types/src/names.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ pub enum ServiceName {
7575
BoundaryNtp,
7676
InternalNtp,
7777
Mgd,
78+
Ddm,
7879
}
7980

8081
impl ServiceName {
@@ -116,6 +117,7 @@ impl ServiceName {
116117
ServiceName::BoundaryNtp => "boundary-ntp",
117118
ServiceName::InternalNtp => "internal-ntp",
118119
ServiceName::Mgd => "mgd",
120+
ServiceName::Ddm => "ddm",
119121
}
120122
}
121123

@@ -144,7 +146,8 @@ impl ServiceName {
144146
| ServiceName::CruciblePantry
145147
| ServiceName::BoundaryNtp
146148
| ServiceName::InternalNtp
147-
| ServiceName::Mgd => {
149+
| ServiceName::Mgd
150+
| ServiceName::Ddm => {
148151
format!("_{}._tcp", self.service_kind())
149152
}
150153
ServiceName::SledAgent(id) => {

nexus/reconfigurator/execution/src/dns.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -988,9 +988,8 @@ mod test {
988988
// the previous pass (i.e., that corresponds to an Omicron zone).
989989
//
990990
// There are some ServiceNames missing here because they are not part of
991-
// our representative config (e.g., ClickhouseKeeper) or they don't
992-
// currently have DNS record at all (e.g., SledAgent, Maghemite, Mgd,
993-
// Tfport).
991+
// our representative config (e.g., ClickhouseKeeper) or because they
992+
// do not currently have a DNS record at all (e.g., SledAgent).
994993
let mut srv_kinds_expected = BTreeSet::from([
995994
ServiceName::Clickhouse,
996995
ServiceName::ClickhouseNative,
@@ -1001,6 +1000,8 @@ mod test {
10011000
ServiceName::NexusLockstep,
10021001
ServiceName::Oximeter,
10031002
ServiceName::Dendrite,
1003+
ServiceName::Mgd,
1004+
ServiceName::Ddm,
10041005
ServiceName::CruciblePantry,
10051006
ServiceName::BoundaryNtp,
10061007
ServiceName::InternalNtp,

nexus/reconfigurator/execution/src/test_utils.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,12 @@ pub fn overridables_for_test(
113113
let dendrite_port =
114114
cptestctx.dendrite.read().unwrap().get(&switch_slot).unwrap().port;
115115
let mgd_port = cptestctx.mgd.get(&switch_slot).unwrap().port;
116+
let ddm_port = cptestctx.ddm.get(&switch_slot).unwrap().port;
116117
overrides.override_switch_zone_ip(sled_id, ip);
117118
overrides.override_dendrite_port(sled_id, dendrite_port);
118119
overrides.override_mgs_port(sled_id, mgs_port);
119120
overrides.override_mgd_port(sled_id, mgd_port);
121+
overrides.override_ddm_port(sled_id, ddm_port);
120122
}
121123
overrides
122124
}

nexus/reconfigurator/planning/src/example.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1854,7 +1854,8 @@ mod tests {
18541854
| ServiceName::RepoDepot
18551855
| ServiceName::ManagementGatewayService
18561856
| ServiceName::Dendrite
1857-
| ServiceName::Mgd => {
1857+
| ServiceName::Mgd
1858+
| ServiceName::Ddm => {
18581859
out.insert(service, Ok(()));
18591860
}
18601861
// InternalNtp is too large to fit in a single DNS packet and

nexus/test-utils/src/nexus_test.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ pub struct ControlPlaneTestContext<N> {
117117
/// Ports of stopped dendrite instances (for use by start_dendrite)
118118
pub stopped_dendrite_ports: RwLock<HashMap<SwitchSlot, u16>>,
119119
pub mgd: HashMap<SwitchSlot, dev::maghemite::MgdInstance>,
120+
pub ddm: HashMap<SwitchSlot, dev::maghemite::DdmInstance>,
120121
pub external_dns_zone_name: String,
121122
pub external_dns: TransientDnsServer,
122123
pub internal_dns: TransientDnsServer,
@@ -320,6 +321,9 @@ impl<N: NexusServer> ControlPlaneTestContext<N> {
320321
for (_, mut mgd) in self.mgd {
321322
mgd.cleanup().await.unwrap();
322323
}
324+
for (_, mut ddm) in self.ddm {
325+
ddm.cleanup().await;
326+
}
323327
self.logctx.cleanup_successful();
324328
}
325329
}

nexus/test-utils/src/starter.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ pub struct ControlPlaneStarter<'a, N: NexusServer> {
146146
pub gateway: BTreeMap<SwitchSlot, GatewayTestContext>,
147147
pub dendrite: RwLock<HashMap<SwitchSlot, dev::dendrite::DendriteInstance>>,
148148
pub mgd: HashMap<SwitchSlot, dev::maghemite::MgdInstance>,
149+
pub ddm: HashMap<SwitchSlot, dev::maghemite::DdmInstance>,
149150

150151
// NOTE: Only exists after starting Nexus, until external Nexus is
151152
// initialized.
@@ -203,6 +204,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> {
203204
gateway: BTreeMap::new(),
204205
dendrite: RwLock::new(HashMap::new()),
205206
mgd: HashMap::new(),
207+
ddm: HashMap::new(),
206208
nexus_internal: None,
207209
nexus_internal_addr: None,
208210
external_dns_zone_name: None,
@@ -461,6 +463,17 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> {
461463
self.config.pkg.mgd.insert(switch_slot, config);
462464
}
463465

466+
pub async fn start_ddm(&mut self, switch_slot: SwitchSlot) {
467+
let log = &self.logctx.log;
468+
debug!(log, "Starting DDM sim"; "switch_slot" => ?switch_slot);
469+
470+
let ddm = dev::maghemite::DdmInstance::start().await.unwrap();
471+
let port = ddm.port;
472+
self.ddm.insert(switch_slot, ddm);
473+
474+
debug!(log, "DDM sim started"; "port" => port);
475+
}
476+
464477
pub async fn record_switch_dns(
465478
&mut self,
466479
sled_id: SledUuid,
@@ -482,6 +495,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> {
482495
self.dendrite.read().unwrap().get(&switch_slot).unwrap().port,
483496
self.gateway.get(&switch_slot).unwrap().port,
484497
self.mgd.get(&switch_slot).unwrap().port,
498+
self.ddm.get(&switch_slot).unwrap().port,
485499
)
486500
.unwrap()
487501
}
@@ -1250,6 +1264,7 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> {
12501264
dendrite: RwLock::new(self.dendrite.into_inner().unwrap()),
12511265
stopped_dendrite_ports: RwLock::new(HashMap::new()),
12521266
mgd: self.mgd,
1267+
ddm: self.ddm,
12531268
external_dns_zone_name: self.external_dns_zone_name.unwrap(),
12541269
external_dns: self.external_dns.unwrap(),
12551270
internal_dns: self.internal_dns.unwrap(),
@@ -1291,6 +1306,9 @@ impl<'a, N: NexusServer> ControlPlaneStarter<'a, N> {
12911306
for (_, mut mgd) in self.mgd {
12921307
mgd.cleanup().await.unwrap();
12931308
}
1309+
for (_, mut ddm) in self.ddm {
1310+
ddm.cleanup().await;
1311+
}
12941312
self.logctx.cleanup_successful();
12951313
}
12961314

@@ -1631,6 +1649,12 @@ pub(crate) async fn setup_with_config_impl<N: NexusServer>(
16311649
builder.start_mgd(SwitchSlot::Switch0).boxed()
16321650
}),
16331651
),
1652+
(
1653+
"start_ddm_switch0",
1654+
Box::new(|builder| {
1655+
builder.start_ddm(SwitchSlot::Switch0).boxed()
1656+
}),
1657+
),
16341658
(
16351659
"record_switch_dns",
16361660
Box::new(|builder| {
@@ -1675,6 +1699,12 @@ pub(crate) async fn setup_with_config_impl<N: NexusServer>(
16751699
builder.start_mgd(SwitchSlot::Switch1).boxed()
16761700
}),
16771701
),
1702+
(
1703+
"start_ddm_switch1",
1704+
Box::new(|builder| {
1705+
builder.start_ddm(SwitchSlot::Switch1).boxed()
1706+
}),
1707+
),
16781708
(
16791709
"record_switch_dns",
16801710
Box::new(|builder| {

nexus/tests/integration_tests/initialization.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,11 @@ async fn test_nexus_boots_before_dendrite() {
158158
starter.start_mgd(SwitchSlot::Switch1).await;
159159
info!(log, "Started mgd");
160160

161+
info!(log, "Starting ddm");
162+
starter.start_ddm(SwitchSlot::Switch0).await;
163+
starter.start_ddm(SwitchSlot::Switch1).await;
164+
info!(log, "Started ddm");
165+
161166
info!(log, "Populating internal DNS records");
162167
starter
163168
.record_switch_dns(
@@ -197,6 +202,8 @@ async fn nexus_schema_test_setup(
197202
starter.start_dendrite(SwitchSlot::Switch1).await;
198203
starter.start_mgd(SwitchSlot::Switch0).await;
199204
starter.start_mgd(SwitchSlot::Switch1).await;
205+
starter.start_ddm(SwitchSlot::Switch0).await;
206+
starter.start_ddm(SwitchSlot::Switch1).await;
200207
starter.populate_internal_dns().await;
201208
}
202209

nexus/types/src/deployment/execution/dns.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ pub fn blueprint_internal_dns_config(
158158
overrides.dendrite_port(scrimlet.id()),
159159
overrides.mgs_port(scrimlet.id()),
160160
overrides.mgd_port(scrimlet.id()),
161+
overrides.ddm_port(scrimlet.id()),
161162
)?;
162163
}
163164

0 commit comments

Comments
 (0)