Skip to content

Commit 0d9b2d7

Browse files
authored
ls-apis needs to detect cycles in dependency unit graph (#9707)
1 parent 66df790 commit 0d9b2d7

7 files changed

Lines changed: 563 additions & 19 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dev-tools/ls-apis/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ cargo_metadata.workspace = true
1414
clap.workspace = true
1515
iddqd.workspace = true
1616
indent_write.workspace = true
17+
itertools.workspace = true
1718
newtype_derive.workspace = true
1819
parse-display.workspace = true
1920
petgraph.workspace = true

dev-tools/ls-apis/api-manifest.toml

Lines changed: 238 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,10 +181,18 @@ versioned_how_reason = "depends on itself (i.e., instances call each other)"
181181
client_package_name = "bootstrap-agent-lockstep-client"
182182
label = "Bootstrap Agent Lockstep API"
183183
server_package_name = "bootstrap-agent-lockstep-api"
184-
versioned_how = "server"
184+
versioned_how = "client"
185+
versioned_how_reason = "wicketd can wind up calling into sled agent on different sleds"
185186
notes = """
186-
Lockstep API for rack initialization. Only used by wicketd, which is in the \
187-
same deployment unit (Host OS) as the sled-agent that exposes this API.
187+
Lockstep API for rack initialization. Only used by wicketd. Generally
188+
speaking, Wicketd only talks to the sled agent on the same sled, which is in the
189+
same deployment unit as Wicketd itself (host OS). However, it may pick a
190+
different one if for some reason the local baseboard information is not
191+
available.
192+
193+
Either way, none of this matters for API versioning or software update because
194+
we expect that during rack initialization, all components are running the same
195+
system release.
188196
"""
189197

190198
[[apis]]
@@ -612,3 +620,230 @@ note = """
612620
Sled Agent uses the Crucible Agent client types only, and only in the simulated
613621
sled agent.
614622
"""
623+
624+
################################################################################
625+
# Intra-deployment-unit-only edges
626+
#
627+
# These edges are excluded from the deployment unit dependency graph because
628+
# they represent communication that only happens locally within a *single
629+
# instance* of a single deployment unit, not across deployment unit boundaries.
630+
# A single deployment unit is updated atomically, so there's no risk of version
631+
# skew with intra_deployment_unit-only edges.
632+
#
633+
# Note that things do not belong here when they cross different instances of the
634+
# same deployment unit. For example, Sled Agent and MGS are in the same
635+
# deployment unit (the host OS), but Sled Agent's use of MGS crosses different
636+
# instances of the deployment unit (i.e., different sleds). Those don't belong
637+
# in this category.
638+
################################################################################
639+
640+
[[intra_deployment_unit_only_edges]]
641+
server = "ddmd"
642+
client = "dpd-client"
643+
note = """
644+
ddmd runs in both the global zone and the switch zone. The switch zone is
645+
configured with dpd_host=[::1] (services.rs:3127). The global zone configures
646+
"dendrite" to false, meaning it does not expose a dendrite server
647+
(ddm/manifest.xml:25).
648+
"""
649+
permalinks = [
650+
"https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/services.rs#L3127",
651+
"https://github.com/oxidecomputer/maghemite/blob/396bb3c/smf/ddm/manifest.xml#L25",
652+
]
653+
654+
[[intra_deployment_unit_only_edges]]
655+
server = "dpd"
656+
client = "gateway-client"
657+
note = """
658+
dpd defaults to [::1]:MGS_PORT (switch_identifiers.rs:60), and the SMF start
659+
script doesn't pass in --mgs-address. The comment also explains that it
660+
explicitly wants the local MGS and this can only be overridden for testing.
661+
"""
662+
permalinks = [
663+
"https://github.com/oxidecomputer/dendrite/blob/606c0be/dpd/src/switch_identifiers.rs#L60",
664+
"https://github.com/oxidecomputer/dendrite/blob/606c0be/dpd/misc/dpd.xml",
665+
"https://github.com/oxidecomputer/dendrite/blob/606c0be/tools/svc-dpd",
666+
]
667+
668+
[[intra_deployment_unit_only_edges]]
669+
server = "lldpd"
670+
client = "dpd-client"
671+
note = """
672+
lldpd defaults to localhost for dpd (dendrite.rs:194), and the SMF start
673+
script doesn't pass in --host.
674+
"""
675+
permalinks = [
676+
"https://github.com/oxidecomputer/lldp/blob/61479b6/lldpd/src/dendrite.rs#L194",
677+
"https://github.com/oxidecomputer/lldp/blob/61479b6/lldpd/misc/lldpd.xml",
678+
"https://github.com/oxidecomputer/lldp/blob/61479b6/lldpd/misc/svc-lldpd",
679+
]
680+
681+
[[intra_deployment_unit_only_edges]]
682+
server = "lldpd"
683+
client = "gateway-client"
684+
note = """
685+
lldpd defaults to localhost for gateway (main.rs:194), and the SMF start
686+
script doesn't override it.
687+
"""
688+
permalinks = [
689+
"https://github.com/oxidecomputer/lldp/blob/d22509dfdb051321b859e924948605115691b93c/lldpd/src/main.rs#L148-L154",
690+
"https://github.com/oxidecomputer/lldp/blob/d22509dfdb051321b859e924948605115691b93c/lldpd/misc/svc-lldpd",
691+
]
692+
693+
[[intra_deployment_unit_only_edges]]
694+
server = "mgd"
695+
client = "ddm-admin-client"
696+
note = "mg-lower hardcodes localhost:8000."
697+
permalinks = [
698+
"https://github.com/oxidecomputer/maghemite/blob/396bb3c/mg-lower/src/ddm.rs#L110",
699+
]
700+
701+
[[intra_deployment_unit_only_edges]]
702+
server = "mgd"
703+
client = "dpd-client"
704+
note = "mg-lower hardcodes localhost."
705+
permalinks = [
706+
"https://github.com/oxidecomputer/maghemite/blob/396bb3c/mg-lower/src/dendrite.rs#L491",
707+
]
708+
709+
[[intra_deployment_unit_only_edges]]
710+
server = "mgd"
711+
client = "gateway-client"
712+
note = """
713+
mgd defaults to [::1]:12225 (mgd main.rs:93), and the SMF start script
714+
doesn't set --mgs-addr.
715+
"""
716+
permalinks = [
717+
"https://github.com/oxidecomputer/maghemite/blob/396bb3c/mgd/src/main.rs#L93",
718+
"https://github.com/oxidecomputer/maghemite/blob/396bb3c/smf/mgd/manifest.xml",
719+
"https://github.com/oxidecomputer/maghemite/blob/396bb3c/smf/mgd_method_script.sh",
720+
]
721+
722+
[[intra_deployment_unit_only_edges]]
723+
server = "omicron-sled-agent"
724+
client = "ddm-admin-client"
725+
note = "Sled Agent uses Client::localhost() to connect to ddm."
726+
permalinks = [
727+
"https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/ddm_reconciler.rs#L56",
728+
"https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/bootstrap/bootstore_setup.rs#L92",
729+
"https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/rack_setup/service.rs#L1086",
730+
"https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/sled_agent.rs#L1378",
731+
]
732+
733+
[[intra_deployment_unit_only_edges]]
734+
server = "omicron-sled-agent"
735+
client = "mg-admin-client"
736+
note = """
737+
Sled Agent only queries the switch zone on the same sled,
738+
which is within the same deployment unit as the global
739+
zone.
740+
"""
741+
permalinks = [
742+
"https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/bootstrap/early_networking.rs#L483",
743+
]
744+
745+
[[intra_deployment_unit_only_edges]]
746+
server = "omicron-sled-agent"
747+
client = "propolis-client"
748+
note = """
749+
Only queries the Propolis server on the same sled, so Propolis is in the
750+
same deployment unit.
751+
"""
752+
permalinks = [
753+
"https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/instance.rs#L2283",
754+
]
755+
756+
# NOTE: The following sled-agent edges are bugs: they cross deployment units.
757+
# They are kept here to avoid breaking the build, but need to be fixed, either
758+
# through client-side versioning or by some other means.
759+
760+
[[intra_deployment_unit_only_edges]]
761+
server = "omicron-sled-agent"
762+
client = "gateway-client"
763+
note = """
764+
BUG: Sled Agent creates two MGS clients. One of them
765+
(early_networking.rs:376) queries the switch zone on
766+
the same sled, which is within the same deployment unit
767+
as the global zone, so this is okay.
768+
769+
The other one (early_networking.rs:277) queries both
770+
switch zones, so it crosses deployment units. This needs
771+
to be fixed.
772+
773+
Reference: https://github.com/oxidecomputer/omicron/issues/9708
774+
"""
775+
permalinks = [
776+
"https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/bootstrap/early_networking.rs#L376",
777+
"https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/bootstrap/early_networking.rs#L277-L280",
778+
]
779+
780+
[[intra_deployment_unit_only_edges]]
781+
server = "omicron-sled-agent"
782+
client = "dpd-client"
783+
note = """
784+
BUG: Sled Agent creates two DPD clients. One of them (early_networking.rs:419)
785+
is always to the switch zone on the same sled, which is in the same deployment
786+
unit.
787+
788+
The other one (services.rs:1090) queries both switch zones, so it crosses
789+
deployment units. This needs to be fixed.
790+
791+
Reference: https://github.com/oxidecomputer/omicron/issues/9708
792+
"""
793+
permalinks = [
794+
"https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/bootstrap/early_networking.rs#L419",
795+
"https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/services.rs#L1090",
796+
]
797+
798+
[[intra_deployment_unit_only_edges]]
799+
server = "tfportd"
800+
client = "dpd-client"
801+
note = """
802+
The tfportd service has the dpd_host SMF property (tfport.xml:52) set to [::1]
803+
(services.rs:2852). tfportd has a --dpd-host option (main.rs:81) but the SMF
804+
start script (svc-tfportd:12) does not use it.
805+
"""
806+
permalinks = [
807+
"https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/services.rs#L2852",
808+
"https://github.com/oxidecomputer/dendrite/blob/606c0be/tfportd/misc/tfport.xml#L52-L53",
809+
"https://github.com/oxidecomputer/dendrite/blob/cf31be9/tfportd/src/main.rs#L81-L83",
810+
"https://github.com/oxidecomputer/dendrite/blob/606c0be/tools/svc-tfportd#L12",
811+
]
812+
813+
[[intra_deployment_unit_only_edges]]
814+
server = "tfportd"
815+
client = "lldpd-client"
816+
note = "tfportd hardcodes localhost for its lldpd-client (tfport.rs:88)."
817+
permalinks = [
818+
"https://github.com/oxidecomputer/dendrite/blob/606c0be/tfportd/src/tfport.rs#L88",
819+
]
820+
821+
[[intra_deployment_unit_only_edges]]
822+
server = "wicketd"
823+
client = "ddm-admin-client"
824+
note = "wicketd uses DdmAdminClient::localhost() (bootstrap_addrs.rs:162)."
825+
permalinks = [
826+
"https://github.com/oxidecomputer/omicron/blob/6cef874/wicketd/src/bootstrap_addrs.rs#L162",
827+
]
828+
829+
[[intra_deployment_unit_only_edges]]
830+
server = "wicketd"
831+
client = "dpd-client"
832+
note = "wicketd hardcodes [::1] (uplink.rs:83)."
833+
permalinks = [
834+
"https://github.com/oxidecomputer/omicron/blob/6cef874/wicketd/src/preflight_check/uplink.rs#L83",
835+
]
836+
837+
[[intra_deployment_unit_only_edges]]
838+
server = "wicketd"
839+
client = "gateway-client"
840+
note = """
841+
wicketd's mgs-address config (wicketd.rs:35) is always set to [::1]
842+
(services.rs:2586). This config is passed into wicketd at startup
843+
(manifest.xml:20).
844+
"""
845+
permalinks = [
846+
"https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/services.rs#L2685-L2689",
847+
"https://github.com/oxidecomputer/omicron/blob/6cef874/wicketd/src/bin/wicketd.rs#L35",
848+
"https://github.com/oxidecomputer/omicron/blob/6cef874/smf/wicketd/manifest.xml#L20",
849+
]

dev-tools/ls-apis/src/api_metadata.rs

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ pub struct AllApiMetadata {
3030
deployment_units: BTreeMap<DeploymentUnitName, DeploymentUnitInfo>,
3131
dependency_rules: BTreeMap<ClientPackageName, Vec<DependencyFilterRule>>,
3232
ignored_non_clients: BTreeSet<ClientPackageName>,
33+
intra_deployment_unit_only_edges: Vec<IntraDeploymentUnitOnlyEdge>,
3334
}
3435

3536
impl AllApiMetadata {
@@ -73,6 +74,13 @@ impl AllApiMetadata {
7374
&self.ignored_non_clients
7475
}
7576

77+
/// Returns the list of intra-deployment-unit-only edges
78+
pub fn intra_deployment_unit_only_edges(
79+
&self,
80+
) -> &[IntraDeploymentUnitOnlyEdge] {
81+
&self.intra_deployment_unit_only_edges
82+
}
83+
7684
/// Returns how we should filter the given dependency
7785
pub(crate) fn evaluate_dependency(
7886
&self,
@@ -138,6 +146,7 @@ struct RawApiMetadata {
138146
deployment_units: Vec<DeploymentUnitInfo>,
139147
dependency_filter_rules: Vec<DependencyFilterRule>,
140148
ignored_non_clients: Vec<ClientPackageName>,
149+
intra_deployment_unit_only_edges: Vec<IntraDeploymentUnitOnlyEdge>,
141150
}
142151

143152
impl TryFrom<RawApiMetadata> for AllApiMetadata {
@@ -188,17 +197,41 @@ impl TryFrom<RawApiMetadata> for AllApiMetadata {
188197
for client_pkg in raw.ignored_non_clients {
189198
if !ignored_non_clients.insert(client_pkg.clone()) {
190199
bail!(
191-
"entry in ignored_non_clients appearead twice: {:?}",
200+
"entry in ignored_non_clients appeared twice: {:?}",
192201
&client_pkg
193202
);
194203
}
195204
}
196205

206+
// Validate that IDU-only edges reference only known server components
207+
// and APIs.
208+
let known_components: BTreeSet<_> =
209+
deployment_units.values().flat_map(|u| u.packages.iter()).collect();
210+
for edge in &raw.intra_deployment_unit_only_edges {
211+
if !known_components.contains(&edge.server) {
212+
bail!(
213+
"intra_deployment_unit_only_edges: \
214+
unknown server component {:?}",
215+
edge.server
216+
);
217+
}
218+
219+
if !apis.contains_key(&edge.client) {
220+
bail!(
221+
"intra_deployment_unit_only_edges: \
222+
unknown client {:?}",
223+
edge.client,
224+
);
225+
}
226+
}
227+
197228
Ok(AllApiMetadata {
198229
apis,
199230
deployment_units,
200231
dependency_rules,
201232
ignored_non_clients,
233+
intra_deployment_unit_only_edges: raw
234+
.intra_deployment_unit_only_edges,
202235
})
203236
}
204237
}
@@ -416,3 +449,30 @@ pub enum Evaluation {
416449
/// This dependency should be part of the update DAG
417450
Dag,
418451
}
452+
453+
/// An edge that should be excluded from the deployment unit dependency graph
454+
/// because it represents communication that only happens locally within a
455+
/// single instance of a single deployment unit.
456+
#[derive(Deserialize)]
457+
#[serde(deny_unknown_fields)]
458+
pub struct IntraDeploymentUnitOnlyEdge {
459+
/// The server component that consumes the API.
460+
pub server: ServerComponentName,
461+
/// The client package consumed.
462+
pub client: ClientPackageName,
463+
/// Explanation of why this edge is intra-deployment-unit-only.
464+
pub note: String,
465+
/// Permalinks to source code referenced by `note`
466+
pub permalinks: Vec<String>,
467+
}
468+
469+
impl IntraDeploymentUnitOnlyEdge {
470+
/// Returns true if this rule matches the given (server, client) pair.
471+
pub fn matches(
472+
&self,
473+
server: &ServerComponentName,
474+
client: &ClientPackageName,
475+
) -> bool {
476+
self.server == *server && self.client == *client
477+
}
478+
}

dev-tools/ls-apis/src/bin/ls-apis.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,14 @@ fn print_server_components<'a>(
252252
);
253253
}
254254
for (c, path) in apis.component_apis_consumed(s, filter)? {
255-
println!("{} consumes: {}", prefix, c);
255+
if apis.idu_only_edge_note(s, c).is_some() {
256+
println!(
257+
"{} consumes: {} (intra-deployment-unit-only)",
258+
prefix, c
259+
);
260+
} else {
261+
println!("{} consumes: {}", prefix, c);
262+
}
256263
if show_deps {
257264
for p in path.nodes() {
258265
println!("{} via: {}", prefix, p);

0 commit comments

Comments
 (0)