2020//! call-site migration onto the typed ports proceeds family-by-family;
2121//! the bridge and the [`emit_*`] helpers are removed in the final
2222//! cleanup PR once every call site has moved.
23+ //!
24+ //! ## Metric naming
25+ //!
26+ //! The `AM_*` constants are the **full, literal Prometheus names** —
27+ //! the exact series names that appear in Prometheus / `VictoriaMetrics`.
28+ //! They bake in the suffix that the OTel→Prometheus translation would
29+ //! otherwise add: counters carry `_total`, and gauges / histograms that
30+ //! measure a quantity carry the unit word (`_seconds`, `_milliseconds`);
31+ //! gauges that are bare counts carry no suffix. No `.with_unit()` hint
32+ //! is set on any instrument. Because the name is already literal and
33+ //! the instrument itself declares no unit, the rendered Prometheus name is
34+ //! identical whether the collector has `add_metric_suffixes` on or off
35+ //! (the exporter deduplicates an existing `_total` and adds no unit suffix
36+ //! when no unit is configured). The const VALUES equal the rendered name
37+ //! under the default `"am"` prefix, so the facade-bridge `match family`
38+ //! dispatch routes correctly.
2339
2440use std:: sync:: Arc ;
2541use std:: sync:: LazyLock ;
@@ -29,62 +45,62 @@ use modkit_macros::domain_model;
2945
3046// @cpt-begin:cpt-cf-account-management-dod-errors-observability-metric-catalog:p1:inst-dod-metric-catalog-constants
3147/// Dependency-call health: `IdP` / Resource Group / GTS / `AuthZ` outbound calls.
32- pub const AM_DEPENDENCY_HEALTH : & str = "am.dependency_health " ;
48+ pub const AM_DEPENDENCY_HEALTH : & str = "am_dependency_health_total " ;
3349
3450/// Tenant-metadata resolution operations and inheritance policy outcomes.
35- pub const AM_METADATA_RESOLUTION : & str = "am.metadata_resolution " ;
51+ pub const AM_METADATA_RESOLUTION : & str = "am_metadata_resolution_total " ;
3652
3753/// Root-tenant bootstrap lifecycle (phase transitions, IdP-wait timeouts).
38- pub const AM_BOOTSTRAP_LIFECYCLE : & str = "am.bootstrap_lifecycle " ;
54+ pub const AM_BOOTSTRAP_LIFECYCLE : & str = "am_bootstrap_lifecycle_total " ;
3955
4056/// Provisioning reaper / hard-delete / deprovision background job telemetry.
41- pub const AM_TENANT_RETENTION : & str = "am.tenant_retention " ;
57+ pub const AM_TENANT_RETENTION : & str = "am_tenant_retention_total " ;
4258
4359/// Invalid retention-window configuration encountered while evaluating due-ness.
44- pub const AM_RETENTION_INVALID_WINDOW : & str = "am.retention.invalid_window " ;
60+ pub const AM_RETENTION_INVALID_WINDOW : & str = "am_retention_invalid_window_total " ;
4561
4662/// Mode-conversion request transitions and outcomes.
47- pub const AM_CONVERSION_LIFECYCLE : & str = "am.conversion_lifecycle " ;
63+ pub const AM_CONVERSION_LIFECYCLE : & str = "am_conversion_lifecycle_total " ;
4864
4965/// Hierarchy-depth threshold exceedance (warning-band + hard-limit rejects).
50- pub const AM_HIERARCHY_DEPTH_EXCEEDANCE : & str = "am.hierarchy_depth_exceedance " ;
66+ pub const AM_HIERARCHY_DEPTH_EXCEEDANCE : & str = "am_hierarchy_depth_exceedance_total " ;
5167
5268/// Cross-tenant denial counter (security-alert candidate family).
53- pub const AM_CROSS_TENANT_DENIAL : & str = "am.cross_tenant_denial " ;
69+ pub const AM_CROSS_TENANT_DENIAL : & str = "am_cross_tenant_denial_total " ;
5470
5571/// Hierarchy-integrity violation telemetry (one per integrity category).
56- pub const AM_HIERARCHY_INTEGRITY_VIOLATIONS : & str = "am.hierarchy_integrity_violations " ;
72+ pub const AM_HIERARCHY_INTEGRITY_VIOLATIONS : & str = "am_hierarchy_integrity_violations " ;
5773
5874/// Periodic integrity-check job tick outcome (`outcome` ∈ `completed` |
5975/// `skipped_in_progress` | `failed`). Distinguishes a clean tick from a
6076/// never-ran job, which [`AM_HIERARCHY_INTEGRITY_VIOLATIONS`] alone cannot.
61- pub const AM_HIERARCHY_INTEGRITY_RUNS : & str = "am.hierarchy_integrity_runs " ;
77+ pub const AM_HIERARCHY_INTEGRITY_RUNS : & str = "am_hierarchy_integrity_runs_total " ;
6278
6379/// Periodic auto-repair tick outcome — separate family from
6480/// [`AM_HIERARCHY_INTEGRITY_RUNS`] so its fixed-label set is not widened.
65- pub const AM_HIERARCHY_INTEGRITY_REPAIR_RUNS : & str = "am.hierarchy_integrity_repair_runs " ;
81+ pub const AM_HIERARCHY_INTEGRITY_REPAIR_RUNS : & str = "am_hierarchy_integrity_repair_runs_total " ;
6682
6783/// Periodic integrity-check tick wall-clock duration in milliseconds.
6884/// The `phase` label disaggregates the check phase (`phase = "check"`)
6985/// from the chained auto-repair phase (`phase = "repair"`) so
7086/// dashboards can tell a slow check from a slow check + repair.
7187/// Drives capacity-planning alerts ("p95 > 60s"), distinct from
7288/// [`AM_HIERARCHY_INTEGRITY_RUNS`] which is a tick-outcome counter.
73- pub const AM_HIERARCHY_INTEGRITY_DURATION : & str = "am.hierarchy_integrity_duration " ;
89+ pub const AM_HIERARCHY_INTEGRITY_DURATION : & str = "am_hierarchy_integrity_duration_milliseconds " ;
7490
7591/// Unix-epoch seconds of the last successful integrity-check tick.
7692/// Used for a freshness watchdog (alert when `last_success` is older
7793/// than twice the configured interval) that the violation gauge
7894/// cannot satisfy on its own — a stuck job and a perfectly-clean tree
7995/// look identical at the violation-gauge level until this gauge stops
8096/// advancing.
81- pub const AM_HIERARCHY_INTEGRITY_LAST_SUCCESS : & str = "am.hierarchy_integrity_last_success " ;
97+ pub const AM_HIERARCHY_INTEGRITY_LAST_SUCCESS : & str = "am_hierarchy_integrity_last_success_seconds " ;
8298
8399/// Unix-epoch seconds of the last failed integrity-check tick — paired
84100/// with [`AM_HIERARCHY_INTEGRITY_LAST_SUCCESS`] so operators can tell
85101/// "sustained failure" from "never ran" (the success gauge keeps the last
86102/// good timestamp indefinitely).
87- pub const AM_HIERARCHY_INTEGRITY_LAST_FAILURE : & str = "am.hierarchy_integrity_last_failure " ;
103+ pub const AM_HIERARCHY_INTEGRITY_LAST_FAILURE : & str = "am_hierarchy_integrity_last_failure_seconds " ;
88104
89105/// Lock-lifecycle event counter for `integrity_check_runs`. Emitted
90106/// from [`crate::infra::storage::integrity::lock::release`] when the
@@ -97,7 +113,7 @@ pub const AM_HIERARCHY_INTEGRITY_LAST_FAILURE: &str = "am.hierarchy_integrity_la
97113/// scheduler-tick outcome set) so dashboards keyed on
98114/// `RUNS{outcome=*}` stay stable; this counter exists for
99115/// lock-health alerting.
100- pub const AM_INTEGRITY_LOCK_EVENTS : & str = "am.integrity_lock_events " ;
116+ pub const AM_INTEGRITY_LOCK_EVENTS : & str = "am_integrity_lock_events_total " ;
101117
102118/// Hierarchy-integrity repair telemetry. Emits one gauge sample per
103119/// run with `category` ∈ all 10
@@ -107,13 +123,25 @@ pub const AM_INTEGRITY_LOCK_EVENTS: &str = "am.integrity_lock_events";
107123/// did not appear). The five derivable categories carry counts only
108124/// in `bucket = repaired`; the five operator-triage categories carry
109125/// counts only in `bucket = deferred`.
110- pub const AM_HIERARCHY_INTEGRITY_REPAIRED : & str = "am.hierarchy_integrity_repaired " ;
126+ pub const AM_HIERARCHY_INTEGRITY_REPAIRED : & str = "am_hierarchy_integrity_repaired " ;
111127
112128/// SERIALIZABLE-isolation retry telemetry for the AM repo's
113129/// `with_serializable_retry` helper.
114- pub const AM_SERIALIZABLE_RETRY : & str = "am.serializable_retry " ;
130+ pub const AM_SERIALIZABLE_RETRY : & str = "am_serializable_retry_total " ;
115131// @cpt-end:cpt-cf-account-management-dod-errors-observability-metric-catalog:p1:inst-dod-metric-catalog-constants
116132
133+ /// Live tenant inventory gauge: current tenant row count, broken down
134+ /// by `status` (provisioning | active | suspended | deleted) and
135+ /// `self_managed` (true | false). A bare-count gauge, so it carries no
136+ /// unit suffix. Refreshed each reaper tick.
137+ pub const AM_TENANTS : & str = "am_tenants" ;
138+
139+ /// Live `tenant_closure` table size gauge: total ancestor-descendant
140+ /// edge count. A bare-count gauge (no unit suffix), refreshed each
141+ /// reaper tick alongside [`AM_TENANTS`]. Grows ~O(tenants × depth);
142+ /// a divergence from that expectation flags closure bloat / stale edges.
143+ pub const AM_TENANT_CLOSURE_ROWS : & str = "am_tenant_closure_rows" ;
144+
117145/// Kinds of metric samples the emitter supports.
118146#[ domain_model]
119147#[ derive( Debug , Clone , Copy , PartialEq , Eq ) ]
0 commit comments