|
13 | 13 | use axum::{response::IntoResponse, routing::get, Router}; |
14 | 14 | use lazy_static::lazy_static; |
15 | 15 | use prometheus::{ |
16 | | - register_counter, register_counter_vec, register_histogram, Counter, CounterVec, Encoder, |
17 | | - Histogram, TextEncoder, |
| 16 | + register_counter, register_counter_vec, register_gauge, register_gauge_vec, |
| 17 | + register_histogram, register_histogram_vec, Counter, CounterVec, Encoder, Gauge, GaugeVec, |
| 18 | + Histogram, HistogramVec, TextEncoder, |
18 | 19 | }; |
19 | 20 | use std::sync::atomic::{AtomicU64, Ordering}; |
20 | 21 | use std::time::Duration; |
@@ -87,13 +88,89 @@ lazy_static! { |
87 | 88 | .expect("Failed to register job_query_duration_seconds histogram"); |
88 | 89 |
|
89 | 90 | // Cluster feed metrics from cluster.rs |
90 | | - pub static ref CLUSTER_ROUND_TRIP_SECONDS: Histogram = register_histogram!(
| 91 | + //
| 92 | + // Labeled by `cluster_type` (alloc / manual / hostname / hardware) so the round-trip tail
| 93 | + // can be attributed to a tag class. A large fan-out of chunked manual-tag clusters lengthens
| 94 | + // the round-robin lap and shows up here as a worse tail on the `manual` series than on
| 95 | + // `alloc`. The label is bounded to the four `TagType` variants — never per-tag, which would
| 96 | + // blow up cardinality on farms with thousands of tags. Explicit bucket bounds run from
| 97 | + // 0.05 s to 120 s; longer round trips are only visible in the implicit +Inf bucket.
| 98 | + pub static ref CLUSTER_ROUND_TRIP_SECONDS: HistogramVec = register_histogram_vec!(
91 | 99 | "scheduler_cluster_round_trip_seconds", |
92 | | - "Time between successive emissions of the same active (non-sleeping) cluster",
| 100 | + "Time between successive emissions of the same active (non-sleeping) cluster, by cluster type",
| 101 | + &["cluster_type"],
93 | 102 | vec![0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0, 120.0] |
94 | 103 | ) |
95 | 104 | .expect("Failed to register cluster_round_trip_seconds histogram"); |
96 | 105 |
|
| 106 | + // Size of the live cluster set, by tag class, sampled once per round-robin lap by the
| 107 | + // feed producer. This is the fan-out magnitude: with `manual_tags_chunk_size = N` and T
| 108 | + // manual tags, the `manual` series is ~T/N. A large value here is the prime suspect for
| 109 | + // slow per-cluster revisits (each cluster only gets a turn once per full lap). Exported
| 110 | + // without a `_total` suffix: Prometheus reserves that suffix for counters, not gauges.
| 111 | + pub static ref CLUSTERS_TOTAL: GaugeVec = register_gauge_vec!(
| 112 | + "scheduler_clusters",
| 113 | + "Number of clusters in the live feed set, by cluster type",
| 114 | + &["cluster_type"]
| 115 | + )
| 116 | + .expect("Failed to register clusters_total gauge");
| 117 | + |
| 118 | + // Clusters currently sleeping (skipped this lap), sampled once per lap. Sleeping clusters
| 119 | + // are invisible to dispatch until their backoff expires (`cluster_empty_sleep` /
| 120 | + // `cluster_saturated_sleep`). This gauge is unlabeled, so compare it against the sum of the
| 121 | + // CLUSTERS_TOTAL series: a high ratio means most of the set is backed off at any instant.
| 122 | + pub static ref CLUSTERS_SLEEPING: Gauge = register_gauge!(
| 123 | + "scheduler_clusters_sleeping",
| 124 | + "Number of clusters currently sleeping (skipped each lap)"
| 125 | + )
| 126 | + .expect("Failed to register clusters_sleeping gauge");
| 127 | + |
| 128 | + // Frames booked in a single cluster pass (one emission off the feed), labeled by
| 129 | + // `cluster_type`. If this pins at the per-pass ceiling (`max_jobs_per_cluster_pass` x
| 130 | + // `dispatch_frames_per_layer_limit`) while a backlog persists, throughput-per-turn x
| 131 | + // infrequent-turns explains a growing queue: the farm has capacity but each cluster only
| 132 | + // drains a bounded slice per visit. The 0.0 bucket isolates passes that booked nothing;
| 133 | + // the largest explicit bound is 500, so bigger passes are only counted in +Inf.
| 134 | + pub static ref FRAMES_DISPATCHED_PER_PASS: HistogramVec = register_histogram_vec!(
| 135 | + "scheduler_frames_dispatched_per_pass",
| 136 | + "Frames dispatched in a single cluster pass, by cluster type",
| 137 | + &["cluster_type"],
| 138 | + vec![0.0, 1.0, 5.0, 10.0, 20.0, 50.0, 100.0, 250.0, 500.0]
| 139 | + )
| 140 | + .expect("Failed to register frames_dispatched_per_pass histogram");
| 141 | + |
| 142 | + // Why each cluster pass ended, labeled by `reason`:
| 143 | + // booked - placed at least one frame
| 144 | + // saturated - jobs were pending but nothing fit (farm full or gated)
| 145 | + // no_jobs - the job query returned nothing eligible
| 146 | + // query_error - the job query failed (pass backed off and will retry)
| 147 | + // `saturated` dominating while hosts sit idle points at matching/tagging, not capacity.
| 148 | + // `no_jobs` dominating means the cluster set is mostly empty churn and the round-robin lap
| 149 | + // is paying for clusters with no work. `reason` is the only label; there is no per-type split.
| 150 | + pub static ref PASS_TERMINATED_REASON_TOTAL: CounterVec = register_counter_vec!(
| 151 | + "scheduler_pass_terminated_reason_total",
| 152 | + "Cluster passes by terminal reason",
| 153 | + &["reason"]
| 154 | + )
| 155 | + .expect("Failed to register pass_terminated_reason_total counter");
| 156 | + |
| 157 | + // Outcome of each host-checkout attempt in the matcher loop, labeled by `outcome`:
| 158 | + // booked - a host was checked out and the frame dispatched
| 159 | + // no_match - check_out returned NoCandidateAvailable
| 160 | + // dispatch_error - host checked out but the dispatch transaction failed
| 161 | + // The reported "hosts available but not booked" symptom shows up as a high
| 162 | + // `no_match` rate; cross-reference with CLUSTERS_TOTAL / round-trip to tell
| 163 | + // a genuine no-fit from a starved cluster that simply isn't getting visited.
| 164 | + // This counter does not split `no_match` any further: the finer breakdown
| 165 | + // (reserved / cas_lost / gate_rejected) lives inside host_cache and is
| 166 | + // deferred to a later tier.
| 167 | + pub static ref CHECKOUT_OUTCOME_TOTAL: CounterVec = register_counter_vec!(
| 168 | + "scheduler_checkout_outcome_total",
| 169 | + "Host checkout attempts by outcome",
| 170 | + &["outcome"]
| 171 | + )
| 172 | + .expect("Failed to register checkout_outcome_total counter");
| 173 | + |
97 | 174 | // E-PVM placement metrics from host_cache/cache.rs. Observed only on the |
98 | 175 | // Epvm path (Saturation always scores 0.0). Buckets are dimensionless W3 |
99 | 176 | // fractional-layer-frames units; calibration may need adjustment after |
@@ -255,10 +332,51 @@ pub fn observe_job_query_duration(duration: Duration) { |
255 | 332 | JOB_QUERY_DURATION_SECONDS.observe(duration.as_secs_f64()); |
256 | 333 | } |
257 | 334 |
|
258 | | -/// Helper function to observe cluster round-trip duration |
| 335 | +/// Records one round-trip sample, in seconds, on the series for `cluster_type`.
| 336 | +#[inline]
| 337 | +pub fn observe_cluster_round_trip(cluster_type: &str, duration: Duration) {
| 338 | + let elapsed_secs = duration.as_secs_f64();
| 339 | + let series = CLUSTER_ROUND_TRIP_SECONDS.with_label_values(&[cluster_type]);
| 340 | + series.observe(elapsed_secs);
| 341 | +}
| 342 | + |
| 343 | +/// Publishes `count` as the current live cluster-set size for `cluster_type`.
| 344 | +/// The feed producer samples this once per round-robin lap.
| 345 | +#[inline]
| 346 | +pub fn set_clusters_total(cluster_type: &str, count: i64) {
| 347 | + let live = count as f64;
| 348 | + let gauge = CLUSTERS_TOTAL.with_label_values(&[cluster_type]);
| 349 | + gauge.set(live);
| 350 | +}
| 351 | + |
| 352 | +/// Publishes the current count of sleeping clusters; sampled once per lap.
| 353 | +#[inline]
| 354 | +pub fn set_clusters_sleeping(count: i64) {
| 355 | + Gauge::set(&CLUSTERS_SLEEPING, count as f64);
| 356 | +}
| 357 | + |
| 358 | +/// Observes the frame count booked by one cluster pass on the `cluster_type` series.
| 359 | +#[inline]
| 360 | +pub fn observe_frames_dispatched_per_pass(cluster_type: &str, frames: usize) {
| 361 | + let booked = frames as f64;
| 362 | + let series = FRAMES_DISPATCHED_PER_PASS.with_label_values(&[cluster_type]);
| 363 | + series.observe(booked);
| 364 | +}
| 365 | + |
| 366 | +/// Bumps the pass-termination counter for `reason`, which is one of
| 367 | +/// `booked` / `saturated` / `no_jobs` / `query_error`.
| 368 | +#[inline]
| 369 | +pub fn increment_pass_terminated_reason(reason: &str) {
| 370 | + let labels = [reason];
| 371 | + let counter = PASS_TERMINATED_REASON_TOTAL.with_label_values(&labels);
| 372 | + counter.inc();
| 373 | +}
| 374 | + |
| 375 | +/// Increments the host-checkout counter on the series labeled `outcome`
| 376 | +/// (expected values: `booked` / `no_match` / `dispatch_error`).
259 | 377 | #[inline]
260 | | -pub fn observe_cluster_round_trip(duration: Duration) {
261 | | - CLUSTER_ROUND_TRIP_SECONDS.observe(duration.as_secs_f64());
| 378 | +pub fn increment_checkout_outcome(outcome: &str) {
| 379 | + CHECKOUT_OUTCOME_TOTAL.with_label_values(&[outcome]).inc();
262 | 380 | }
263 | 381 |
|
264 | 382 | /// Records the E-PVM score of the host returned by `check_out_best`. |
|
0 commit comments