@@ -107,13 +107,6 @@ class Metrics:
107107 scheduler_pending : Gauge
108108 scheduler_kv_live_bytes : Gauge
109109 scheduler_admission_total : Counter
110- # ADR 0007 §2.10 — cross-request KV reuse observability.
111- # Both ``path`` labels are first-class outcomes; neither is an
112- # "error" or "fallback" (per ADR 0007 §2.4.c).
113- path_selection_total : Counter
114- continuation_tokens_skipped_total : Counter
115- verifier_prefill_duration_seconds : Histogram
116- cache_invariant_violations_total : Counter
117110
118111 @classmethod
119112 def build (cls ) -> "Metrics" :
@@ -194,47 +187,6 @@ def build(cls) -> "Metrics":
194187 labelnames = ["result" ],
195188 registry = registry ,
196189 ),
197- path_selection_total = Counter (
198- "path_selection_total" ,
199- "Total path-selection decisions made by the verifier "
200- "for cross-request KV cache reuse (ADR 0007 §2.4). "
201- "Both 'continuation' and 'new_session' are first-class "
202- "first-class outcomes; neither is an 'error' or "
203- "'fallback' (§2.4.c). Healthy long-session agent "
204- "workloads see continuation rate >= 95%." ,
205- labelnames = ["path" ],
206- registry = registry ,
207- ),
208- continuation_tokens_skipped_total = Counter (
209- "continuation_tokens_skipped_total" ,
210- "Cumulative prompt tokens that the continuation path "
211- "did not need to re-prefill (ADR 0007 §2.10). Sums "
212- "ContinuationPlan.skip_n across every continuation-"
213- "path request the server has handled. The win." ,
214- registry = registry ,
215- ),
216- verifier_prefill_duration_seconds = Histogram (
217- "verifier_prefill_duration_seconds" ,
218- "Wall time of the prefill phase of a single request, "
219- "partitioned by path. Continuation-path histogram "
220- "centers around per-incremental-token cost; "
221- "new-session-path histogram tracks full-prefill cost "
222- "(O(history_length))." ,
223- labelnames = ["path" ],
224- buckets = (
225- 0.001 , 0.005 , 0.01 , 0.05 , 0.1 , 0.5 ,
226- 1.0 , 5.0 , 10.0 , 30.0 , 60.0 , 120.0 , 300.0 ,
227- ),
228- registry = registry ,
229- ),
230- cache_invariant_violations_total = Counter (
231- "cache_invariant_violations_total" ,
232- "Count of ADR 0007 §2.9 INV-1 / INV-2 detections at "
233- "runtime. Should always read 0; any non-zero value is "
234- "a critical operational alert (page on it)." ,
235- labelnames = ["kind" ],
236- registry = registry ,
237- ),
238190 )
239191
240192 # ------------------------------------------------------------------
@@ -255,32 +207,6 @@ def record_admission(self, *, admitted: bool) -> None:
255207 result = "admitted" if admitted else "rejected"
256208 ).inc ()
257209
258- def record_path_selection (self , * , path : str , tokens_skipped : int ,
259- prefill_duration_s : float ) -> None :
260- """Record one path-selection decision (ADR 0007 §2.10).
261-
262- ``path`` must be ``"continuation"`` or ``"new_session"``. The
263- method does not validate the label set explicitly because
264- prometheus-client's ``labels()`` already raises for unknown
265- labels; we want such a violation to surface loudly per the
266- no-silent-failure principle.
267- """
268- self .path_selection_total .labels (path = path ).inc ()
269- if tokens_skipped > 0 :
270- self .continuation_tokens_skipped_total .inc (tokens_skipped )
271- self .verifier_prefill_duration_seconds .labels (path = path ).observe (
272- float (prefill_duration_s )
273- )
274-
275- def record_cache_invariant_violation (self , * , kind : str ) -> None :
276- """Record an INV-1 or INV-2 detection (ADR 0007 §2.9).
277-
278- ``kind`` must be ``"inv1"`` or ``"inv2"``. Should never be
279- called in healthy operation; any increment of this counter
280- is a critical alert.
281- """
282- self .cache_invariant_violations_total .labels (kind = kind ).inc ()
283-
284210 def record_completion (self , * , finish_reason : str , n_tokens : int ,
285211 acceptance_rate : Optional [float ]) -> None :
286212 self .inference_completions_total .labels (
0 commit comments