@@ -125,31 +125,28 @@ def _parse_all_kv_metrics(data: str, prefix: str) -> Dict[str, float | None]:
125125 return {name : _parse_prometheus_sample (data , name ) for name in names }
126126
127127
def test_kv_cache_metrics_available_before_first_request(
        server: RemoteOpenAIServer):
    """Verify that KV cache metrics are available at startup, before any
    inference request.

    External scrapers (e.g. the Kubernetes Inference Gateway EPP) rely on
    these metrics for routing decisions. This test sends no inference
    request itself; it only polls ``/prometheus/metrics`` until the
    ``kv_cache_utilization`` metric name appears or the wait budget runs out.

    Args:
        server: Running server whose ``url_root`` hosts the Prometheus
            metrics endpoint.

    Raises:
        AssertionError: If the endpoint does not answer with HTTP 200, or the
            metric name is still absent after ``max_wait_time`` seconds.
    """
    metric_prefix = "trtllm_"
    metric_name = metric_prefix + "kv_cache_utilization"
    max_wait_time = 10.0
    poll_interval = 0.5
    # Monotonic clock: the deadline must not shift if the wall clock is
    # adjusted while the test is polling.
    deadline = time.monotonic() + max_wait_time
    metrics_found = False

    while time.monotonic() < deadline:
        # The context manager closes the HTTP response on every iteration so
        # repeated polling does not leak connections.
        with urlopen(f'{server.url_root}/prometheus/metrics') as response:
            assert response.status == 200
            data = response.read().decode("utf-8")
        if metric_name in data:
            metrics_found = True
            break
        time.sleep(poll_interval)

    assert metrics_found, \
        (f"{metric_prefix}kv_cache_utilization not found in /prometheus/metrics "
         f"after {max_wait_time}s — it should be available before any request")
154151
155152
0 commit comments