Update prometheus test to check kv_cache_utilization instead of cache_config_info

BenjaminBraunDev · BenjaminBraunDev · commit 008f9ab8ccdd · 2026-04-15T23:24:16.000Z
Signed-off-by: BenjaminBraunDev <BenjaminBraunDev@users.noreply.github.com>
diff --git a/tests/unittest/llmapi/apps/_test_openai_prometheus.py b/tests/unittest/llmapi/apps/_test_openai_prometheus.py
@@ -125,31 +125,28 @@ def _parse_all_kv_metrics(data: str, prefix: str) -> Dict[str, float | None]:
     return {name: _parse_prometheus_sample(data, name) for name in names}
 
 
-def test_cache_config_available_before_first_request(
+def test_kv_cache_metrics_available_before_first_request(
         server: RemoteOpenAIServer):
-    """Verify that trtllm_cache_config_info is available at startup, before
-    any inference request.  External scrapers (e.g. the Kubernetes Inference
-    Gateway EPP) rely on this metric for routing decisions."""
+    """Verify that KV cache metrics are available at startup, before any
+    inference request.  External scrapers (e.g. the Kubernetes Inference
+    Gateway EPP) rely on these metrics for routing decisions."""
     metric_prefix = "trtllm_"
     max_wait_time = 10.0
     poll_interval = 0.5
     start_time = time.time()
-    cache_config_found = False
+    metrics_found = False
 
     while time.time() - start_time < max_wait_time:
         response = urlopen(f'{server.url_root}/prometheus/metrics')
         assert response.status == 200
         data = response.read().decode("utf-8")
-        if metric_prefix + "cache_config_info" in data:
-            cache_config_found = True
-            # Verify the label values are populated and sensible
-            assert 'block_size="' in data
-            assert 'num_gpu_blocks="' in data
+        if metric_prefix + "kv_cache_utilization" in data:
+            metrics_found = True
             break
         time.sleep(poll_interval)
 
-    assert cache_config_found, \
-        (f"{metric_prefix}cache_config_info not found in /prometheus/metrics "
+    assert metrics_found, \
+        (f"{metric_prefix}kv_cache_utilization not found in /prometheus/metrics "
          f"after {max_wait_time}s — it should be available before any request")