Skip to content

Commit 2221254

Browse files
committed
feat: add quota monitoring metrics
Signed-off-by: Major Hayden <major@redhat.com>
1 parent ca125c4 commit 2221254

5 files changed

Lines changed: 133 additions & 7 deletions

File tree

src/app/endpoints/responses.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import asyncio
66
import json
7+
import time
78
from collections.abc import AsyncIterator
89
from datetime import UTC, datetime
910
from typing import Annotated, Any, Final, Optional, cast
@@ -39,6 +40,7 @@
3940
from configuration import configuration
4041
from constants import SUBSTITUTED_INSTRUCTIONS_PLACEHOLDER
4142
from log import get_logger
43+
from metrics import recording
4244
from models.config import Action
4345
from models.requests import ResponsesRequest
4446
from models.responses import (
@@ -136,6 +138,27 @@ def _get_user_agent(request: Request) -> Optional[str]:
136138
return sanitized or None
137139

138140

141+
def _check_response_quota(user_id: str, endpoint_path: str) -> None:
    """Verify quota availability for a user and emit a quota-check metric.

    The availability check is timed with a monotonic clock and reported
    through ``recording.record_quota_check``. The metric is labelled with the
    fixed category string ``"user_id"`` rather than the actual identifier.

    Args:
        user_id: Subject passed to ``check_tokens_available``.
        endpoint_path: Endpoint path used as the metric label.

    Raises:
        HTTPException: Re-raised unchanged from ``check_tokens_available``
            after a ``failure`` sample has been recorded.
    """

    def _report(outcome: str) -> None:
        # ``started`` is bound below, before this helper is ever invoked.
        recording.record_quota_check(
            endpoint_path,
            "user_id",
            outcome,
            time.monotonic() - started,
        )

    started = time.monotonic()
    try:
        check_tokens_available(configuration.quota_limiters, user_id)
    except HTTPException:
        _report("failure")
        raise
    else:
        _report("success")
160+
161+
139162
responses_response: dict[int | str, dict[str, Any]] = {
140163
200: ResponsesResponse.openapi_response(),
141164
401: UnauthorizedResponse.openapi_response(
@@ -275,11 +298,12 @@ async def responses_endpoint_handler(
275298
started_at = datetime.now(UTC)
276299
rh_identity_context = get_rh_identity_context(request)
277300
user_id, _, _, token = auth
301+
endpoint_path = "/v1/responses"
278302

279303
await check_mcp_auth(configuration, mcp_headers, token, request.headers)
280304

281305
# Check token availability
282-
check_tokens_available(configuration.quota_limiters, user_id)
306+
_check_response_quota(user_id, endpoint_path)
283307

284308
# Enforce RBAC: optionally disallow overriding model in requests
285309
validate_model_provider_override(
@@ -331,7 +355,6 @@ async def responses_endpoint_handler(
331355
)
332356
attachments_text = extract_attachments_text(original_request.input)
333357

334-
endpoint_path = "/v1/responses"
335358
moderation_result = await run_shield_moderation(
336359
client,
337360
input_text + "\n\n" + attachments_text,

src/app/endpoints/rlsapi_v1.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,36 @@ def _resolve_quota_subject(request: Request, auth: AuthTuple) -> Optional[str]:
532532
return system_id
533533

534534

535+
def _check_infer_quota(
    request: Request, auth: AuthTuple, endpoint_path: str
) -> Optional[str]:
    """Resolve the quota subject, check availability, and record a metric.

    The metric's ``quota_type`` label is the configured
    ``rlsapi_v1.quota_subject`` value, or ``"disabled"`` when that value is
    falsy. When no subject is resolved, a ``skipped`` sample with a duration
    of ``0.0`` is recorded and no availability check is made.

    Args:
        request: Incoming request, forwarded to ``_resolve_quota_subject``.
        auth: Authentication tuple, forwarded to ``_resolve_quota_subject``.
        endpoint_path: Endpoint path used as the metric label.

    Returns:
        The resolved quota subject identifier, or ``None`` when the check
        was skipped.

    Raises:
        HTTPException: Re-raised unchanged from ``check_tokens_available``
            after a ``failure`` sample has been recorded.
    """
    subject = _resolve_quota_subject(request, auth)
    category = configuration.rlsapi_v1.quota_subject or "disabled"

    if subject is None:
        recording.record_quota_check(endpoint_path, category, "skipped", 0.0)
        return None

    began = time.monotonic()
    try:
        check_tokens_available(configuration.quota_limiters, subject)
    except HTTPException:
        elapsed = time.monotonic() - began
        recording.record_quota_check(endpoint_path, category, "failure", elapsed)
        raise
    else:
        elapsed = time.monotonic() - began
        recording.record_quota_check(endpoint_path, category, "success", elapsed)
    return subject
563+
564+
535565
def _build_infer_response(
536566
response_text: str,
537567
request_id: str,
@@ -669,12 +699,11 @@ async def infer_endpoint( # pylint: disable=R0914
669699
"""
670700
# Authentication enforced by get_auth_dependency(), authorization by @authorize decorator.
671701
check_configuration_loaded(configuration)
702+
endpoint_path = "/v1/infer"
672703

673704
# Quota enforcement: resolve subject and check availability before any work.
674705
# No-op when quota_subject is not configured or no quota limiters exist.
675-
quota_id = _resolve_quota_subject(request, auth)
676-
if quota_id is not None:
677-
check_tokens_available(configuration.quota_limiters, quota_id)
706+
quota_id = _check_infer_quota(request, auth, endpoint_path)
678707

679708
request_id = get_suid()
680709

@@ -685,8 +714,6 @@ async def infer_endpoint( # pylint: disable=R0914
685714
"Request %s: Combined input source length: %d", request_id, len(input_source)
686715
)
687716

688-
endpoint_path = "/v1/infer"
689-
690717
# Run shield moderation on user input before inference.
691718
# Uses all configured shields; no-op when no shields are registered.
692719
# Runs before model/tool discovery so blocked requests short-circuit

src/metrics/__init__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,17 @@
5555
"LLM tokens received",
5656
["provider", "model", "endpoint"],
5757
)
58+
59+
# Number of pre-request quota availability checks. Labels are bounded
# categories (endpoint, quota type, result), never the subject identifier.
quota_checks_total = Counter(
    name="ls_quota_checks_total",
    documentation="Quota availability checks",
    labelnames=["endpoint", "quota_type", "result"],
)

# Latency of quota availability checks, using the same bounded label set as
# the counter above.
quota_check_duration_seconds = Histogram(
    name="ls_quota_check_duration_seconds",
    documentation="Quota availability check duration",
    labelnames=["endpoint", "quota_type", "result"],
)

src/metrics/recording.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,23 @@ def record_llm_token_usage(
109109
)
110110
except (AttributeError, TypeError, ValueError):
111111
logger.warning("Failed to update token metrics", exc_info=True)
112+
113+
114+
def record_quota_check(
    endpoint_path: str, quota_type: str, result: str, duration: float
) -> None:
    """Count one quota availability check and observe how long it took.

    The counter is incremented first, then the histogram is updated, both
    under the same three labels. ``AttributeError``, ``TypeError`` and
    ``ValueError`` raised while updating either metric are logged as a
    warning and not propagated.

    Args:
        endpoint_path: API endpoint path for metric labeling.
        quota_type: Bounded quota category, not the subject identifier.
        result: Bounded result label, such as ``success``, ``skipped``, or
            ``failure``.
        duration: Quota check duration in seconds.
    """
    label_values = (endpoint_path, quota_type, result)
    try:
        check_counter = metrics.quota_checks_total.labels(*label_values)
        check_counter.inc()
        duration_histogram = metrics.quota_check_duration_seconds.labels(
            *label_values
        )
        duration_histogram.observe(duration)
    except (AttributeError, TypeError, ValueError):
        logger.warning("Failed to update quota check metrics", exc_info=True)

tests/unit/metrics/test_recording.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
"""Unit tests for Prometheus metric recording helpers."""
22

3+
from typing import Any
4+
5+
import pytest
36
from pytest_mock import MockerFixture
47

58
from metrics import recording
@@ -159,3 +162,42 @@ def test_record_llm_token_usage_logs_metric_errors(mocker: MockerFixture) -> Non
159162
mock_logger.warning.assert_called_once_with(
160163
"Failed to update token metrics", exc_info=True
161164
)
165+
166+
167+
@pytest.fixture(name="recording_logger")
def recording_logger_fixture(mocker: MockerFixture) -> Any:
    """Replace ``metrics.recording.logger`` with a mock and return that mock."""
    patched_logger = mocker.patch("metrics.recording.logger")
    return patched_logger
171+
172+
173+
@pytest.mark.parametrize("failing_metric", ["counter", "histogram"])
def test_record_quota_check_updates_metrics_and_logs_errors(
    mocker: MockerFixture,
    recording_logger: Any,
    failing_metric: str,
) -> None:
    """Check the happy path, then that a failure in either metric is logged."""
    counter = mocker.patch("metrics.recording.metrics.quota_checks_total")
    histogram = mocker.patch(
        "metrics.recording.metrics.quota_check_duration_seconds"
    )
    success_labels = ("/v1/infer", "org_id", "success")
    failure_labels = ("/v1/infer", "org_id", "failure")

    # Happy path: both metrics receive the same labels; histogram gets the duration.
    recording.record_quota_check(*success_labels, 0.75)

    counter.labels.assert_called_once_with(*success_labels)
    counter.labels.return_value.inc.assert_called_once()
    histogram.labels.assert_called_once_with(*success_labels)
    histogram.labels.return_value.observe.assert_called_once_with(0.75)

    # Failure path: make exactly one of the two metric updates raise.
    counter.reset_mock()
    histogram.reset_mock()
    failing_call = (
        counter.labels.return_value.inc
        if failing_metric == "counter"
        else histogram.labels.return_value.observe
    )
    failing_call.side_effect = TypeError("bad")

    recording.record_quota_check(*failure_labels, 0.75)

    recording_logger.warning.assert_called_once_with(
        "Failed to update quota check metrics", exc_info=True
    )

0 commit comments

Comments
 (0)