Skip to content

Commit 27816d5

Browse files
committed
refactor(metrics): centralize metric recording
Signed-off-by: Major Hayden <major@redhat.com>
1 parent c50425e commit 27816d5

12 files changed

Lines changed: 301 additions & 153 deletions

File tree

src/app/endpoints/rlsapi_v1.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@
1717
from openai._exceptions import APIStatusError as OpenAIAPIStatusError
1818

1919
import constants
20-
import metrics
2120
from authentication import get_auth_dependency
2221
from authentication.interface import AuthTuple
2322
from authorization.middleware import authorize
2423
from client import AsyncLlamaStackClientHolder
2524
from configuration import configuration
2625
from log import get_logger
26+
from metrics import recording
2727
from models.config import Action
2828
from models.responses import (
2929
UNAUTHORIZED_OPENAPI_EXAMPLES,
@@ -447,7 +447,7 @@ def _record_inference_failure( # pylint: disable=too-many-arguments,too-many-po
447447
The total inference time in seconds.
448448
"""
449449
inference_time = time.monotonic() - start_time
450-
metrics.llm_calls_failures_total.labels(provider, model).inc()
450+
recording.record_llm_failure(provider, model)
451451
_queue_splunk_event(
452452
background_tasks,
453453
infer_request,

src/app/endpoints/streaming_query.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@
4040
)
4141
from openai._exceptions import APIStatusError as OpenAIAPIStatusError
4242

43-
import metrics
4443
from authentication import get_auth_dependency
4544
from authentication.interface import AuthTuple
4645
from authorization.azure_token_manager import AzureEntraIDManager
@@ -59,6 +58,7 @@
5958
TOPIC_SUMMARY_INTERRUPT_TIMEOUT_SECONDS,
6059
)
6160
from log import get_logger
61+
from metrics import recording
6262
from models.config import Action
6363
from models.context import ResponseGeneratorContext
6464
from models.requests import QueryRequest
@@ -283,7 +283,7 @@ async def streaming_query_endpoint_handler( # pylint: disable=too-many-locals
283283
provider_id, model_id = extract_provider_and_model_from_model_id(
284284
responses_params.model
285285
)
286-
metrics.llm_calls_total.labels(provider_id, model_id).inc()
286+
recording.record_llm_call(provider_id, model_id)
287287

288288
generator, turn_summary = await retrieve_response_generator(
289289
responses_params=responses_params,

src/app/main.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from starlette.routing import Mount, Route, WebSocketRoute
1313
from starlette.types import ASGIApp, Message, Receive, Scope, Send
1414

15-
import metrics
1615
import version
1716
from a2a_storage import A2AStorageFactory
1817
from app import routers
@@ -22,6 +21,7 @@
2221
from client import AsyncLlamaStackClientHolder
2322
from configuration import configuration
2423
from log import get_logger
24+
from metrics import recording
2525
from models.responses import InternalServerErrorResponse
2626
from sentry import initialize_sentry
2727
from utils.common import register_mcp_servers_async
@@ -182,12 +182,12 @@ async def send_wrapper(message: Message) -> None:
182182
# Measure duration and forward the request. Use try/finally so the
183183
# call counter is always incremented, even when the inner app raises.
184184
try:
185-
with metrics.response_duration_seconds.labels(path).time():
185+
with recording.measure_response_duration(path):
186186
await self.app(scope, receive, send_wrapper)
187187
finally:
188188
# Ignore /metrics endpoint that will be called periodically.
189189
if not path.endswith("/metrics"):
190-
metrics.rest_api_calls_total.labels(path, status_code).inc()
190+
recording.record_rest_api_call(path, status_code)
191191

192192

193193
class GlobalExceptionMiddleware: # pylint: disable=too-few-public-methods

src/metrics/__init__.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,12 @@
4242
"ls_llm_validation_errors_total", "LLM validation errors"
4343
)
4444

45-
# TODO(lucasagomes): Add metric for token usage
46-
# https://issues.redhat.com/browse/LCORE-411
45+
# Metric that counts how many tokens were sent to LLMs
4746
llm_token_sent_total = Counter(
4847
"ls_llm_token_sent_total", "LLM tokens sent", ["provider", "model"]
4948
)
5049

51-
# TODO(lucasagomes): Add metric for token usage
52-
# https://issues.redhat.com/browse/LCORE-411
50+
# Metric that counts how many tokens were received from LLMs
5351
llm_token_received_total = Counter(
5452
"ls_llm_token_received_total", "LLM tokens received", ["provider", "model"]
5553
)

src/metrics/recording.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
"""Recording helpers for Prometheus metrics.
2+
3+
This module keeps metric definitions in ``metrics.__init__`` while providing a
4+
small facade for application code. New metrics should add a recording helper
5+
here so callers do not need to know Prometheus object details.
6+
"""
7+
8+
from collections.abc import Iterator
9+
from contextlib import contextmanager
10+
11+
import metrics
12+
from log import get_logger
13+
14+
logger = get_logger(__name__)
15+
16+
17+
@contextmanager
def measure_response_duration(path: str) -> Iterator[None]:
    """Time the enclosed block as a REST API response duration.

    The body of the ``with`` statement runs while the timer obtained from
    ``metrics.response_duration_seconds`` for ``path`` is active.

    Args:
        path: Normalized route path used as the metric label.
    """
    duration_timer = metrics.response_duration_seconds.labels(path).time()
    with duration_timer:
        yield
26+
27+
28+
def record_rest_api_call(path: str, status_code: int) -> None:
    """Record one REST API request.

    Increments ``metrics.rest_api_calls_total`` for the given label pair.
    As with the LLM recording helpers in this module, an ``AttributeError``,
    ``TypeError`` or ``ValueError`` raised while updating the metric is logged
    as a warning instead of being propagated, so metric bookkeeping cannot
    break the request being served or mask an exception that is already
    propagating through the caller.

    Args:
        path: Normalized route path used as the metric label.
        status_code: HTTP response status code returned by the endpoint.
    """
    try:
        metrics.rest_api_calls_total.labels(path, status_code).inc()
    except (AttributeError, TypeError, ValueError):
        logger.warning("Failed to update REST API call metric", exc_info=True)
36+
37+
38+
def record_llm_call(provider: str, model: str) -> None:
    """Count a single LLM call against the given provider and model.

    An ``AttributeError``, ``TypeError`` or ``ValueError`` raised while
    updating the counter is logged as a warning and not re-raised.

    Args:
        provider: LLM provider identifier.
        model: LLM model identifier without the provider prefix.
    """
    try:
        call_counter = metrics.llm_calls_total.labels(provider, model)
        call_counter.inc()
    except (AttributeError, TypeError, ValueError):
        logger.warning("Failed to update LLM call metric", exc_info=True)
49+
50+
51+
def record_llm_failure(provider: str, model: str) -> None:
    """Count a single failed LLM call against the given provider and model.

    An ``AttributeError``, ``TypeError`` or ``ValueError`` raised while
    updating the counter is logged as a warning and not re-raised.

    Args:
        provider: LLM provider identifier.
        model: LLM model identifier without the provider prefix.
    """
    try:
        failure_counter = metrics.llm_calls_failures_total.labels(provider, model)
        failure_counter.inc()
    except (AttributeError, TypeError, ValueError):
        logger.warning("Failed to update LLM failure metric", exc_info=True)
62+
63+
64+
def record_llm_validation_error() -> None:
    """Count a single LLM validation error (for example a shield violation).

    An ``AttributeError``, ``TypeError`` or ``ValueError`` raised while
    updating the counter is logged as a warning and not re-raised.
    """
    try:
        validation_counter = metrics.llm_calls_validation_errors_total
        validation_counter.inc()
    except (AttributeError, TypeError, ValueError):
        logger.warning("Failed to update LLM validation error metric", exc_info=True)
70+
71+
72+
def record_llm_token_usage(
    provider: str,
    model: str,
    input_tokens: int,
    output_tokens: int,
) -> None:
    """Add token counts for one LLM exchange to the sent/received counters.

    The sent counter is updated first, then the received counter. Both
    updates share one ``try`` block, so if the first raises one of the handled
    exceptions the second is skipped; the failure is logged as a warning and
    not re-raised.

    Args:
        provider: LLM provider identifier.
        model: LLM model identifier without the provider prefix.
        input_tokens: Number of tokens sent to the LLM.
        output_tokens: Number of tokens received from the LLM.
    """
    try:
        # Keep the original order: each counter is resolved and incremented
        # before the next one is touched.
        sent_counter = metrics.llm_token_sent_total.labels(provider, model)
        sent_counter.inc(input_tokens)
        received_counter = metrics.llm_token_received_total.labels(provider, model)
        received_counter.inc(output_tokens)
    except (AttributeError, TypeError, ValueError):
        logger.warning("Failed to update token metrics", exc_info=True)

src/utils/responses.py

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,11 @@
8686
from llama_stack_client import APIConnectionError, APIStatusError, AsyncLlamaStackClient
8787

8888
import constants
89-
import metrics
9089
from client import AsyncLlamaStackClientHolder
9190
from configuration import configuration
9291
from constants import DEFAULT_RAG_TOOL
9392
from log import get_logger
93+
from metrics import recording
9494
from models.config import ByokRag
9595
from models.database.conversations import UserConversation
9696
from models.requests import QueryRequest
@@ -922,7 +922,7 @@ def extract_token_usage(usage: Optional[ResponseUsage], model: str) -> TokenCoun
922922
logger.debug(
923923
"No usage information in Responses API response, token counts will be 0"
924924
)
925-
_increment_llm_call_metric(provider_id, model_id)
925+
recording.record_llm_call(provider_id, model_id)
926926
return TokenCounter(llm_calls=1)
927927

928928
token_counter = TokenCounter(
@@ -934,18 +934,14 @@ def extract_token_usage(usage: Optional[ResponseUsage], model: str) -> TokenCoun
934934
token_counter.output_tokens,
935935
)
936936

937-
# Update Prometheus metrics only when we have actual usage data
938-
try:
939-
metrics.llm_token_sent_total.labels(provider_id, model_id).inc(
940-
token_counter.input_tokens
941-
)
942-
metrics.llm_token_received_total.labels(provider_id, model_id).inc(
943-
token_counter.output_tokens
944-
)
945-
except (AttributeError, TypeError, ValueError) as e:
946-
logger.warning("Failed to update token metrics: %s", e)
947-
948-
_increment_llm_call_metric(provider_id, model_id)
937+
# Update Prometheus metrics only when we have actual usage data.
938+
recording.record_llm_token_usage(
939+
provider_id,
940+
model_id,
941+
token_counter.input_tokens,
942+
token_counter.output_tokens,
943+
)
944+
recording.record_llm_call(provider_id, model_id)
949945
return token_counter
950946

951947

@@ -1251,14 +1247,6 @@ def extract_rag_chunks_from_file_search_item(
12511247
return rag_chunks
12521248

12531249

1254-
def _increment_llm_call_metric(provider: str, model: str) -> None:
1255-
"""Safely increment LLM call metric."""
1256-
try:
1257-
metrics.llm_calls_total.labels(provider, model).inc()
1258-
except (AttributeError, TypeError, ValueError) as e:
1259-
logger.warning("Failed to update LLM call metric: %s", e)
1260-
1261-
12621250
def parse_arguments_string(arguments_str: str) -> dict[str, Any]:
12631251
"""Parse an arguments string into a dictionary.
12641252

src/utils/shields.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414
from llama_stack_client.types import ShieldListResponse
1515
from openai._exceptions import APIStatusError as OpenAIAPIStatusError
1616

17-
import metrics
1817
from configuration import AppConfig
1918
from constants import DEFAULT_VIOLATION_MESSAGE
2019
from log import get_logger
20+
from metrics import recording
2121
from models.requests import QueryRequest
2222
from models.responses import (
2323
InternalServerErrorResponse,
@@ -77,7 +77,7 @@ def detect_shield_violations(output_items: list[Any]) -> bool:
7777
refusal = getattr(output_item, "refusal", None)
7878
if refusal:
7979
# Metric for LLM validation errors (shield violations)
80-
metrics.llm_calls_validation_errors_total.inc()
80+
recording.record_llm_validation_error()
8181
logger.warning("Shield violation detected: %s", refusal)
8282
return True
8383
return False
@@ -178,7 +178,7 @@ async def run_shield_moderation(
178178

179179
if moderation_result.results and moderation_result.results[0].flagged:
180180
result = moderation_result.results[0]
181-
metrics.llm_calls_validation_errors_total.inc()
181+
recording.record_llm_validation_error()
182182
logger.warning(
183183
"Shield '%s' flagged content: categories=%s",
184184
shield.identifier,

tests/unit/app/endpoints/test_streaming_query.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,7 @@ async def test_successful_streaming_query(
389389
"app.endpoints.streaming_query.extract_provider_and_model_from_model_id",
390390
return_value=("provider1", "model1"),
391391
)
392-
mocker.patch("app.endpoints.streaming_query.metrics.llm_calls_total")
392+
mocker.patch("app.endpoints.streaming_query.recording.record_llm_call")
393393

394394
async def mock_generator() -> AsyncIterator[str]:
395395
yield "data: test\n\n"
@@ -476,7 +476,7 @@ async def test_streaming_query_text_media_type_header(
476476
"app.endpoints.streaming_query.extract_provider_and_model_from_model_id",
477477
return_value=("provider1", "model1"),
478478
)
479-
mocker.patch("app.endpoints.streaming_query.metrics.llm_calls_total")
479+
mocker.patch("app.endpoints.streaming_query.recording.record_llm_call")
480480

481481
async def mock_generator() -> AsyncIterator[str]:
482482
yield "data: test\n\n"
@@ -574,7 +574,7 @@ async def test_streaming_query_with_conversation(
574574
"app.endpoints.streaming_query.extract_provider_and_model_from_model_id",
575575
return_value=("provider1", "model1"),
576576
)
577-
mocker.patch("app.endpoints.streaming_query.metrics.llm_calls_total")
577+
mocker.patch("app.endpoints.streaming_query.recording.record_llm_call")
578578

579579
async def mock_generator() -> AsyncIterator[str]:
580580
yield "data: test\n\n"
@@ -670,7 +670,7 @@ async def test_streaming_query_with_attachments(
670670
"app.endpoints.streaming_query.extract_provider_and_model_from_model_id",
671671
return_value=("provider1", "model1"),
672672
)
673-
mocker.patch("app.endpoints.streaming_query.metrics.llm_calls_total")
673+
mocker.patch("app.endpoints.streaming_query.recording.record_llm_call")
674674

675675
async def mock_generator() -> AsyncIterator[str]:
676676
yield "data: test\n\n"
@@ -770,7 +770,7 @@ async def test_streaming_query_azure_token_refresh(
770770
"app.endpoints.streaming_query.run_shield_moderation",
771771
new=mocker.AsyncMock(return_value=ShieldModerationPassed()),
772772
)
773-
mocker.patch("app.endpoints.streaming_query.metrics.llm_calls_total")
773+
mocker.patch("app.endpoints.streaming_query.recording.record_llm_call")
774774

775775
async def mock_generator() -> AsyncIterator[str]:
776776
yield "data: test\n\n"

0 commit comments

Comments
 (0)