Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -10400,7 +10400,7 @@
"metrics"
],
"summary": "Metrics Endpoint Handler",
"description": "Handle request to the /metrics endpoint.\n\nProcess GET requests to the /metrics endpoint, returning the\nlatest Prometheus metrics in form of a plain text.\n\nInitializes model metrics on the first request if not already\nset up, then responds with the current metrics snapshot in\nPrometheus format.\n\n### Parameters:\n- request: The incoming HTTP request (used by middleware).\n- auth: Authentication tuple from the auth dependency (used by middleware).\n\n### Returns:\n- PlainTextResponse: Response body containing the Prometheus metrics text\n and the Prometheus content type.",
"description": "Handle request to the /metrics endpoint.\n\nProcess GET requests to the /metrics endpoint, returning the\nlatest Prometheus metrics in plain text Prometheus format.\n\n### Parameters:\n- request: The incoming HTTP request (used by middleware).\n- auth: Authentication tuple from the auth dependency (used by middleware).\n\n### Returns:\n- PlainTextResponse: Response body containing the Prometheus metrics text\n and the Prometheus content type.",

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📐 Maintainability & Code Quality | 🟠 Major | ⚡ Quick win

Regenerate and commit OpenAPI schema to fix CI drift.

The Spectral job is failing because docs/openapi.json no longer matches the generated schema. Please regenerate with uv run python scripts/generate_openapi_schema.py docs/openapi.json and commit the updated artifact to keep docs and runtime schema in sync.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@docs/openapi.json` at line 10403, The OpenAPI schema artifact is out of sync
with the generated runtime schema, causing the Spectral CI drift failure.
Regenerate the schema using the existing generator workflow that updates
docs/openapi.json, then commit the refreshed artifact so it matches the current
API definition; the key target to verify is the metrics endpoint description in
the generated OpenAPI output.

Source: Pipeline failures

"operationId": "metrics_endpoint_handler_metrics_get",
"responses": {
"200": {
Expand Down
10 changes: 1 addition & 9 deletions src/app/endpoints/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from authentication import get_auth_dependency
from authentication.interface import AuthTuple
from authorization.middleware import authorize
from metrics.utils import setup_model_metrics
from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES
from models.api.responses.error import (
ForbiddenResponse,
Expand Down Expand Up @@ -47,11 +46,7 @@ async def metrics_endpoint_handler(
Handle request to the /metrics endpoint.

Process GET requests to the /metrics endpoint, returning the
latest Prometheus metrics in form of a plain text.

Initializes model metrics on the first request if not already
set up, then responds with the current metrics snapshot in
Prometheus format.
latest Prometheus metrics in plain text Prometheus format.

### Parameters:
- request: The incoming HTTP request (used by middleware).
Expand All @@ -67,7 +62,4 @@ async def metrics_endpoint_handler(
# Nothing interesting in the request
_ = request

# Setup the model metrics if not already done. This is a one-time setup
# and will not be run again on subsequent calls to this endpoint
await setup_model_metrics()
return PlainTextResponse(generate_latest(), media_type=str(CONTENT_TYPE_LATEST))
9 changes: 9 additions & 0 deletions src/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from configuration import configuration
from log import get_logger
from metrics import recording
from metrics.utils import setup_model_metrics
from models.api.responses.error import InternalServerErrorResponse
from sentry import initialize_sentry
from utils.common import register_mcp_servers_async
Expand Down Expand Up @@ -119,6 +120,14 @@ async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
AzureEntraIDManager().set_base_url(azure_base_url)
logger.info("Registering MCP servers")
await register_mcp_servers_async(logger, configuration.configuration)

# Set up model metrics if in healthy mode
if not degraded_tracker.is_degraded():
try:
await setup_model_metrics()
except APIConnectionError as e:
logger.warning("Failed to set up model metrics: %s", e, exc_info=True)

logger.info("App startup complete")

initialize_database()
Expand Down
6 changes: 6 additions & 0 deletions src/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,9 @@
["provider", "model", "endpoint", "result"],
buckets=LLM_INFERENCE_DURATION_BUCKETS,
)

# Gauge to track degraded mode startup state
started_in_degraded_mode = Gauge(
"ls_started_in_degraded_mode",
"Indicates if service started in degraded mode (1 = degraded, 0 = healthy)",
)
15 changes: 15 additions & 0 deletions src/metrics/recording.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,18 @@ def record_llm_inference_duration(
).observe(duration)
except (AttributeError, TypeError, ValueError):
logger.warning("Failed to update LLM inference duration metric", exc_info=True)


def set_started_in_degraded_mode(is_degraded: bool) -> None:
"""Set the startup degraded mode gauge.

This metric tracks whether the service started in degraded mode.
It is set once at startup and does not change at runtime.

Args:
is_degraded: True if service started in degraded mode, False if healthy.
Comment on lines +168 to +169

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📐 Maintainability & Code Quality | 🟡 Minor | ⚡ Quick win

Use the Parameters: docstring header instead of Args:.

This repo standardizes on the Parameters: section header for documented arguments.

📝 Proposed fix
-    Args:
-        is_degraded: True if service started in degraded mode, False if healthy.
+    Parameters:
+        is_degraded: True if service started in degraded mode, False if healthy.

Based on learnings: docstrings must use the section header name "Parameters:" (not "Args:") for function arguments.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
Args:
is_degraded: True if service started in degraded mode, False if healthy.
Parameters:
is_degraded: True if service started in degraded mode, False if healthy.
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/metrics/recording.py` around lines 168 - 169, The docstring for the
relevant function in recording.py uses the nonstandard Args: header; update it
to the repo’s required Parameters: section while keeping the existing
is_degraded argument description intact. Locate the affected docstring near the
metrics recording function and replace the header so it matches the standard
used throughout the codebase.

Source: Learnings

"""
try:
metrics.started_in_degraded_mode.set(1 if is_degraded else 0)
except (AttributeError, TypeError, ValueError):
logger.warning("Failed to update started_in_degraded_mode gauge", exc_info=True)
18 changes: 6 additions & 12 deletions src/metrics/utils.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,23 @@
"""Utility functions for metrics handling."""

from fastapi import HTTPException
from llama_stack_client import APIConnectionError, APIStatusError

import metrics
from client import AsyncLlamaStackClientHolder
from configuration import configuration
from log import get_logger
from models.api.responses.error import ServiceUnavailableResponse
from utils.common import run_once_async
from utils.endpoints import check_configuration_loaded

logger = get_logger(__name__)


@run_once_async
async def setup_model_metrics() -> None:
"""Perform setup of all metrics related to LLM model and provider."""
"""Perform setup of all metrics related to LLM model and provider.

Should be called during startup when service is in healthy mode.
Skipped in degraded mode to avoid blocking on unavailable llama-stack.
"""
logger.info("Setting up model metrics")
check_configuration_loaded(configuration)
try:
model_list = await AsyncLlamaStackClientHolder().get_client().models.list()
except (APIConnectionError, APIStatusError) as e:
response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e))
raise HTTPException(**response.model_dump()) from e
model_list = await AsyncLlamaStackClientHolder().get_client().models.list()

models = [
model
Expand Down
7 changes: 7 additions & 0 deletions src/utils/degraded_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from typing import Optional

from metrics import recording
from utils.types import Singleton


Expand All @@ -31,11 +32,17 @@ def set_degraded(self, reason: str) -> None:
self._is_degraded = True
self._degraded_reason = reason

# Record startup state metric
recording.set_started_in_degraded_mode(True)

def set_healthy(self) -> None:
"""Mark the service as running in healthy mode."""
self._is_degraded = False
self._degraded_reason = None

# Record startup state metric
recording.set_started_in_degraded_mode(False)
Comment on lines +35 to +44

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎯 Functional Correctness | 🟠 Major | ⚡ Quick win

Preserve startup-only semantics for ls_started_in_degraded_mode.

Line 43 resets the gauge to 0, which can erase evidence that the service started degraded if set_healthy() is called after startup. For a startup-state metric, this should be write-once per process startup.

💡 Suggested fix
 class DegradedModeTracker(metaclass=Singleton):
@@
     def __init__(self) -> None:
         """Initialize the degraded mode tracker."""
         self._is_degraded: bool = False
         self._degraded_reason: Optional[str] = None
+        self._startup_state_recorded: bool = False
+
+    def _record_startup_state_once(self, is_degraded: bool) -> None:
+        """Record startup degraded-state metric only once per process start."""
+        if self._startup_state_recorded:
+            return
+        recording.set_started_in_degraded_mode(is_degraded)
+        self._startup_state_recorded = True
@@
-        recording.set_started_in_degraded_mode(True)
+        self._record_startup_state_once(True)
@@
-        recording.set_started_in_degraded_mode(False)
+        self._record_startup_state_once(False)
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/utils/degraded_mode.py` around lines 35 - 44, The `set_healthy()` method
is calling `recording.set_started_in_degraded_mode(False)` which overwrites the
startup-state metric, erasing evidence that the service started degraded. Since
this metric should only capture the startup state and be write-once per process,
remove the line `recording.set_started_in_degraded_mode(False)` from the
`set_healthy()` method. The metric should only be set once during
initialization, not updated when transitioning between degraded and healthy
modes later.


def is_degraded(self) -> bool:
"""Check if the service is running in degraded mode.

Expand Down
8 changes: 2 additions & 6 deletions tests/unit/app/endpoints/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from fastapi import Request
from pytest_mock import MockerFixture

import metrics # noqa: F401 pylint: disable=unused-import
from app.endpoints.metrics import metrics_endpoint_handler
from authentication.interface import AuthTuple
from tests.unit.utils.auth_helpers import mock_authorization_resolvers
Expand All @@ -14,10 +15,6 @@ async def test_metrics_endpoint(mocker: MockerFixture) -> None:
"""Test the metrics endpoint handler."""
mock_authorization_resolvers(mocker)

mock_setup_metrics = mocker.patch(
"app.endpoints.metrics.setup_model_metrics",
new=mocker.AsyncMock(return_value=None),
)
request = Request(
scope={
"type": "http",
Expand All @@ -34,8 +31,6 @@ async def test_metrics_endpoint(mocker: MockerFixture) -> None:

response_body = response.body.decode() # type: ignore

# Assert metrics were set up
mock_setup_metrics.assert_called_once()
# Check if the response contains Prometheus metrics format
assert "# TYPE ls_rest_api_calls_total counter" in response_body
assert "# TYPE ls_response_duration_seconds histogram" in response_body
Expand All @@ -45,3 +40,4 @@ async def test_metrics_endpoint(mocker: MockerFixture) -> None:
assert "# TYPE ls_llm_validation_errors_total counter" in response_body
assert "# TYPE ls_llm_token_sent_total counter" in response_body
assert "# TYPE ls_llm_token_received_total counter" in response_body
assert "# TYPE ls_started_in_degraded_mode gauge" in response_body
Loading