From 359b19e3f930f0e489b3743e82e9c93dffd9325c Mon Sep 17 00:00:00 2001 From: Anik Bhattacharjee Date: Tue, 23 Jun 2026 17:13:05 -0400 Subject: [PATCH] LCORE-1857: Add metrics to track degraded mode startup state Add Prometheus metrics to monitor when Lightspeed Core Stack starts in degraded mode (without llama-stack connectivity). During implementation, discovered that PR #327's lazy initialization pattern prevented model metrics from being set up correctly, requiring a revert. When LCORE starts in degraded mode, the @run_once_async decorator marks setup_model_metrics() as "done" after it returns early. On restart with llama-stack available, the function never runs again, leaving model metrics uninitialized. Moving setup back to startup fixes this while degraded mode already provides the startup resilience that PR #327 originally addressed. Signed-off-by: Anik Bhattacharjee --- docs/openapi.json | 2 +- src/app/endpoints/metrics.py | 10 +--------- src/app/main.py | 9 +++++++++ src/metrics/__init__.py | 6 ++++++ src/metrics/recording.py | 15 +++++++++++++++ src/metrics/utils.py | 18 ++++++------------ src/utils/degraded_mode.py | 7 +++++++ tests/unit/app/endpoints/test_metrics.py | 8 ++------ 8 files changed, 47 insertions(+), 28 deletions(-) diff --git a/docs/openapi.json b/docs/openapi.json index 1f2eb46ab..0ea650f2d 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -10400,7 +10400,7 @@ "metrics" ], "summary": "Metrics Endpoint Handler", - "description": "Handle request to the /metrics endpoint.\n\nProcess GET requests to the /metrics endpoint, returning the\nlatest Prometheus metrics in form of a plain text.\n\nInitializes model metrics on the first request if not already\nset up, then responds with the current metrics snapshot in\nPrometheus format.\n\n### Parameters:\n- request: The incoming HTTP request (used by middleware).\n- auth: Authentication tuple from the auth dependency (used by middleware).\n\n### Returns:\n- PlainTextResponse: Response body containing the Prometheus metrics text\n and the Prometheus content type.", + "description": "Handle request to the /metrics endpoint.\n\nProcess GET requests to the /metrics endpoint, returning the\nlatest Prometheus metrics in plain text Prometheus format.\n\n### Parameters:\n- request: The incoming HTTP request (used by middleware).\n- auth: Authentication tuple from the auth dependency (used by middleware).\n\n### Returns:\n- PlainTextResponse: Response body containing the Prometheus metrics text\n and the Prometheus content type.", "operationId": "metrics_endpoint_handler_metrics_get", "responses": { "200": { diff --git a/src/app/endpoints/metrics.py b/src/app/endpoints/metrics.py index c33a6866c..4b44799c3 100644 --- a/src/app/endpoints/metrics.py +++ b/src/app/endpoints/metrics.py @@ -12,7 +12,6 @@ from authentication import get_auth_dependency from authentication.interface import AuthTuple from authorization.middleware import authorize -from metrics.utils import setup_model_metrics from models.api.responses.constants import UNAUTHORIZED_OPENAPI_EXAMPLES from models.api.responses.error import ( ForbiddenResponse, @@ -47,11 +46,7 @@ async def metrics_endpoint_handler( Handle request to the /metrics endpoint. Process GET requests to the /metrics endpoint, returning the - latest Prometheus metrics in form of a plain text. - - Initializes model metrics on the first request if not already - set up, then responds with the current metrics snapshot in - Prometheus format. + latest Prometheus metrics in plain text Prometheus format. ### Parameters: - request: The incoming HTTP request (used by middleware). @@ -67,7 +62,4 @@ async def metrics_endpoint_handler( # Nothing interesting in the request _ = request - # Setup the model metrics if not already done. This is a one-time setup - # and will not be run again on subsequent calls to this endpoint - await setup_model_metrics() return PlainTextResponse(generate_latest(), media_type=str(CONTENT_TYPE_LATEST)) diff --git a/src/app/main.py b/src/app/main.py index 41466e143..e54c730bc 100644 --- a/src/app/main.py +++ b/src/app/main.py @@ -23,6 +23,7 @@ from configuration import configuration from log import get_logger from metrics import recording +from metrics.utils import setup_model_metrics from models.api.responses.error import InternalServerErrorResponse from sentry import initialize_sentry from utils.common import register_mcp_servers_async @@ -119,6 +120,14 @@ async def lifespan(_app: FastAPI) -> AsyncIterator[None]: AzureEntraIDManager().set_base_url(azure_base_url) logger.info("Registering MCP servers") await register_mcp_servers_async(logger, configuration.configuration) + + # Set up model metrics if in healthy mode + if not degraded_tracker.is_degraded(): + try: + await setup_model_metrics() + except APIConnectionError as e: + logger.warning("Failed to set up model metrics: %s", e, exc_info=True) + logger.info("App startup complete") initialize_database() diff --git a/src/metrics/__init__.py b/src/metrics/__init__.py index 63d7b8e29..ae6496501 100644 --- a/src/metrics/__init__.py +++ b/src/metrics/__init__.py @@ -82,3 +82,9 @@ ["provider", "model", "endpoint", "result"], buckets=LLM_INFERENCE_DURATION_BUCKETS, ) + +# Gauge to track degraded mode startup state +started_in_degraded_mode = Gauge( + "ls_started_in_degraded_mode", + "Indicates if service started in degraded mode (1 = degraded, 0 = healthy)", +) diff --git a/src/metrics/recording.py b/src/metrics/recording.py index a9b35d208..c41c105b5 100644 --- a/src/metrics/recording.py +++ b/src/metrics/recording.py @@ -157,3 +157,18 @@ def record_llm_inference_duration( ).observe(duration) except (AttributeError, TypeError, ValueError): logger.warning("Failed to update LLM inference duration metric", exc_info=True) + + +def set_started_in_degraded_mode(is_degraded: bool) -> None: + """Set the startup degraded mode gauge. + + This metric tracks whether the service started in degraded mode. + It is set once at startup and does not change at runtime. + + Args: + is_degraded: True if service started in degraded mode, False if healthy. + """ + try: + metrics.started_in_degraded_mode.set(1 if is_degraded else 0) + except (AttributeError, TypeError, ValueError): + logger.warning("Failed to update started_in_degraded_mode gauge", exc_info=True) diff --git a/src/metrics/utils.py b/src/metrics/utils.py index 806e7a336..afb832d29 100644 --- a/src/metrics/utils.py +++ b/src/metrics/utils.py @@ -1,29 +1,23 @@ """Utility functions for metrics handling.""" -from fastapi import HTTPException -from llama_stack_client import APIConnectionError, APIStatusError - import metrics from client import AsyncLlamaStackClientHolder from configuration import configuration from log import get_logger -from models.api.responses.error import ServiceUnavailableResponse -from utils.common import run_once_async from utils.endpoints import check_configuration_loaded logger = get_logger(__name__) -@run_once_async async def setup_model_metrics() -> None: - """Perform setup of all metrics related to LLM model and provider.""" + """Perform setup of all metrics related to LLM model and provider. + + Should be called during startup when service is in healthy mode. + Skipped in degraded mode to avoid blocking on unavailable llama-stack. + """ logger.info("Setting up model metrics") check_configuration_loaded(configuration) - try: - model_list = await AsyncLlamaStackClientHolder().get_client().models.list() - except (APIConnectionError, APIStatusError) as e: - response = ServiceUnavailableResponse(backend_name="Llama Stack", cause=str(e)) - raise HTTPException(**response.model_dump()) from e + model_list = await AsyncLlamaStackClientHolder().get_client().models.list() models = [ model diff --git a/src/utils/degraded_mode.py b/src/utils/degraded_mode.py index 661272da7..f40a480fc 100644 --- a/src/utils/degraded_mode.py +++ b/src/utils/degraded_mode.py @@ -6,6 +6,7 @@ from typing import Optional +from metrics import recording from utils.types import Singleton @@ -31,11 +32,17 @@ def set_degraded(self, reason: str) -> None: self._is_degraded = True self._degraded_reason = reason + # Record startup state metric + recording.set_started_in_degraded_mode(True) + def set_healthy(self) -> None: """Mark the service as running in healthy mode.""" self._is_degraded = False self._degraded_reason = None + # Record startup state metric + recording.set_started_in_degraded_mode(False) + def is_degraded(self) -> bool: """Check if the service is running in degraded mode. diff --git a/tests/unit/app/endpoints/test_metrics.py b/tests/unit/app/endpoints/test_metrics.py index 5cebc0529..bf826e5a1 100644 --- a/tests/unit/app/endpoints/test_metrics.py +++ b/tests/unit/app/endpoints/test_metrics.py @@ -4,6 +4,7 @@ from fastapi import Request from pytest_mock import MockerFixture +import metrics # noqa: F401 pylint: disable=unused-import from app.endpoints.metrics import metrics_endpoint_handler from authentication.interface import AuthTuple from tests.unit.utils.auth_helpers import mock_authorization_resolvers @@ -14,10 +15,6 @@ async def test_metrics_endpoint(mocker: MockerFixture) -> None: """Test the metrics endpoint handler.""" mock_authorization_resolvers(mocker) - mock_setup_metrics = mocker.patch( - "app.endpoints.metrics.setup_model_metrics", - new=mocker.AsyncMock(return_value=None), - ) request = Request( scope={ "type": "http", @@ -34,8 +31,6 @@ async def test_metrics_endpoint(mocker: MockerFixture) -> None: response_body = response.body.decode() # type: ignore - # Assert metrics were set up - mock_setup_metrics.assert_called_once() # Check if the response contains Prometheus metrics format assert "# TYPE ls_rest_api_calls_total counter" in response_body assert "# TYPE ls_response_duration_seconds histogram" in response_body @@ -45,3 +40,4 @@ async def test_metrics_endpoint(mocker: MockerFixture) -> None: assert "# TYPE ls_llm_validation_errors_total counter" in response_body assert "# TYPE ls_llm_token_sent_total counter" in response_body assert "# TYPE ls_llm_token_received_total counter" in response_body + assert "# TYPE ls_started_in_degraded_mode gauge" in response_body