Skip to content

Commit 62c5e42

Browse files
committed
fix: add default model availability check to readiness probe
The /readiness endpoint only verified provider health but did not check that the configured default model was registered in the Llama Stack model registry. This allowed pods where model registration failed during startup to pass readiness and serve 404s on every inference request. Add check_default_model_available() that verifies the configured default model exists in client.models.list(). When the model is missing, /readiness returns 503 so Kubernetes removes the pod from the service load balancer. Ref: RSPEED-2959 Signed-off-by: Major Hayden <major@redhat.com>
1 parent ca125c4 commit 62c5e42

2 files changed

Lines changed: 354 additions & 7 deletions

File tree

src/app/endpoints/health.py

Lines changed: 119 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@
88
from enum import Enum
99
from typing import Annotated, Any
1010

11-
from fastapi import APIRouter, Depends, Response, status
12-
from llama_stack_client import APIConnectionError
11+
from fastapi import APIRouter, Depends, HTTPException, Response, status
12+
from llama_stack_client import APIConnectionError, APIStatusError
1313

1414
from authentication import get_auth_dependency
1515
from authentication.interface import AuthTuple
1616
from authorization.middleware import authorize
1717
from client import AsyncLlamaStackClientHolder
18+
from configuration import configuration
1819
from log import get_logger
1920
from models.config import Action
2021
from models.responses import (
@@ -95,6 +96,108 @@ async def get_providers_health_statuses() -> list[ProviderHealthStatus]:
9596
]
9697

9798

99+
def _model_in_registry(models: list, expected_id: str) -> bool:
    """Report whether any entry in ``models`` has an ``id`` equal to ``expected_id``.

    Args:
        models: Model entries to scan; each entry must expose an ``id``
            attribute.
        expected_id: Identifier to look for.

    Returns:
        True as soon as a matching entry is seen, False when no entry
        matches (including when ``models`` is empty).
    """
    for candidate in models:
        if candidate.id == expected_id:
            return True
    return False
102+
103+
104+
async def _reload_and_check_model(
    client_holder: AsyncLlamaStackClientHolder,
    expected_model_id: str,
) -> tuple[bool, str]:
    """Reload the library client, then look for the default model again.

    Meant as a recovery step for library mode clients after a first lookup
    did not find the default model in the registry.

    Args:
        client_holder: Holder whose library client is reloaded and then
            queried for its model list.
        expected_model_id: Identifier the refreshed model list is searched for.

    Returns:
        ``(True, reason)`` when the model is listed after the reload.
        ``(False, "")`` when it is still missing, or when the reload or the
        model listing raised one of the handled errors (which is logged).
    """
    logger.warning(
        "Default model %s not found, attempting client reload",
        expected_model_id,
    )
    handled_errors = (
        RuntimeError,
        HTTPException,
        APIConnectionError,
        APIStatusError,
    )
    try:
        await client_holder.reload_library_client()
        refreshed_models = await client_holder.get_client().models.list()
        # Guard clause: still missing after the reload, nothing more to do.
        if not _model_in_registry(refreshed_models, expected_model_id):
            return False, ""
        logger.info(
            "Default model %s found after client reload",
            expected_model_id,
        )
        success_reason = f"Default model {expected_model_id} is available after reload"
        return True, success_reason
    except handled_errors as reload_error:
        logger.error("Client reload failed: %s", reload_error)
        return False, ""
141+
142+
143+
async def check_default_model_available() -> tuple[bool, str]:
    """Check that the configured default model is registered in the model registry.

    Verifies the default model from configuration can be found in the Llama
    Stack client's model list. This catches cases where a pod started
    successfully and providers report healthy, but model registration failed
    during initialization.

    If the model is missing and the client is running in library mode, attempts
    a client reload to re-register models before reporting failure.

    Returns:
        A tuple of (available, reason). ``available`` is True if the default
        model was found (directly or after a reload) or if no inference
        section / default model / default provider is configured. It is False
        if the model is not in the registry, or if obtaining the client or
        listing models raised RuntimeError, APIConnectionError or
        APIStatusError. ``reason`` describes the outcome.
    """
    if configuration.inference is None:
        return True, "No inference configuration"

    default_model = configuration.inference.default_model
    default_provider = configuration.inference.default_provider

    # Both parts are needed to build the registry identifier below.
    if not default_model or not default_provider:
        return True, "No default model configured"

    expected_model_id = f"{default_provider}/{default_model}"

    try:
        client_holder = AsyncLlamaStackClientHolder()
        client = client_holder.get_client()
        models = await client.models.list()

        if _model_in_registry(models, expected_model_id):
            return True, f"Default model {expected_model_id} is available"

        # Model not found - attempt self-healing reload for library clients
        if client_holder.is_library_client:
            found, reason = await _reload_and_check_model(
                client_holder, expected_model_id
            )
            if found:
                return True, reason

        # NOTE: these IDs come from the listing taken before any reload
        # attempt above, not from the refreshed client.
        registered_ids = [m.id for m in models]
        logger.error(
            "Default model %s not found in registry. Registered models: %s",
            expected_model_id,
            registered_ids,
        )
        return False, f"Default model {expected_model_id} not found in model registry"
    except RuntimeError as e:
        # This branch reports "not available" (readiness fails), so the log
        # must not claim the check was merely skipped.
        # NOTE(review): presumably raised by get_client() when the client has
        # not been loaded yet - confirm against AsyncLlamaStackClientHolder.
        logger.warning("Client not initialized, cannot verify default model: %s", e)
        return False, f"Client not initialized: {e!s}"
    except (APIConnectionError, APIStatusError) as e:
        logger.error("Error checking model availability: %s", e)
        return False, f"Error checking model availability: {e!s}"
199+
200+
98201
@router.get("/readiness", responses=get_readiness_responses)
99202
@authorize(Action.INFO)
100203
async def readiness_probe_get_method(
@@ -134,11 +237,21 @@ async def readiness_probe_get_method(
134237
unhealthy_provider_names = [p.provider_id for p in unhealthy_providers]
135238
reason = f"Providers not healthy: {', '.join(unhealthy_provider_names)}"
136239
response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
137-
else:
138-
ready = True
139-
reason = "All providers are healthy"
240+
return ReadinessResponse(
241+
ready=ready, reason=reason, providers=unhealthy_providers
242+
)
243+
244+
# Check that the default model is registered in the model registry
245+
model_available, model_reason = await check_default_model_available()
246+
if not model_available:
247+
response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
248+
return ReadinessResponse(
249+
ready=False, reason=model_reason, providers=unhealthy_providers
250+
)
140251

141-
return ReadinessResponse(ready=ready, reason=reason, providers=unhealthy_providers)
252+
return ReadinessResponse(
253+
ready=True, reason="All providers are healthy", providers=unhealthy_providers
254+
)
142255

143256

144257
@router.get("/liveness", responses=get_liveness_responses)

0 commit comments

Comments
 (0)