Skip to content

Commit dec6167

Browse files
Tighten readiness probe per CoPilot review
Three findings on PR #109: 1. Runtime guard for ``readiness_probe``. The Literal type is a static hint; an unknown string would silently no-op both dispatch branches in ``ready()`` and report ready. Validate in ``__init__`` against a module-level frozenset and raise ValueError. 2. Route ``_probe_models`` non-200 responses through ``classify_http_error``. Previously hard-coded 401/403 to ProviderAuthentication and everything-else to ProviderUnavailable, missing ProviderRateLimit (429), ProviderModelNotLoaded (503+marker), and ProviderInvalidModel (404+marker). The docstring's mode-independence claim is now true. 3. Validate ``_probe_chat_completions`` 200 response shape. A proxy answering 200 with an error payload or non-OpenAI-shape JSON previously passed the probe. Mirror ``_do_complete``'s parse + ``_parse_response(payload, None, None)`` step. Adds five new tests covering: invalid mode at construction, catalog probe 429 → ProviderRateLimit, catalog probe 503+marker → ProviderModelNotLoaded, chat probe 200 with error payload, and chat probe 200 with non-JSON body.
1 parent 9bdb143 commit dec6167

2 files changed

Lines changed: 128 additions & 5 deletions

File tree

src/openarmature/llm/providers/openai.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,13 @@
114114
)
115115
from ..response import FinishReason, ParsedValue, Response, RuntimeConfig, Usage
116116

117+
# Runtime guard for ``OpenAIProvider(..., readiness_probe=...)``. The
118+
# Literal type narrows callers under static checkers but is not enforced
119+
# at runtime, so an unknown string would silently no-op both dispatch
120+
# branches in ``ready()`` and return None — a false-green readiness
121+
# signal. Validate in ``__init__`` against this set instead.
122+
_VALID_READINESS_PROBES = frozenset({"models", "chat_completions", "both"})
123+
117124

118125
class OpenAIProvider:
119126
"""OpenAI Chat Completions wire-compatible provider.
@@ -177,6 +184,10 @@ def __init__(
177184
# rationale as ``genai_system``: no base_url sniffing, since the
178185
# right probe shape depends on what's on the other end and a
179186
# wrong inference is worse than a wrong default.
187+
if readiness_probe not in _VALID_READINESS_PROBES:
188+
raise ValueError(
189+
f"readiness_probe must be one of {sorted(_VALID_READINESS_PROBES)} (got {readiness_probe!r})"
190+
)
180191
self._readiness_probe = readiness_probe
181192
self._headers: dict[str, str] = {"Content-Type": "application/json"}
182193
if api_key is not None:
@@ -237,12 +248,8 @@ async def _probe_models(self) -> None:
237248
except httpx.HTTPError as exc:
238249
raise ProviderUnavailable(str(exc)) from exc
239250

240-
if resp.status_code in (401, 403):
241-
raise ProviderAuthentication(f"GET /v1/models returned {resp.status_code}")
242-
if 500 <= resp.status_code < 600:
243-
raise ProviderUnavailable(f"GET /v1/models returned {resp.status_code}")
244251
if resp.status_code != 200:
245-
raise ProviderUnavailable(f"GET /v1/models returned unexpected {resp.status_code}")
252+
raise classify_http_error(resp)
246253

247254
try:
248255
body_raw = resp.json()
@@ -306,6 +313,17 @@ async def _probe_chat_completions(self) -> None:
306313
raise ProviderUnavailable(str(exc)) from exc
307314
if resp.status_code != 200:
308315
raise classify_http_error(resp)
316+
# Validate the response shape so a proxy answering 200 with an
317+
# error payload or non-OpenAI-shape JSON doesn't pass the probe.
318+
# Mirrors ``_do_complete``'s parse step. The returned Response
319+
# is discarded — the validation itself is the point.
320+
try:
321+
payload_raw = resp.json()
322+
except ValueError as exc:
323+
raise ProviderInvalidResponse("POST /v1/chat/completions returned non-JSON body") from exc
324+
if not isinstance(payload_raw, dict):
325+
raise ProviderInvalidResponse("POST /v1/chat/completions returned a non-object body")
326+
self._parse_response(cast("dict[str, Any]", payload_raw), None, None)
309327

310328
# ------------------------------------------------------------------
311329
# complete() — single completion call

tests/unit/test_llm_provider.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -575,6 +575,19 @@ def test_runtime_config_from_partial_empty() -> None:
575575
assert config.stop_sequences is None
576576

577577

578+
def test_readiness_probe_unknown_mode_rejected_at_construction() -> None:
579+
# Literal type is a static hint, not a runtime guard. Unknown modes
580+
# would otherwise silently no-op both dispatch branches in ready()
581+
# and report ready, so reject at construction.
582+
with pytest.raises(ValueError, match="readiness_probe must be one of"):
583+
OpenAIProvider(
584+
base_url="http://test",
585+
model="m",
586+
api_key="k",
587+
readiness_probe="bogus", # pyright: ignore[reportArgumentType]
588+
)
589+
590+
578591
# ---------------------------------------------------------------------------
579592
# ready() readiness_probe modes
580593
# ---------------------------------------------------------------------------
@@ -780,6 +793,57 @@ def _handler(req: httpx.Request) -> httpx.Response:
780793
assert seen == ["/v1/models"]
781794

782795

796+
async def test_ready_models_429_surfaces_rate_limit() -> None:
797+
# The catalog probe routes non-200 through classify_http_error, so
798+
# 429 with a Retry-After lands as ProviderRateLimit carrying the
799+
# parsed delay. Pre-refactor _probe_models would have flattened this
800+
# to ProviderUnavailable.
801+
def _429(_req: httpx.Request) -> httpx.Response:
802+
return httpx.Response(
803+
429,
804+
headers={"Retry-After": "30"},
805+
json={"error": {"message": "rate limited"}},
806+
)
807+
808+
provider = OpenAIProvider(
809+
base_url="http://test",
810+
model="m",
811+
api_key="k",
812+
transport=httpx.MockTransport(_429),
813+
readiness_probe="models",
814+
)
815+
try:
816+
with pytest.raises(ProviderRateLimit) as excinfo:
817+
await provider.ready()
818+
finally:
819+
await provider.aclose()
820+
assert excinfo.value.retry_after == 30.0
821+
822+
823+
async def test_ready_models_503_model_not_loaded_surfaces_canonical_category() -> None:
824+
# 503 with a model-not-loaded marker now lands as
825+
# ProviderModelNotLoaded on the catalog probe too, not the previous
826+
# generic ProviderUnavailable.
827+
def _503(_req: httpx.Request) -> httpx.Response:
828+
return httpx.Response(
829+
503,
830+
json={"error": {"type": "model_not_loaded", "message": "model is not loaded yet"}},
831+
)
832+
833+
provider = OpenAIProvider(
834+
base_url="http://test",
835+
model="m",
836+
api_key="k",
837+
transport=httpx.MockTransport(_503),
838+
readiness_probe="models",
839+
)
840+
try:
841+
with pytest.raises(ProviderModelNotLoaded):
842+
await provider.ready()
843+
finally:
844+
await provider.aclose()
845+
846+
783847
async def test_ready_both_catalog_200_chat_405_surfaces_unavailable() -> None:
784848
# The actual Bifrost case via ``both`` mode: catalog probe sees 200 from
785849
# ``/v1/models`` with the bound model present, then the chat probe gets
@@ -834,6 +898,47 @@ def _raises(_req: httpx.Request) -> httpx.Response:
834898
await provider.aclose()
835899

836900

901+
async def test_ready_chat_completions_200_with_error_payload_surfaces_invalid_response() -> None:
902+
# The residual false-green class: a proxy returning 200 with an
903+
# error payload (no ``choices`` field) would pass a simple status
904+
# check but indicates a deeply broken inference path. The chat probe
905+
# now parses the response shape so this fails with
906+
# ProviderInvalidResponse rather than reporting ready.
907+
def _200_error(_req: httpx.Request) -> httpx.Response:
908+
return httpx.Response(200, json={"error": "something is wrong"})
909+
910+
provider = OpenAIProvider(
911+
base_url="http://test",
912+
model="m",
913+
api_key="k",
914+
transport=httpx.MockTransport(_200_error),
915+
)
916+
try:
917+
with pytest.raises(ProviderInvalidResponse):
918+
await provider.ready()
919+
finally:
920+
await provider.aclose()
921+
922+
923+
async def test_ready_chat_completions_200_with_non_json_body_surfaces_invalid_response() -> None:
924+
# Same false-green class, JSON-parse leg: a proxy returning 200 with
925+
# a non-JSON body (HTML error page, plain text) must not pass.
926+
def _200_html(_req: httpx.Request) -> httpx.Response:
927+
return httpx.Response(200, content=b"<html>error</html>", headers={"content-type": "text/html"})
928+
929+
provider = OpenAIProvider(
930+
base_url="http://test",
931+
model="m",
932+
api_key="k",
933+
transport=httpx.MockTransport(_200_html),
934+
)
935+
try:
936+
with pytest.raises(ProviderInvalidResponse, match="non-JSON"):
937+
await provider.ready()
938+
finally:
939+
await provider.aclose()
940+
941+
837942
async def test_ready_chat_completions_503_model_not_loaded() -> None:
838943
# 503 with a model-not-loaded body routes through classify_http_error
839944
# to ProviderModelNotLoaded. Covered indirectly by the classifier's

0 commit comments

Comments
 (0)