Tighten readiness probe per Copilot review

chris-colinsky · chris-colinsky · commit dec61674f260 · 2026-06-01T09:04:29.000-07:00
Three findings on PR #109:

1. Add a runtime guard for ``readiness_probe``. The Literal type is only a static hint; an unknown string would silently no-op both dispatch branches in ``ready()`` and report ready. Validate in ``__init__`` against a module-level frozenset and raise ValueError.
2. Route ``_probe_models`` non-200 responses through ``classify_http_error``. Previously the probe hard-coded 401/403 to ProviderAuthentication and everything else to ProviderUnavailable, so it missed ProviderRateLimit (429), ProviderModelNotLoaded (503+marker), and ProviderInvalidModel (404+marker). The docstring's mode-independence claim is now true.
3. Validate the ``_probe_chat_completions`` 200 response shape. A proxy answering 200 with an error payload or non-OpenAI-shape JSON previously passed the probe. Mirror ``_do_complete``'s parse + ``_parse_response(payload, None, None)`` step.

Adds five new tests covering: invalid mode at construction, catalog probe 429 → ProviderRateLimit, catalog probe 503+marker → ProviderModelNotLoaded, chat probe 200 with error payload, and chat probe 200 with non-JSON body.
diff --git a/src/openarmature/llm/providers/openai.py b/src/openarmature/llm/providers/openai.py
@@ -114,6 +114,13 @@
 )
 from ..response import FinishReason, ParsedValue, Response, RuntimeConfig, Usage
 
+# Runtime guard for ``OpenAIProvider(..., readiness_probe=...)``. The
+# Literal type narrows callers under static checkers but is not enforced
+# at runtime, so an unknown string would silently no-op both dispatch
+# branches in ``ready()`` and return None — a false-green readiness
+# signal. Validate in ``__init__`` against this set instead.
+_VALID_READINESS_PROBES = frozenset({"models", "chat_completions", "both"})
+
 
 class OpenAIProvider:
     """OpenAI Chat Completions wire-compatible provider.
@@ -177,6 +184,10 @@ def __init__(
         # rationale as ``genai_system``: no base_url sniffing, since the
         # right probe shape depends on what's on the other end and a
         # wrong inference is worse than a wrong default.
+        if readiness_probe not in _VALID_READINESS_PROBES:
+            raise ValueError(
+                f"readiness_probe must be one of {sorted(_VALID_READINESS_PROBES)} (got {readiness_probe!r})"
+            )
         self._readiness_probe = readiness_probe
         self._headers: dict[str, str] = {"Content-Type": "application/json"}
         if api_key is not None:
@@ -237,12 +248,8 @@ async def _probe_models(self) -> None:
         except httpx.HTTPError as exc:
             raise ProviderUnavailable(str(exc)) from exc
 
-        if resp.status_code in (401, 403):
-            raise ProviderAuthentication(f"GET /v1/models returned {resp.status_code}")
-        if 500 <= resp.status_code < 600:
-            raise ProviderUnavailable(f"GET /v1/models returned {resp.status_code}")
         if resp.status_code != 200:
-            raise ProviderUnavailable(f"GET /v1/models returned unexpected {resp.status_code}")
+            raise classify_http_error(resp)
 
         try:
             body_raw = resp.json()
@@ -306,6 +313,17 @@ async def _probe_chat_completions(self) -> None:
             raise ProviderUnavailable(str(exc)) from exc
         if resp.status_code != 200:
             raise classify_http_error(resp)
+        # Validate the response shape so a proxy answering 200 with an
+        # error payload or non-OpenAI-shape JSON doesn't pass the probe.
+        # Mirrors ``_do_complete``'s parse step. The returned Response
+        # is discarded — the validation itself is the point.
+        try:
+            payload_raw = resp.json()
+        except ValueError as exc:
+            raise ProviderInvalidResponse("POST /v1/chat/completions returned non-JSON body") from exc
+        if not isinstance(payload_raw, dict):
+            raise ProviderInvalidResponse("POST /v1/chat/completions returned a non-object body")
+        self._parse_response(cast("dict[str, Any]", payload_raw), None, None)
 
     # ------------------------------------------------------------------
     # complete() — single completion call
diff --git a/tests/unit/test_llm_provider.py b/tests/unit/test_llm_provider.py
@@ -575,6 +575,19 @@ def test_runtime_config_from_partial_empty() -> None:
     assert config.stop_sequences is None
 
 
+def test_readiness_probe_unknown_mode_rejected_at_construction() -> None:
+    # Literal type is a static hint, not a runtime guard. Unknown modes
+    # would otherwise silently no-op both dispatch branches in ready()
+    # and report ready, so reject at construction.
+    with pytest.raises(ValueError, match="readiness_probe must be one of"):
+        OpenAIProvider(
+            base_url="http://test",
+            model="m",
+            api_key="k",
+            readiness_probe="bogus",  # pyright: ignore[reportArgumentType]
+        )
+
+
 # ---------------------------------------------------------------------------
 # ready() readiness_probe modes
 # ---------------------------------------------------------------------------
@@ -780,6 +793,57 @@ def _handler(req: httpx.Request) -> httpx.Response:
     assert seen == ["/v1/models"]
 
 
+async def test_ready_models_429_surfaces_rate_limit() -> None:
+    # The catalog probe routes non-200 through classify_http_error, so
+    # 429 with a Retry-After lands as ProviderRateLimit carrying the
+    # parsed delay. Pre-refactor _probe_models would have flattened this
+    # to ProviderUnavailable.
+    def _429(_req: httpx.Request) -> httpx.Response:
+        return httpx.Response(
+            429,
+            headers={"Retry-After": "30"},
+            json={"error": {"message": "rate limited"}},
+        )
+
+    provider = OpenAIProvider(
+        base_url="http://test",
+        model="m",
+        api_key="k",
+        transport=httpx.MockTransport(_429),
+        readiness_probe="models",
+    )
+    try:
+        with pytest.raises(ProviderRateLimit) as excinfo:
+            await provider.ready()
+    finally:
+        await provider.aclose()
+    assert excinfo.value.retry_after == 30.0
+
+
+async def test_ready_models_503_model_not_loaded_surfaces_canonical_category() -> None:
+    # 503 with a model-not-loaded marker now lands as
+    # ProviderModelNotLoaded on the catalog probe too, not the previous
+    # generic ProviderUnavailable.
+    def _503(_req: httpx.Request) -> httpx.Response:
+        return httpx.Response(
+            503,
+            json={"error": {"type": "model_not_loaded", "message": "model is not loaded yet"}},
+        )
+
+    provider = OpenAIProvider(
+        base_url="http://test",
+        model="m",
+        api_key="k",
+        transport=httpx.MockTransport(_503),
+        readiness_probe="models",
+    )
+    try:
+        with pytest.raises(ProviderModelNotLoaded):
+            await provider.ready()
+    finally:
+        await provider.aclose()
+
+
 async def test_ready_both_catalog_200_chat_405_surfaces_unavailable() -> None:
     # The actual Bifrost case via ``both`` mode: catalog probe sees 200 from
     # ``/v1/models`` with the bound model present, then the chat probe gets
@@ -834,6 +898,47 @@ def _raises(_req: httpx.Request) -> httpx.Response:
         await provider.aclose()
 
 
+async def test_ready_chat_completions_200_with_error_payload_surfaces_invalid_response() -> None:
+    # The residual false-green class: a proxy returning 200 with an
+    # error payload (no ``choices`` field) would pass a simple status
+    # check but indicates a deeply broken inference path. The chat probe
+    # now parses the response shape so this fails with
+    # ProviderInvalidResponse rather than reporting ready.
+    def _200_error(_req: httpx.Request) -> httpx.Response:
+        return httpx.Response(200, json={"error": "something is wrong"})
+
+    provider = OpenAIProvider(
+        base_url="http://test",
+        model="m",
+        api_key="k",
+        transport=httpx.MockTransport(_200_error),
+    )
+    try:
+        with pytest.raises(ProviderInvalidResponse):
+            await provider.ready()
+    finally:
+        await provider.aclose()
+
+
+async def test_ready_chat_completions_200_with_non_json_body_surfaces_invalid_response() -> None:
+    # Same false-green class, JSON-parse leg: a proxy returning 200 with
+    # a non-JSON body (HTML error page, plain text) must not pass.
+    def _200_html(_req: httpx.Request) -> httpx.Response:
+        return httpx.Response(200, content=b"<html>error</html>", headers={"content-type": "text/html"})
+
+    provider = OpenAIProvider(
+        base_url="http://test",
+        model="m",
+        api_key="k",
+        transport=httpx.MockTransport(_200_html),
+    )
+    try:
+        with pytest.raises(ProviderInvalidResponse, match="non-JSON"):
+            await provider.ready()
+    finally:
+        await provider.aclose()
+
+
 async def test_ready_chat_completions_503_model_not_loaded() -> None:
     # 503 with a model-not-loaded body routes through classify_http_error
     # to ProviderModelNotLoaded. Covered indirectly by the classifier's