fix(gatekeeper): pass a looser schema to the model (#462)

owtaylor · web-flow · commit bd9b242ca3e9 · 2026-05-19T17:00:38.000-04:00
In evals, Claude Opus and Claude Sonnet were performing *much*
worse when run directly from Anthropic than when run from Vertex AI
(and worse than other flagship models)

Switching the schema passed to response_format so that the
reason is optional first greatly improves the Claude models on the
Anthropic platforms (Opus from 76.7% => 93.2%) and doesn't have much
noticeable effect on other models.

Theory here is:
 - The response_format isn't ending up in the prompt for Vertex AI,
   though it may constrain decoding.
 - When the response_format does end up in the prompt, being required
   to provide a reason "intimidates" the model, and responding OK
   seems easier.
diff --git a/src/linux_mcp_server/gatekeeper/__init__.py b/src/linux_mcp_server/gatekeeper/__init__.py
@@ -1,7 +1,6 @@
 from linux_mcp_server.gatekeeper.check_run_script import check_run_script
 from linux_mcp_server.gatekeeper.check_run_script import GatekeeperResult
-from linux_mcp_server.gatekeeper.check_run_script import GatekeeperResultStrict
 from linux_mcp_server.gatekeeper.check_run_script import GatekeeperStatus
 
 
-__all__ = ["check_run_script", "GatekeeperStatus", "GatekeeperResult", "GatekeeperResultStrict"]
+__all__ = ["check_run_script", "GatekeeperStatus", "GatekeeperResult"]
diff --git a/src/linux_mcp_server/gatekeeper/check_run_script.py b/src/linux_mcp_server/gatekeeper/check_run_script.py
@@ -105,8 +105,6 @@ def get_model() -> str:
 what is wrong with the script. Be specific to allow the language model to correct
 the problem.
 
-If status is OK, the detail should be an empty string.
-
 If the script seems buggy but does not fall into any of the categories above, return
 a status of `OK`.
 
@@ -186,10 +184,6 @@ def parse_from_description(cls, description: str) -> "GatekeeperResult":
             return cls(status=status, detail=detail)
 
 
-class GatekeeperResultStrict(GatekeeperResult):
-    detail: str  # type:ignore
-
-
 def check_run_script(description: str, script_type: str, script: str, *, readonly: bool) -> GatekeeperResult:
     # Check that the script does what is described
     if "start_of_script" in script.lower() or "end_of_script" in script.lower():
@@ -215,7 +209,7 @@ def check_run_script(description: str, script_type: str, script: str, *, readonl
 
     params = get_supported_openai_params(model=get_model())
     if params is not None and "response_format" in params:
-        response_format = GatekeeperResultStrict
+        response_format = GatekeeperResult
     else:
         response_format = None
 
diff --git a/tests/gatekeeper/test_check_run_script.py b/tests/gatekeeper/test_check_run_script.py
@@ -9,7 +9,6 @@
 from pydantic import ValidationError
 
 from linux_mcp_server.gatekeeper import GatekeeperResult
-from linux_mcp_server.gatekeeper import GatekeeperResultStrict
 from linux_mcp_server.gatekeeper import GatekeeperStatus
 from linux_mcp_server.gatekeeper.check_run_script import check_run_script
 from linux_mcp_server.gatekeeper.check_run_script import get_model
@@ -125,7 +124,7 @@ def test_response_format_handling(self, mock_litellm, supported_params, expect_r
 
         call_kwargs = mock_completion.call_args.kwargs
         if expect_response_format:
-            assert call_kwargs["response_format"] is GatekeeperResultStrict
+            assert call_kwargs["response_format"] is GatekeeperResult
         else:
             assert call_kwargs["response_format"] is None