Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions eval_protocol/mcp/execution/base_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,20 @@ def __init__(
# Initialize conversation state tracking for proper OpenAI trajectories
self.initialized = False

def _supports_reasoning_details(self) -> bool:
Comment thread
xzrderek marked this conversation as resolved.
Outdated
"""
Returns True if this policy is configured for a provider/model that expects
top-level reasoning_details to be preserved (e.g., Gemini 3 via OpenRouter).
"""
model_id = getattr(self, "model_id", "") or ""
base_url = getattr(self, "base_url", "") or ""

if isinstance(model_id, str) and "openrouter" in model_id:
return True
if isinstance(base_url, str) and "openrouter.ai" in base_url:
return True
return False

@abstractmethod
async def _make_llm_call(self, messages: List[Dict], tools: List[Dict]) -> Dict:
"""
Expand Down Expand Up @@ -199,6 +213,10 @@ async def _generate_live_tool_calls(
if message.get("tool_calls"):
assistant_message_for_history["tool_calls"] = message["tool_calls"]

rd = message.get("reasoning_details", None)
if rd is not None and self._supports_reasoning_details():
assistant_message_for_history["reasoning_details"] = rd

# Add to actual conversation history
conversation_history.append(assistant_message_for_history)

Expand Down
53 changes: 31 additions & 22 deletions eval_protocol/mcp/execution/policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ def _clean_messages_for_api(self, messages: List[Dict]) -> List[Dict]:
# Standard OpenAI message fields
allowed_fields = {"role", "content", "tool_calls", "tool_call_id", "name"}

if self._supports_reasoning_details():
allowed_fields.add("reasoning_details")

clean_messages = []
for msg in messages:
# Only keep allowed fields
Expand Down Expand Up @@ -217,31 +220,37 @@ async def _make_llm_call(self, messages: List[Dict[str, Any]], tools: List[Dict[
logger.debug(f"🔄 API call for model: {self.model_id}")

# LiteLLM already returns OpenAI-compatible format
message_obj = getattr(response.choices[0], "message", object())

message_dict: Dict[str, Any] = {
"role": getattr(message_obj, "role", "assistant"),
"content": getattr(message_obj, "content", None),
"tool_calls": (
[
{
"id": getattr(tc, "id", None),
"type": getattr(tc, "type", "function"),
"function": {
"name": getattr(getattr(tc, "function", None), "name", "tool"),
"arguments": getattr(getattr(tc, "function", None), "arguments", "{}"),
},
}
for tc in (getattr(message_obj, "tool_calls", []) or [])
]
if getattr(message_obj, "tool_calls", None)
else []
),
}

if self._supports_reasoning_details():
rd = getattr(message_obj, "reasoning_details", None)
if rd is not None:
message_dict["reasoning_details"] = rd

return {
"choices": [
{
"message": {
"role": getattr(getattr(response.choices[0], "message", object()), "role", "assistant"),
"content": getattr(getattr(response.choices[0], "message", object()), "content", None),
"tool_calls": (
[
{
"id": getattr(tc, "id", None),
"type": getattr(tc, "type", "function"),
"function": {
"name": getattr(getattr(tc, "function", None), "name", "tool"),
"arguments": getattr(getattr(tc, "function", None), "arguments", "{}"),
},
}
for tc in (
getattr(getattr(response.choices[0], "message", object()), "tool_calls", [])
or []
)
]
if getattr(getattr(response.choices[0], "message", object()), "tool_calls", None)
else []
),
},
"message": message_dict,
"finish_reason": getattr(response.choices[0], "finish_reason", None),
}
],
Expand Down
Loading