Adjust tools resolution

asimurka · asimurka · commit c7b90c35bea0 · 2026-03-27T16:52:16.000+01:00
diff --git a/docs/responses.md b/docs/responses.md
@@ -280,9 +280,9 @@ Optional. **Tool selection strategy** that controls whether and how the model us
 
 **Specific tool objects (object with `type`):**
 
-- `allowed_tools`: Restrict to a list of tool definitions; `mode` is `"auto"` or `"required"`, `tools` is a list of tool objects (same shapes as in [tools](#tools)).
-- `file_search`: Force the model to use file search.
-- `web_search`: Force the model to use web search (optionally with a variant such as `web_search_preview`).
+- `allowed_tools`: Restrict the model to a subset of the configured tools; `mode` is `"auto"` or `"required"`, and `tools` is a list of key-value filters matched against the tools configured via the `tools` attribute.
+- `file_search`: Force the model to use only file search.
+- `web_search`: Force the model to use only web search.
 - `function`: Force a specific function; `name` (required) is the function name.
 - `mcp`: Force a tool on an MCP server; `server_label` (required), `name` (optional) tool name.
 - `custom`: Force a custom tool; `name` (required).
@@ -297,16 +297,25 @@ Simple modes (string): use one of `"auto"`, `"required"`, or `"none"`.
 { "tool_choice": "none" }
 ```
 
-Restrict to specific tools with `allowed_tools` (mode `"auto"` or `"required"`, plus `tools` array):
+Restrict tool usage to a specific subset using `allowed_tools`. You can control behavior with the `mode` field (`"auto"` or `"required"`) and explicitly list permitted tools in the `tools` array.
+
+The `tools` array acts as a **key-value filter**: each object specifies matching criteria (such as `type`, `server_label`, or `name`), and only tools that satisfy all provided attributes are allowed.
+
+The example below limits tool usage to:
+- the `file_search` tool  
+- specific MCP tools (`tool_1` and `tool_2`) available on `server_1` (multiple entries with different `name` values for the same server act as a union)
+
+If the `name` field is omitted for an MCP tool, the filter applies to all tools available on the specified server.
 
 ```json
 {
   "tool_choice": {
     "type": "allowed_tools",
     "mode": "required",
     "tools": [
-      { "type": "file_search", "vector_store_ids": ["vs_123"] },
-      { "type": "web_search" }
+      { "type": "file_search" },
+      { "type": "mcp", "server_label": "server_1", "name": "tool_1" },
+      { "type": "mcp", "server_label": "server_1", "name": "tool_2" }
     ]
   }
 }
@@ -396,8 +405,8 @@ The following response attributes are inherited directly from the LLS OpenAPI sp
 | `temperature` | float | Temperature parameter used for generation |
 | `text` | object | Text response configuration object used |
 | `top_p` | float | Top-p sampling used |
-| `tools` | array[object] | Tools available during generation |
-| `tool_choice` | string or object | Tool selection used |
+| `tools` | array[object] | Internally resolved tools available during generation |
+| `tool_choice` | string | Internally resolved tool choice mode |
 | `truncation` | string | Truncation strategy applied (`"auto"` or `"disabled"`) |
 | `usage` | object | Token usage (input_tokens, output_tokens, total_tokens) |
 | `instructions` | string | System instructions used |
@@ -517,6 +526,8 @@ Vector store IDs are configured within the `tools` as `file_search` tools rather
 
 **Vector store IDs:** Accepts **LCORE format** in requests and also outputs it in responses; LCORE translates to/from Llama Stack format internally.
 
+The response includes `tools` and `tool_choice` fields that reflect the internally resolved configuration — that is, the final set of tools and selection constraints after internal resolution and filtering.
+
 ### LCORE-Specific Extensions
 
 The API introduces extensions that are not part of the OpenResponses specification:
diff --git a/src/app/endpoints/responses.py b/src/app/endpoints/responses.py
@@ -234,15 +234,14 @@ async def responses_endpoint_handler(
         request.headers,
     )
 
-    #Build RAG context from Inline RAG sources
+    # Build RAG context from Inline RAG sources
     inline_rag_context = await build_rag_context(
         client,
         moderation_result.decision,
         input_text,
         vector_store_ids,
         responses_request.solr,
     )
-
     if moderation_result.decision == "passed":
         responses_request.input = append_inline_rag_context_to_responses_input(
             responses_request.input, inline_rag_context.context_text
@@ -663,7 +662,6 @@ async def handle_non_streaming_response(
             )
     else:
         try:
-            print("API Params: ", api_params.model_dump(exclude_none=True))
             api_response = cast(
                 OpenAIResponseObject,
                 await client.responses.create(
diff --git a/src/app/main.py b/src/app/main.py
@@ -229,5 +229,5 @@ async def send_wrapper(message: Message) -> None:
 # RestApiMetricsMiddleware (registered last) is outermost.  This ensures metrics
 # always observe a status code — including 500s synthesised by the exception
 # middleware — rather than seeing a raw exception with no response.
-#app.add_middleware(GlobalExceptionMiddleware)
+app.add_middleware(GlobalExceptionMiddleware)
 app.add_middleware(RestApiMetricsMiddleware)
diff --git a/src/utils/responses.py b/src/utils/responses.py
@@ -420,16 +420,12 @@ def extract_vector_store_ids_from_tools(
     return vector_store_ids
 
 
-def _tool_matches_allowed_entry(tool: InputTool, entry: dict[str, str]) -> bool:
+def tool_matches_allowed_entry(tool: InputTool, entry: dict[str, str]) -> bool:
     """Return True if the tool satisfies every key in the allowlist entry.
 
-    ``OpenAIResponseInputToolChoiceAllowedTools.tools`` entries use string keys
-    and values (e.g. ``type``, ``server_label``, ``name``); each must match the
-    corresponding attribute on the tool.
-
     Parameters:
         tool: A configured input tool.
-        entry: One allowlist entry from ``allowed_tools.tools``.
+        entry: One allowlist entry from allowed_tools.tools.
 
     Returns:
         True if all entry keys match the tool.
@@ -445,28 +441,139 @@ def _tool_matches_allowed_entry(tool: InputTool, entry: dict[str, str]) -> bool:
     return True
 
 
-def filter_tools_by_allowed_entries(
-    tools: list[InputTool],
+def group_mcp_tools_by_server(
+    entries: list[dict[str, str]],
+) -> dict[str, Optional[list[str]]]:
+    """Group MCP tool filters by server_label.
+
+    Rules:
+    - Non-MCP entries are ignored.
+    - Entries without server_label are ignored.
+    - If any entry for a server has no "name", that server is unrestricted (None).
+    - Otherwise, collect unique tool names in first-seen order.
+
+    Returns:
+        Dict mapping:
+            server_label -> None (unrestricted) OR list of allowed tool names
+    """
+    unrestricted_servers: set[str] = set()
+    server_to_names: dict[str, list[str]] = {}
+    for entry in entries:
+        if entry.get("type") != "mcp":
+            continue
+        server = entry.get("server_label")
+        if not server:
+            continue
+        # Unrestricted entry (no "name")
+        if "name" not in entry:
+            unrestricted_servers.add(server)
+            continue
+        # Skip collecting names if already unrestricted
+        if server in unrestricted_servers:
+            continue
+        name = entry["name"]
+        if server not in server_to_names:
+            server_to_names[server] = []
+
+        if name not in server_to_names[server]:
+            server_to_names[server].append(name)
+
+    # Build final result
+    result: dict[str, Optional[list[str]]] = {}
+    for server in unrestricted_servers:
+        result[server] = None
+
+    for server, names in server_to_names.items():
+        if server not in unrestricted_servers:
+            result[server] = names
+
+    return result
+
+
+def mcp_strip_name_from_allowlist_entries(
     allowed_entries: list[dict[str, str]],
-) -> list[InputTool]:
-    """Keep tools that match at least one allowlist entry.
+) -> list[dict[str, str]]:
+    """Return a copy of entries where 'name' is removed only for MCP entries."""
+    result: list[dict[str, str]] = []
+    for entry in allowed_entries:
+        new_entry = entry.copy()
+        if new_entry.get("type") == "mcp":
+            new_entry.pop("name", None)
 
-    If ``allowed_entries`` is empty, no tools are kept (strict allowlist).
+        result.append(new_entry)
+
+    return result
 
-    Parameters:
-        tools: Tools to filter (typically after translation / preparation).
-        allowed_entries: Entries from ``OpenAIResponseInputToolChoiceAllowedTools.tools``.
+
+def mcp_project_allowed_tools_to_names(
+    tool: InputToolMCP, names: list[str]
+) -> list[str] | None:
+    """Intersect narrowed names with what the MCP tool already permits.
 
     Returns:
-        A sublist of ``tools`` matching the allowlist.
+        List of permitted tool names, or None if the intersection is empty.
+    """
+    if not names:
+        return None
+    name_set = set(names)
+    allowed = tool.allowed_tools
+    if allowed is None:
+        permitted = name_set
+    elif isinstance(allowed, list):
+        permitted = name_set & set(allowed)
+    else:
+        if allowed.tool_names is None:
+            permitted = name_set
+        else:
+            permitted = name_set & set(allowed.tool_names)
+
+    if not permitted:
+        return None
+
+    return list(permitted)
+
+
+def filter_tools_by_allowed_entries(
+    tools: list[InputTool],
+    allowed_entries: list[dict[str, str]],
+) -> list[InputTool]:
+    """Filter tools based on allowlist entries.
+
+    - Keeps tools matching at least one entry.
+    - Applies MCP name narrowing when applicable.
     """
     if not allowed_entries:
         return []
-    return [
-        t
-        for t in tools
-        if any(_tool_matches_allowed_entry(t, e) for e in allowed_entries)
-    ]
+
+    mcp_names_by_server = group_mcp_tools_by_server(allowed_entries)
+    sanitized_entries = mcp_strip_name_from_allowlist_entries(allowed_entries)
+    filtered: list[InputTool] = []
+    for tool in tools:
+        # Skip tools not matching any allowlist entry
+        if not any(tool_matches_allowed_entry(tool, e) for e in sanitized_entries):
+            continue
+        # Non-MCP tools pass through and are handled separately
+        if tool.type != "mcp":
+            filtered.append(tool)
+            continue
+
+        mcp_tool = cast(InputToolMCP, tool)
+        server = mcp_tool.server_label
+
+        narrowed_names = mcp_names_by_server.get(server)
+        # No filters specified for this MCP server
+        if narrowed_names is None:
+            filtered.append(tool)
+            continue
+
+        # Apply intersection
+        permitted = mcp_project_allowed_tools_to_names(mcp_tool, narrowed_names)
+        if permitted is None:
+            continue
+
+        filtered.append(mcp_tool.model_copy(update={"allowed_tools": permitted}))
+
+    return filtered
 
 
 def resolve_vector_store_ids(
@@ -1382,46 +1489,41 @@ async def resolve_tool_choice(
     mcp_headers: Optional[McpHeaders] = None,
     request_headers: Optional[Mapping[str, str]] = None,
 ) -> tuple[Optional[list[InputTool]], Optional[ToolChoice], Optional[list[str]]]:
-    """Resolve tools and tool_choice for the Responses API.
+    """Resolve tools and tool choice for the Responses API.
 
-    If ``tool_choice`` is ``none``, always returns ``(None, None, None)`` — no
-    tools are sent to Llama Stack, even when the request included explicit
-    ``tools`` (e.g. file_search).
+    When tool choice disables tools, always return Nones so Llama Stack
+    sees no tools, even if the request listed tools.
 
-    If ``tool_choice`` is ``allowed_tools``, it is rewritten for downstream
-    services: tools are filtered to those matching the allowlist entries, and
-    ``tool_choice`` becomes ``auto`` or ``required`` per the allowlist ``mode``.
+    Allowed-tools mode: filter tools to the allowlist and narrow tool choice to
+    auto or required from the allowlist mode.
 
-    If the request includes tools and tool_choice is not ``none``, uses them
-    (after allowlist filtering) and derives vector_store_ids from the prepared
-    tools; otherwise loads tools via prepare_tools (using all configured vector
-    stores), then applies allowlist filtering when present. When no tools end
-    up configured, tool_choice is cleared to None.
+    Otherwise: use request tools (with filtering) and derive vector store IDs, or
+    load tools via prepare_tools, then filter. Clear tool choice when no tools
+    remain.
 
     Args:
-        tools: Tools from the request, or None to use LCORE-configured tools.
-        tool_choice: Requested tool choice (e.g. auto, required, none) or None.
-        token: User token for MCP/auth.
-        mcp_headers: Optional MCP headers to propagate.
-        request_headers: Optional request headers for tool resolution.
+        tools: Request tools, or None for LCORE-configured tools.
+        tool_choice: Requested strategy, or None.
+        token: User token for MCP and auth.
+        mcp_headers: Optional MCP headers.
+        request_headers: Optional headers for tool resolution.
 
     Returns:
-        A tuple of (prepared_tools, prepared_tool_choice, vector_store_ids):
-        prepared_tools is the list of tools to use, or None if none configured;
-        prepared_tool_choice is the resolved tool choice, or None when there
-        are no tools; vector_store_ids is extracted from tools (in user-facing format)
-        when provided, otherwise None (also None when tool_choice is ``none``).
+        Prepared tools, resolved tool choice, and vector store IDs (user-facing),
+        each possibly None.
     """
+    # If tool_choice is "none", no tools are allowed
     if isinstance(tool_choice, ToolChoiceMode) and tool_choice == ToolChoiceMode.none:
         return None, None, None
 
+    # Extract the allowed filters if specified and overwrite tool choice mode
     allowed_filters: Optional[list[dict[str, str]]] = None
     if isinstance(tool_choice, AllowedTools):
         allowed_filters = tool_choice.tools
         tool_choice = ToolChoiceMode(tool_choice.mode)
 
     prepared_tools: Optional[list[InputTool]] = None
-    if tools:  # explicitly specified in request
+    if tools is not None:  # explicitly specified in request
         byok_rags = configuration.configuration.byok_rag
         prepared_tools = translate_tools_vector_store_ids(tools, byok_rags)
         if allowed_filters is not None:
diff --git a/tests/e2e/features/steps/llm_query_response.py b/tests/e2e/features/steps/llm_query_response.py
@@ -57,7 +57,9 @@ def responses_output_should_include_item_type(context: Context, item_type: str)
 
 
 @then('The responses output should not include an item with type "{item_type}"')
-def responses_output_should_not_include_item_type(context: Context, item_type: str) -> None:
+def responses_output_should_not_include_item_type(
+    context: Context, item_type: str
+) -> None:
     """Assert no ``output`` item has the given ``type``."""
     assert context.response is not None, "Request needs to be performed first"
     response_json = cast(dict[str, Any], context.response.json())
diff --git a/tests/unit/utils/test_responses.py b/tests/unit/utils/test_responses.py