lightspeed-core · tisnik · Mar 9, 2026 · Mar 5, 2026 · Mar 5, 2026 · Mar 6, 2026
diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py
@@ -41,6 +41,7 @@
     validate_and_retrieve_conversation,
 )
 from utils.mcp_headers import McpHeaders, mcp_headers_dependency
+from utils.mcp_oauth_probe import check_mcp_auth
 from utils.query import (
     consume_query_tokens,
     handle_known_apistatus_errors,
@@ -122,6 +123,8 @@ async def query_endpoint_handler(
     """
     check_configuration_loaded(configuration)
 
+    await check_mcp_auth(configuration, mcp_headers)
+
     started_at = datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
     user_id, _, _skip_userid_check, token = auth
     # Check token availability

diff --git a/src/app/endpoints/streaming_query.py b/src/app/endpoints/streaming_query.py
@@ -61,6 +61,7 @@
     validate_and_retrieve_conversation,
 )
 from utils.mcp_headers import McpHeaders, mcp_headers_dependency
+from utils.mcp_oauth_probe import check_mcp_auth
 from utils.query import (
     consume_query_tokens,
     extract_provider_and_model_from_model_id,
@@ -151,6 +152,8 @@ async def streaming_query_endpoint_handler(  # pylint: disable=too-many-locals
     """
     check_configuration_loaded(configuration)
 
+    await check_mcp_auth(configuration, mcp_headers)
+
     user_id, _user_name, _skip_userid_check, token = auth
     started_at = datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
 

diff --git a/src/app/endpoints/tools.py b/src/app/endpoints/tools.py
@@ -3,8 +3,7 @@
 from typing import Annotated, Any
 
 from fastapi import APIRouter, Depends, HTTPException, Request
-from llama_stack_client import APIConnectionError, BadRequestError, AuthenticationError
-from llama_stack.core.datatypes import AuthenticationRequiredError
+from llama_stack_client import APIConnectionError, BadRequestError
 
 from authentication import get_auth_dependency
 from authentication.interface import AuthTuple
@@ -21,7 +20,7 @@
 )
 from utils.endpoints import check_configuration_loaded
 from utils.mcp_headers import McpHeaders, mcp_headers_dependency
-from utils.mcp_oauth_probe import probe_mcp_oauth_and_raise_401
+from utils.mcp_oauth_probe import check_mcp_auth
 from utils.tool_formatter import format_tools_list
 from log import get_logger
 
@@ -124,6 +123,8 @@ async def tools_endpoint_handler(  # pylint: disable=too-many-locals,too-many-st
 
     check_configuration_loaded(configuration)
 
+    await check_mcp_auth(configuration, mcp_headers)
+
     toolgroups_response = []
     try:
         client = AsyncLlamaStackClientHolder().get_client()
@@ -146,6 +147,7 @@ async def tools_endpoint_handler(  # pylint: disable=too-many-locals,too-many-st
             # Get tools for each toolgroup
             headers = mcp_headers.get(toolgroup.identifier, {})
             authorization = headers.pop("Authorization", None)
+
             tools_response = await client.tools.list(
                 toolgroup_id=toolgroup.identifier,
                 extra_headers=headers,
@@ -154,13 +156,6 @@ async def tools_endpoint_handler(  # pylint: disable=too-many-locals,too-many-st
         except BadRequestError:
             logger.error("Toolgroup %s is not found", toolgroup.identifier)
             continue
-        except (AuthenticationError, AuthenticationRequiredError) as e:
-            if toolgroup.mcp_endpoint:
-                await probe_mcp_oauth_and_raise_401(
-                    toolgroup.mcp_endpoint.uri, chain_from=e
-                )
-            error_response = UnauthorizedResponse(cause=str(e))
-            raise HTTPException(**error_response.model_dump()) from e
         except APIConnectionError as e:
             logger.error("Unable to connect to Llama Stack: %s", e)
             response = ServiceUnavailableResponse(

diff --git a/src/utils/mcp_oauth_probe.py b/src/utils/mcp_oauth_probe.py
@@ -1,52 +1,101 @@
-"""Probe MCP server for OAuth and raise 401 with WWW-Authenticate when required."""
+"""Probe MCP servers for OAuth and raise 401 with WWW-Authenticate when required.
 
+Used by endpoints that call MCP-backed services so clients receive a proper
+401 with WWW-Authenticate when an MCP server requires OAuth.
+"""
+
+import asyncio
 from typing import Optional
+
 import aiohttp
 from fastapi import HTTPException
 
 from models.responses import UnauthorizedResponse
 
+from configuration import AppConfig
+from utils.mcp_headers import McpHeaders
+import constants
+
 from log import get_logger
 
 logger = get_logger(__name__)
 
 
-async def probe_mcp_oauth_and_raise_401(
+async def check_mcp_auth(configuration: AppConfig, mcp_headers: McpHeaders) -> None:
+    """Probe the MCP servers that may demand OAuth and surface any resulting 401.
+
+    A server is selected when mcp_headers holds a non-empty Authorization
+    value under its name, or when constants.MCP_AUTH_OAUTH appears among the
+    values of its resolved_authorization_headers. The selected probes are
+    awaited together through asyncio.gather, so the first HTTPException
+    raised by probe_mcp reaches the caller.
+
+    Parameters:
+        configuration: Application config whose mcp_servers are inspected.
+        mcp_headers: Per-server headers, keyed by MCP server name.
+
+    Returns:
+        None when no server is selected or when no probe raises.
+
+    Raises:
+        HTTPException: propagated from probe_mcp.
+    """
+    pending = []
+    for server in configuration.mcp_servers:
+        auth_value = mcp_headers.get(server.name, {}).get("Authorization", None)
+        needs_probe = bool(auth_value) or (
+            constants.MCP_AUTH_OAUTH in server.resolved_authorization_headers.values()
+        )
+        if needs_probe:
+            pending.append(probe_mcp(server.url, authorization=auth_value))
+    if not pending:
+        return
+    await asyncio.gather(*pending)
+
+
+async def probe_mcp(
     url: str,
-    chain_from: Optional[BaseException] = None,
+    authorization: Optional[str] = None,
 ) -> None:
     """Probe MCP endpoint and raise 401 so the client can perform OAuth.
 
-    Performs an async GET to the given URL to obtain a WWW-Authenticate header,
-    then raises HTTPException with status 401 and that header. If the probe
-    fails (connection error, timeout), raises 401 without the header.
+    Sends a single GET to url (10 s total timeout), adding the Authorization
+    header when one is given. A non-401 status returns normally. A 401 is
+    turned into HTTPException, with the server's WWW-Authenticate header if
+    present; a connection error or timeout raises it without that header.
 
-    Args:
+    Parameters:
         url: MCP server URL to probe.
-        chain_from: Exception to chain the HTTPException from when
-            the probe succeeds (e.g. the original AuthenticationError).
+        authorization: Value for the probe's Authorization header; omitted if None.
 
     Returns:
-        None. Always raises an HTTPException.
+        None when the response status is not 401, whatever else it is.
+        NOTE(review): 403/5xx replies therefore pass silently - confirm intent.
 
     Raises:
-        HTTPException: 401 with WWW-Authenticate when the probe succeeds, or
-            401 without the header when the probe fails.
+        HTTPException: built from UnauthorizedResponse. Carries WWW-Authenticate
+            when the 401 reply had one. NOTE(review): probe failures also use
+            the "requires OAuth" cause text - confirm that is the wanted message.
     """
     cause = f"MCP server at {url} requires OAuth"
     error_response = UnauthorizedResponse(cause=cause)
+    headers: Optional[dict[str, str]] = (
+        {"authorization": authorization} if authorization is not None else None
+    )
     try:
         timeout = aiohttp.ClientTimeout(total=10)
         async with aiohttp.ClientSession(timeout=timeout) as session:
-            async with session.get(url) as resp:
+            async with session.get(url, headers=headers) as resp:
+                if resp.status != 401:
+                    return
                 www_auth = resp.headers.get("WWW-Authenticate")
                 if www_auth is None:
                     logger.warning("No WWW-Authenticate header received from %s", url)
-                    raise HTTPException(**error_response.model_dump()) from chain_from
+                    raise HTTPException(**error_response.model_dump())
                 raise HTTPException(
                     **error_response.model_dump(),
                     headers={"WWW-Authenticate": www_auth},
-                ) from chain_from
+                )
     except (aiohttp.ClientError, TimeoutError) as probe_err:
         logger.warning("OAuth probe failed for %s: %s", url, probe_err)
         raise HTTPException(**error_response.model_dump()) from probe_err
diff --git a/src/utils/responses.py b/src/utils/responses.py
@@ -44,7 +44,6 @@
     ServiceUnavailableResponse,
 )
 from utils.mcp_headers import McpHeaders, extract_propagated_headers
-from utils.mcp_oauth_probe import probe_mcp_oauth_and_raise_401
 from utils.prompts import get_system_prompt, get_topic_summary_system_prompt
 from utils.query import (
     extract_provider_and_model_from_model_id,
@@ -464,16 +463,6 @@ def _get_token_value(original: str, header: str) -> Optional[str]:
         if mcp_server.authorization_headers and len(headers) != len(
             mcp_server.authorization_headers
         ):
-            # If OAuth was required and no headers passed, probe endpoint and forward
-            # 401 with WWW-Authenticate so the client can perform OAuth
-            uses_oauth = (
-                constants.MCP_AUTH_OAUTH
-                in mcp_server.resolved_authorization_headers.values()
-            )
-            if uses_oauth and (
-                mcp_headers is None or not mcp_headers.get(mcp_server.name)
-            ):
-                await probe_mcp_oauth_and_raise_401(mcp_server.url)
             logger.warning(
                 "Skipping MCP server %s: required %d auth headers but only resolved %d",
                 mcp_server.name,

diff --git a/tests/e2e/features/mcp.feature b/tests/e2e/features/mcp.feature
@@ -56,7 +56,6 @@ Feature: MCP tests
     """
     And The headers of the response contains the following header "www-authenticate"
 
-  @skip     # will be fixed in LCORE-1368
   Scenario: Check if tools endpoint succeeds when MCP auth token is passed
     Given The system is in default state
     And I set the "MCP-HEADERS" header to
@@ -65,42 +64,9 @@ Feature: MCP tests
     """
     When I access REST API endpoint "tools" using HTTP GET method
     Then The status code of the response is 200
-    And The body of the response is the following
-    """
-        {
-            "tools": [
-                {
-                    "identifier": "",
-                    "description": "Insert documents into memory",
-                    "parameters": [],
-                    "provider_id": "",
-                    "toolgroup_id": "builtin::rag",
-                    "server_source": "builtin",
-                    "type": ""
-                },
-                {
-                    "identifier": "",
-                    "description": "Search for information in a database.",
-                    "parameters": [],
-                    "provider_id": "",
-                    "toolgroup_id": "builtin::rag",
-                    "server_source": "builtin",
-                    "type": ""
-                },
-                {
-                    "identifier": "",
-                    "description": "Mock tool for E2E",
-                    "parameters": [],
-                    "provider_id": "",
-                    "toolgroup_id": "mcp-oauth",
-                    "server_source": "http://localhost:3001",
-                    "type": ""
-                }
-            ]
-        }
-    """
+    And The body of the response contains mcp-oauth
 
-  @skip     # will be fixed in LCORE-1366
+  @skip-in-library-mode     # will be fixed in LCORE-1428
   Scenario: Check if query endpoint succeeds when MCP auth token is passed
     Given The system is in default state
     And I set the "MCP-HEADERS" header to
@@ -115,10 +81,10 @@ Feature: MCP tests
     Then The status code of the response is 200
     And The response should contain following fragments
         | Fragments in LLM response |
-        | hello                     |
+        | Hello                     |
     And The token metrics should have increased
 
-  @skip     # will be fixed in LCORE-1366
+  @skip-in-library-mode     # will be fixed in LCORE-1428
   Scenario: Check if streaming_query endpoint succeeds when MCP auth token is passed
     Given The system is in default state
     And I set the "MCP-HEADERS" header to
@@ -134,10 +100,9 @@ Feature: MCP tests
     Then The status code of the response is 200
     And The streamed response should contain following fragments
         | Fragments in LLM response |
-        | hello                     |
+        | Hello                     |
     And The token metrics should have increased
 
-  @skip     # will be fixed in LCORE-1368
   Scenario: Check if tools endpoint reports error when MCP invalid auth token is passed
     Given The system is in default state
     And I set the "MCP-HEADERS" header to
@@ -180,7 +145,6 @@ Feature: MCP tests
     """
     And The headers of the response contains the following header "www-authenticate"
 
-  @skip     # will be fixed in LCORE-1366
   Scenario: Check if streaming_query endpoint reports error when MCP invalid auth token is passed
     Given The system is in default state
     And I set the "MCP-HEADERS" header to

diff --git a/tests/e2e/mock_mcp_server/server.py b/tests/e2e/mock_mcp_server/server.py
@@ -47,6 +47,8 @@ def do_GET(self) -> None:  # pylint: disable=invalid-name
         """Handle GET requests."""
         if self.path == "/health":
             self._json_response({"status": "ok"})
+        elif self._parse_auth() is not None:
+            self._json_response({"status": "authorized"})
         else:
             self._require_oauth()