lightspeed-core
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
Lines changed: 33 additions & 0 deletions
diff --git a/README.md b/README.md
Lines changed: 44 additions & 8 deletions
diff --git a/docs/config.md b/docs/config.md
Lines changed: 1 addition & 0 deletions
diff --git a/docs/contributing_guide.md b/docs/contributing_guide.md
Lines changed: 33 additions & 0 deletions
diff --git a/examples/lightspeed-stack-mcp-servers.yaml b/examples/lightspeed-stack-mcp-servers.yaml
Lines changed: 17 additions & 1 deletion
diff --git a/src/app/endpoints/a2a.py b/src/app/endpoints/a2a.py
Lines changed: 24 additions & 6 deletions
diff --git a/src/app/endpoints/query.py b/src/app/endpoints/query.py
Lines changed: 6 additions & 3 deletions
diff --git a/src/app/endpoints/rlsapi_v1.py b/src/app/endpoints/rlsapi_v1.py
Lines changed: 3 additions & 3 deletions
@@ -22,6 +22,12 @@
     * [Pylint](#pylint)
     * [Security checks](#security-checks)
 * [Code style](#code-style)
+    * [Function Standards](#function-standards)
+        * [Documentation](#documentation)
+        * [Type annotations](#type-annotations)
+        * [Naming conventions](#naming-conventions)
+        * [Async functions](#async-functions)
+        * [Error handling](#error-handling)
     * [Formatting rules](#formatting-rules)
     * [Docstrings style](#docstrings-style)
 
@@ -227,6 +233,33 @@ make security-check
 
 ## Code style
 
+### Function Standards
+
+#### Documentation
+
+All functions require docstrings with brief descriptions
+
+#### Type annotations
+
+Use complete type annotations for parameters and return types
+
+- Use `typing_extensions.Self` for model validators
+- Union types: `str | int` (modern syntax)
+- Optional: `Optional[Type]`
+
+#### Naming conventions
+
+Use snake_case with descriptive, action-oriented names (e.g. prefixes such as `get_`, `validate_`, `check_`).
+
+#### Async functions
+
+Use `async def` for I/O operations and external API calls
+
+#### Error handling
+
+- Use FastAPI `HTTPException` with appropriate status codes for API endpoints
+- Handle `APIConnectionError` from Llama Stack where appropriate
+
 ### Formatting rules
 
 Code formatting rules are checked by __Black__. More info can be found on [https://black.readthedocs.io/en/stable/](https://black.readthedocs.io/en/stable/).
 
@@ -340,8 +340,9 @@ Each MCP server requires two fields:
 - `name`: Unique identifier for the MCP server
 - `url`: The endpoint where the MCP server is running
 
-And one optional field:
+And optional fields:
 - `provider_id`: MCP provider identification (defaults to `"model-context-protocol"`)
+- `headers`: List of HTTP header names to automatically forward from the incoming request to this MCP server (see [Automatic Header Propagation](#5-automatic-header-propagation-for-gateway-injected-headers))
 
 **Minimal Example:**
 
@@ -436,6 +437,41 @@ mcp_servers:
 
 When no token is provided for an OAuth-configured server, the service may respond with **401 Unauthorized** and a **`WWW-Authenticate`** header (probed from the MCP server). Clients can use this to drive an OAuth flow and then retry with the token in `MCP-HEADERS`.
 
+##### 5. Automatic Header Propagation (For Gateway-Injected Headers)
+
+Use the `headers` field to automatically forward specific headers from the incoming HTTP request to an MCP server. This is designed for environments where infrastructure components (e.g. API gateways) inject headers that MCP servers need but clients cannot provide.
+
+**HCC Use Case:** In Hybrid Cloud Console (HCC), the gateway strips the client's `Authorization` header and replaces it with `x-rh-identity` (a base64-encoded user identity). Backend services use `x-rh-identity` to identify users. Since clients never see this header, the existing `MCP-HEADERS` mechanism cannot be used. Instead, configure `headers` to automatically forward it:
+
+```yaml
+mcp_servers:
+  - name: "rbac"
+    url: "http://rbac-service:8080"
+    headers:
+      - x-rh-identity
+      - x-rh-insights-request-id
+```
+
+When a request arrives at Lightspeed with these headers, they are automatically extracted and forwarded to the `rbac` MCP server. No client-side configuration is needed.
+
+**Key behaviors:**
+
+- **Case-insensitive matching**: Header names in the allowlist are matched case-insensitively against the incoming request.
+- **Missing headers are skipped**: If a header in the allowlist is not present on the incoming request, it is silently skipped. The MCP server is **not** skipped (unlike `authorization_headers` behavior).
+- **Additive with other methods**: Propagated headers can be combined with `authorization_headers` and `MCP-HEADERS`. If the same header name appears in both `authorization_headers` and `headers`, the `authorization_headers` value takes precedence.
+
+**Combined example:**
+
+```yaml
+mcp_servers:
+  - name: "notifications"
+    url: "http://notifications-service:8080"
+    headers:
+      - x-rh-identity                              # From incoming request
+    authorization_headers:
+      X-API-Key: "/var/secrets/notifications-key"   # Static service credential
+```
+
 ##### Client-Authenticated MCP Servers Discovery
 
 To help clients determine which MCP servers require client-provided tokens, use the **MCP Client Auth Options** endpoint:
@@ -492,13 +528,13 @@ mcp_servers:
 
 ##### Authentication Method Comparison
 
-| Method          | Use Case                    | Configuration                    | Token Scope                   | Example                |
-|-----------------|-----------------------------|----------------------------------|-------------------------------|------------------------|
-| **Static File** | Service tokens, API keys    | File path in config              | Global (all users)            | `"/var/secrets/token"` |
-| **Kubernetes**  | K8s service accounts        | `"kubernetes"` keyword           | Per-user (from auth)          | `"kubernetes"`         |
-| **Client**      | User-specific tokens        | `"client"` keyword + HTTP header | Per-request                   | `"client"`             |
-| **OAuth**       | OAuth-protected MCP servers | `"oauth"` keyword + HTTP header  | Per-request (from OAuth flow) | `"oauth"`              |
-
+| Method                 | Use Case                         | Configuration                    | Token Scope                         | Example                       |
+|------------------------|----------------------------------|----------------------------------|-------------------------------------|-------------------------------|
+| **Static File**        | Service tokens, API keys         | File path in config              | Global (all users)                  | `"/var/secrets/token"`        |
+| **Kubernetes**         | K8s service accounts             | `"kubernetes"` keyword           | Per-user (from auth)                | `"kubernetes"`                |
+| **Client**             | User-specific tokens             | `"client"` keyword + HTTP header | Per-request                         | `"client"`                    |
+| **OAuth**              | OAuth-protected MCP servers      | `"oauth"` keyword + HTTP header  | Per-request (from OAuth flow)       | `"oauth"`                     |
+| **Header Propagation** | Gateway-injected headers (HCC)   | `headers` list                   | Per-request (from incoming request) | `headers: [x-rh-identity]`    |
 
 ##### Important: Automatic Server Skipping
 
 
@@ -373,6 +373,7 @@ Useful resources:
 | provider_id | string | MCP provider identification |
 | url | string | URL of the MCP server |
 | authorization_headers | object | Headers to send to the MCP server. The map contains the header name and the path to a file containing the header value (secret). There are 3 special cases: 1. Usage of the kubernetes token in the header — use the string 'kubernetes' instead of the file path. 2. Usage of the client-provided token in the header — use the string 'client' instead of the file path. 3. Usage of OAuth token (resolved at request time or 401 with WWW-Authenticate) — use the string 'oauth' instead of the file path. |
+| headers | array | List of HTTP header names to automatically forward from the incoming request to this MCP server. Headers listed here are extracted from the original client request and included when calling the MCP server. This is useful when infrastructure components (e.g. API gateways) inject headers that MCP servers need, such as x-rh-identity in HCC. Header matching is case-insensitive. These headers are additive with authorization_headers and MCP-HEADERS. |
 | timeout | integer | Timeout in seconds for requests to the MCP server. If not specified, the default timeout from Llama Stack will be used. Note: This field is reserved for future use when Llama Stack adds timeout support. |
 
 
 
@@ -22,6 +22,12 @@
     * [Pylint](#pylint)
     * [Security checks](#security-checks)
 * [Code style](#code-style)
+    * [Function Standards](#function-standards)
+        * [Documentation](#documentation)
+        * [Type annotations](#type-annotations)
+        * [Naming conventions](#naming-conventions)
+        * [Async functions](#async-functions)
+        * [Error handling](#error-handling)
     * [Formatting rules](#formatting-rules)
     * [Docstrings style](#docstrings-style)
 
@@ -227,6 +233,33 @@ make security-check
 
 ## Code style
 
+### Function Standards
+
+#### Documentation
+
+All functions require docstrings with brief descriptions
+
+#### Type annotations
+
+Use complete type annotations for parameters and return types
+
+- Use `typing_extensions.Self` for model validators
+- Union types: `str | int` (modern syntax)
+- Optional: `Optional[Type]`
+
+#### Naming conventions
+
+Use snake_case with descriptive, action-oriented names (e.g. prefixes such as `get_`, `validate_`, `check_`).
+
+#### Async functions
+
+Use `async def` for I/O operations and external API calls
+
+#### Error handling
+
+- Use FastAPI `HTTPException` with appropriate status codes for API endpoints
+- Handle `APIConnectionError` from Llama Stack where appropriate
+
 ### Formatting rules
 
 Code formatting rules are checked by __Black__. More info can be found on [https://black.readthedocs.io/en/stable/](https://black.readthedocs.io/en/stable/).
 
@@ -46,4 +46,20 @@ mcp_servers:
     url: "http://url.com:6"
     authorization_headers:
       Authorization: "client"  # Special value to forward the client's token
-    timeout: 30  # Optional: timeout in seconds (future Llama Stack feature)
+    timeout: 30  # Optional: timeout in seconds (future Llama Stack feature)
+  # Example with automatic header propagation from incoming request (HCC use case)
+  # Headers listed here are automatically extracted from the incoming HTTP request
+  # and forwarded to this MCP server. Useful when infrastructure components (e.g.
+  # HCC Gateway) inject headers that MCP servers need for user identification.
+  - name: "rbac"
+    url: "http://rbac-service:8080"
+    headers:
+      - x-rh-identity
+      - x-rh-insights-request-id
+  # Headers can be combined with authorization_headers (additive)
+  - name: "notifications"
+    url: "http://notifications-service:8080"
+    headers:
+      - x-rh-identity
+    authorization_headers:
+      X-API-Key: "/var/secrets/notifications-api-key"
@@ -3,6 +3,7 @@
 import asyncio
 import json
 import uuid
+from collections.abc import Mapping
 from datetime import datetime, timezone
 from typing import Annotated, Any, AsyncIterator, MutableMapping, Optional
 
@@ -46,7 +47,7 @@
 from models.requests import QueryRequest
 from utils.mcp_headers import mcp_headers_dependency, McpHeaders
 from utils.responses import (
-    extract_text_from_output_item,
+    extract_text_from_response_item,
     prepare_responses_params,
 )
 from utils.suid import normalize_conversation_id
@@ -107,7 +108,7 @@ def _convert_responses_content_to_a2a_parts(output: list[Any]) -> list[Part]:
     parts: list[Part] = []
 
     for output_item in output:
-        text = extract_text_from_output_item(output_item)
+        text = extract_text_from_response_item(output_item)
         if text:
             parts.append(Part(root=TextPart(text=text)))
 
@@ -184,15 +185,22 @@ class A2AAgentExecutor(AgentExecutor):
     routing queries to the LLM backend using the Responses API.
     """
 
-    def __init__(self, auth_token: str, mcp_headers: Optional[McpHeaders] = None):
+    def __init__(
+        self,
+        auth_token: str,
+        mcp_headers: Optional[McpHeaders] = None,
+        request_headers: Optional[Mapping[str, str]] = None,
+    ):
         """Initialize the A2A agent executor.
 
         Args:
             auth_token: Authentication token for the request
             mcp_headers: MCP headers for context propagation
+            request_headers: Incoming HTTP request headers for allowlist propagation
         """
         self.auth_token: str = auth_token
         self.mcp_headers: McpHeaders = mcp_headers or {}
+        self.request_headers: Optional[Mapping[str, str]] = request_headers
 
     async def execute(
         self,
@@ -326,6 +334,7 @@ async def _process_task_streaming(  # pylint: disable=too-many-locals
                 self.mcp_headers,
                 stream=True,
                 store=True,
+                request_headers=self.request_headers,
             )
             # Stream response from LLM using the Responses API
             stream = await client.responses.create(**responses_params.model_dump())
@@ -649,17 +658,26 @@ async def get_agent_card(  # pylint: disable=unused-argument
         raise
 
 
-async def _create_a2a_app(auth_token: str, mcp_headers: McpHeaders) -> Any:
+async def _create_a2a_app(
+    auth_token: str,
+    mcp_headers: McpHeaders,
+    request_headers: Optional[Mapping[str, str]] = None,
+) -> Any:
     """Create an A2A Starlette application instance with auth context.
 
     Args:
         auth_token: Authentication token for the request
         mcp_headers: MCP headers for context propagation
+        request_headers: Incoming HTTP request headers for allowlist propagation
 
     Returns:
         A2A Starlette ASGI application
     """
-    agent_executor = A2AAgentExecutor(auth_token=auth_token, mcp_headers=mcp_headers)
+    agent_executor = A2AAgentExecutor(
+        auth_token=auth_token,
+        mcp_headers=mcp_headers,
+        request_headers=request_headers,
+    )
     task_store = await _get_task_store()
 
     request_handler = DefaultRequestHandler(
@@ -713,7 +731,7 @@ async def handle_a2a_jsonrpc(  # pylint: disable=too-many-locals,too-many-statem
         auth_token = ""
 
     # Create A2A app with auth context
-    a2a_app = await _create_a2a_app(auth_token, mcp_headers)
+    a2a_app = await _create_a2a_app(auth_token, mcp_headers, request.headers)
 
     # Detect if this is a streaming request by checking the JSON-RPC method
     is_streaming_request = False
 
@@ -179,6 +179,7 @@ async def query_endpoint_handler(
         mcp_headers,
         stream=False,
         store=True,
+        request_headers=request.headers,
     )
 
     # Handle Azure token refresh if needed
@@ -245,7 +246,8 @@ async def query_endpoint_handler(
         started_at=started_at,
         completed_at=completed_at,
         summary=turn_summary,
-        query_request=query_request,
+        query=query_request.query,
+        attachments=query_request.attachments,
         skip_userid_check=_skip_userid_check,
         topic_summary=topic_summary,
     )
@@ -288,13 +290,14 @@ async def retrieve_response(  # pylint: disable=too-many-locals
     Returns:
         TurnSummary: Summary of the LLM response content
     """
+    response: Optional[OpenAIResponseObject] = None
     try:
         moderation_result = await run_shield_moderation(
             client, responses_params.input, shield_ids
         )
-        if moderation_result.blocked:
+        if moderation_result.decision == "blocked":
             # Handle shield moderation blocking
-            violation_message = moderation_result.message or ""
+            violation_message = moderation_result.message
             await append_turn_to_conversation(
                 client,
                 responses_params.conversation,
 
@@ -35,7 +35,7 @@
 from observability import InferenceEventData, build_inference_event, send_splunk_event
 from utils.query import handle_known_apistatus_errors
 from utils.responses import (
-    extract_text_from_output_items,
+    extract_text_from_response_items,
     get_mcp_tools,
 )
 from utils.suid import get_suid
@@ -192,7 +192,7 @@ async def retrieve_simple_response(
     )
     response = cast(OpenAIResponseObject, response)
 
-    return extract_text_from_output_items(response.output)
+    return extract_text_from_response_items(response.output)
 
 
 def _get_cla_version(request: Request) -> str:
@@ -307,7 +307,7 @@ async def infer_endpoint(
     input_source = infer_request.get_input_source()
     instructions = _build_instructions(infer_request.context.systeminfo)
     model_id = _get_default_model_id()
-    mcp_tools = await get_mcp_tools()
+    mcp_tools = await get_mcp_tools(request_headers=request.headers)
     logger.debug(
         "Request %s: Combined input source length: %d", request_id, len(input_source)
     )