diff --git a/docs/openapi.json b/docs/openapi.json
index 84f2a59dc..fa969fb98 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -1863,7 +1863,7 @@
                     "providers"
                 ],
                 "summary": "Get Provider Endpoint Handler",
-                "description": "Retrieve a single provider identified by its unique ID.\n\n### Parameters:\n- request: The incoming HTTP request.\n- provider_id: Provider identification string\n- auth: Authentication tuple from the auth dependency.\n\n### Returns:\n- ProviderResponse: Provider details.\n\n### Raises:\n- HTTPException:\n- 401: Authentication failed\n- 403: Authorization failed\n- 404: Provider not found\n- 500: Lightspeed Stack configuration not loaded\n- 503: Unable to connect to Llama Stack",
+                "description": "Retrieve a single provider identified by its unique ID.\n\n### Parameters:\n- request: The incoming HTTP request.\n- provider_id: Provider identification string\n- auth: Authentication tuple from the auth dependency.\n\n### Raises:\n- HTTPException: with status 401 for unauthorized access.\n- HTTPException: with status 403 if permission is denied.\n- HTTPException: with status 404 if provider is not found.\n- HTTPException: with status 500 and a detail object containing `response`\n  and `cause` when service configuration is wrong or incomplete.\n- HTTPException: with status 503 and a detail object containing `response`\n  and `cause` when unable to connect to Llama Stack.\n\n### Returns:\n- ProviderResponse: Provider details.",
                 "operationId": "get_provider_endpoint_handler_v1_providers__provider_id__get",
                 "parameters": [
                     {
@@ -2082,7 +2082,7 @@
                     "prompts"
                 ],
                 "summary": "List Prompts Handler",
-                "description": "Handle requests to the GET /prompts endpoint.\n\nProcess GET requests that list all stored prompt templates from the Llama\nStack service. For example:\n\n    curl http://localhost:8080/v1/prompts\n\n### Parameters:\n- request: The incoming HTTP request (used by middleware).\n- auth: Authentication tuple from the auth dependency (used by middleware).\n\n### Raises:\n- HTTPException: If configuration is not loaded, if unable to connect to\n  Llama Stack, or if the prompts API returns an error response.\n\n### Returns:\n- PromptsListResponse: An object containing the list of prompts.",
+                "description": "Handle requests to the GET /prompts endpoint.\n\nProcess GET requests that list all stored prompt templates from the Llama\nStack service. For example:\n\n    curl http://localhost:8080/v1/prompts\n\n### Parameters:\n- request: The incoming HTTP request (used by middleware).\n- auth: Authentication tuple from the auth dependency (used by middleware).\n\n### Raises:\n- HTTPException: with status 401 for unauthorized access.\n- HTTPException: with status 403 if permission is denied.\n- HTTPException: with status 500 and a detail object containing `response`\n  and `cause` when service configuration is wrong or incomplete.\n- HTTPException: with status 503 and a detail object containing `response`\n  and `cause` when unable to connect to Llama Stack.\n\n### Returns:\n- PromptsListResponse: An object containing the list of prompts.",
                 "operationId": "list_prompts_handler_v1_prompts_get",
                 "responses": {
                     "200": {
@@ -2267,7 +2267,7 @@
                     "prompts"
                 ],
                 "summary": "Create Prompt Handler",
-                "description": "Handle requests to the POST /prompts endpoint.\n\nProcess requests to create a stored prompt template in Llama Stack. The\nbody must include the prompt text and may include template variable names.\nFor example:\n\n    curl -X POST http://localhost:8080/v1/prompts \\\\\n      -H 'Content-Type: application/json' \\\\\n      -d '{\"prompt\": \"Hello {{name}}\", \"variables\": [\"name\"]}'\n\n### Parameters:\n- request: The incoming HTTP request (used by middleware).\n- auth: Authentication tuple from the auth dependency (used by middleware).\n- body: Prompt creation parameters.\n\n### Raises:\n- HTTPException: If configuration is not loaded, if unable to connect to\n  Llama Stack, or if the prompts API returns an error response.\n\n### Returns:\n- PromptResourceResponse: The created prompt as returned by Llama Stack.",
+                "description": "Handle requests to the POST /prompts endpoint.\n\nProcess requests to create a stored prompt template in Llama Stack. The\nbody must include the prompt text and may include template variable names.\nFor example:\n\n    curl -X POST http://localhost:8080/v1/prompts \\\\\n      -H 'Content-Type: application/json' \\\\\n      -d '{\"prompt\": \"Hello {{name}}\", \"variables\": [\"name\"]}'\n\n### Parameters:\n- request: The incoming HTTP request (used by middleware).\n- auth: Authentication tuple from the auth dependency (used by middleware).\n- body: Prompt creation parameters.\n\n### Raises:\n- HTTPException: with status 401 for unauthorized access.\n- HTTPException: with status 403 if permission is denied.\n- HTTPException: with status 422 if the request body is invalid.\n- HTTPException: with status 500 and a detail object containing `response`\n  and `cause` when service configuration is wrong or incomplete.\n- HTTPException: with status 503 and a detail object containing `response`\n  and `cause` when unable to connect to Llama Stack.\n\n### Returns:\n- PromptResourceResponse: The created prompt as returned by Llama Stack.",
                 "operationId": "create_prompt_handler_v1_prompts_post",
                 "requestBody": {
                     "content": {
@@ -2470,7 +2470,7 @@
                     "prompts"
                 ],
                 "summary": "Get Prompt Handler",
-                "description": "Handle requests to the GET /prompts/{prompt_id} endpoint.\n\nProcess GET requests to retrieve a single prompt by identifier. The\n``version`` query parameter is optional; when omitted, the latest version is\nreturned. For example:\n\n    curl http://localhost:8080/v1/prompts/pmpt_abc123?version=1\n\n### Parameters:\n- request: The incoming HTTP request (used by middleware).\n- prompt_id: The Llama Stack prompt identifier.\n- auth: Authentication tuple from the auth dependency (used by middleware).\n- version: Optional version number (latest when omitted).\n\n### Raises:\n- HTTPException: If configuration is not loaded, if the prompt is not\n  found, if unable to connect to Llama Stack, or if the prompts API returns\n  an error response.\n\n### Returns:\n- PromptResourceResponse: The requested prompt object.",
+                "description": "Handle requests to the GET /prompts/{prompt_id} endpoint.\n\nProcess GET requests to retrieve a single prompt by identifier. The\n``version`` query parameter is optional; when omitted, the latest version is\nreturned. For example:\n\n    curl http://localhost:8080/v1/prompts/pmpt_abc123?version=1\n\n### Parameters:\n- request: The incoming HTTP request (used by middleware).\n- prompt_id: The Llama Stack prompt identifier.\n- auth: Authentication tuple from the auth dependency (used by middleware).\n- version: Optional version number (latest when omitted).\n\n### Raises:\n- HTTPException: with status 401 for unauthorized access.\n- HTTPException: with status 403 if permission is denied.\n- HTTPException: with status 404 if the prompt is not found.\n- HTTPException: with status 500 and a detail object containing `response`\n  and `cause` when service configuration is wrong or incomplete.\n- HTTPException: with status 503 and a detail object containing `response`\n  and `cause` when unable to connect to Llama Stack.\n\n### Returns:\n- PromptResourceResponse: The requested prompt object.",
                 "operationId": "get_prompt_handler_v1_prompts__prompt_id__get",
                 "parameters": [
                     {
@@ -10573,7 +10573,7 @@
                     "a2a"
                 ],
                 "summary": "Get Agent Card",
-                "description": "Serve the A2A Agent Card at the well-known location.\n\nThis endpoint provides the agent card that describes Lightspeed's\ncapabilities according to the A2A protocol specification.\n\nReturns:\n    AgentCard: The agent card describing this agent's capabilities.",
+                "description": "Serve the A2A Agent Card at the well-known location.\n\nThis endpoint provides the agent card that describes Lightspeed's\ncapabilities according to the A2A protocol specification.\n\n### Parameters:\n- auth: Authentication tuple from the auth dependency (used by middleware).\n\n### Raises:\n- HTTPException: with status 500 and a detail object containing `response`\n  and `cause` when service configuration is wrong or incomplete.\n- HTTPException: with status 503 and a detail object containing `response`\n  and `cause` when unable to connect to Llama Stack.\n\n### Returns:\n- AgentCard: The agent card describing this agent's capabilities.",
                 "operationId": "get_agent_card__well_known_agent_card_json_get",
                 "responses": {
                     "200": {
@@ -10595,7 +10595,7 @@
                     "a2a"
                 ],
                 "summary": "Get Agent Card",
-                "description": "Serve the A2A Agent Card at the well-known location.\n\nThis endpoint provides the agent card that describes Lightspeed's\ncapabilities according to the A2A protocol specification.\n\nReturns:\n    AgentCard: The agent card describing this agent's capabilities.",
+                "description": "Serve the A2A Agent Card at the well-known location.\n\nThis endpoint provides the agent card that describes Lightspeed's\ncapabilities according to the A2A protocol specification.\n\n### Parameters:\n- auth: Authentication tuple from the auth dependency (used by middleware).\n\n### Raises:\n- HTTPException: with status 500 and a detail object containing `response`\n  and `cause` when service configuration is wrong or incomplete.\n- HTTPException: with status 503 and a detail object containing `response`\n  and `cause` when unable to connect to Llama Stack.\n\n### Returns:\n- AgentCard: The agent card describing this agent's capabilities.",
                 "operationId": "get_agent_card__well_known_agent_json_get",
                 "responses": {
                     "200": {
@@ -10687,7 +10687,7 @@
                     "a2a"
                 ],
                 "summary": "A2A Health Check",
-                "description": "Health check endpoint for A2A service.\n\nReturns:\n    Dict with health status information.",
+                "description": "Health check endpoint for A2A service.\n\n### Parameters:\n- None\n\n### Raises:\n- None\n\n### Returns:\n- Dict with health status information.",
                 "operationId": "a2a_health_check_a2a_health_get",
                 "responses": {
                     "200": {
diff --git a/docs/openapi.md b/docs/openapi.md
index f4bcbdd38..124c5d782 100644
--- a/docs/openapi.md
+++ b/docs/openapi.md
@@ -493,6 +493,11 @@ Handle GET requests to the root ("/") endpoint and returns the static HTML index
 | 401         | Unauthorized        | [UnauthorizedResponse](#unauthorizedresponse) |
 | 403         | Permission denied   | [ForbiddenResponse](#forbiddenresponse)       |
 
+Examples
+
+
+
+
 
 ```json
 {
@@ -503,6 +508,9 @@ Handle GET requests to the root ("/") endpoint and returns the static HTML index
 }
 ```
 
+
+
+
 ```json
 {
   "detail": {
@@ -512,6 +520,81 @@ Handle GET requests to the root ("/") endpoint and returns the static HTML index
 }
 ```
 
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Token has expired",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Invalid token signature",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Token signed by unknown key",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Token missing claim: user_id",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Invalid or expired Kubernetes token",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Authentication key server returned invalid data",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
+
+
+
 ```json
 {
   "detail": {
@@ -521,6 +604,19 @@ Handle GET requests to the root ("/") endpoint and returns the static HTML index
 }
 ```
 
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Failed to connect to Kubernetes API: Service Unavailable (status 503)",
+    "response": "Unable to connect to Kubernetes API"
+  }
+}
+```
+
 ## GET `/v1/info`
 
 > **Info Endpoint Handler**
@@ -558,6 +654,10 @@ service name, version and Llama-stack version.
 
 Examples
 
+
+
+
+
 ```json
 {
   "detail": {
@@ -567,6 +667,9 @@ Examples
 }
 ```
 
+
+
+
 ```json
 {
   "detail": {
@@ -576,6 +679,9 @@ Examples
 }
 ```
 
+
+
+
 ```json
 {
   "detail": {
@@ -585,6 +691,9 @@ Examples
 }
 ```
 
+
+
+
 ```json
 {
   "detail": {
@@ -594,6 +703,9 @@ Examples
 }
 ```
 
+
+
+
 ```json
 {
   "detail": {
@@ -603,6 +715,9 @@ Examples
 }
 ```
 
+
+
+
 ```json
 {
   "detail": {
@@ -612,6 +727,9 @@ Examples
 }
 ```
 
+
+
+
 ```json
 {
   "detail": {
@@ -621,6 +739,9 @@ Examples
 }
 ```
 
+
+
+
 ```json
 {
   "detail": {
@@ -648,6 +769,18 @@ Examples
 }
 ```
 
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Failed to connect to Kubernetes API: Service Unavailable (status 503)",
+    "response": "Unable to connect to Kubernetes API"
+  }
+}
+```
+
 ## GET `/v1/models`
 
 > **Models Endpoint Handler**
@@ -704,6 +837,10 @@ will be returned.
 
 Examples
 
+
+
+
+
 ```json
 {
   "detail": {
@@ -713,6 +850,9 @@ Examples
 }
 ```
 
+
+
+
 ```json
 {
   "detail": {
@@ -722,6 +862,81 @@ Examples
 }
 ```
 
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Token has expired",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Invalid token signature",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Token signed by unknown key",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Token missing claim: user_id",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Invalid or expired Kubernetes token",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Authentication key server returned invalid data",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
+
+
+
 ```json
 {
   "detail": {
@@ -749,6 +964,18 @@ Examples
 }
 ```
 
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Failed to connect to Kubernetes API: Service Unavailable (status 503)",
+    "response": "Unable to connect to Kubernetes API"
+  }
+}
+```
+
 ## GET `/v1/tools`
 
 > **Tools Endpoint Handler**
@@ -794,6 +1021,10 @@ available tools from all configured MCP servers.
 
 Examples
 
+
+
+
+
 ```json
 {
   "detail": {
@@ -803,6 +1034,9 @@ Examples
 }
 ```
 
+
+
+
 ```json
 {
   "detail": {
@@ -812,30 +1046,50 @@ Examples
 }
 ```
 
+
+
+
 ```json
 {
   "detail": {
-    "cause": "User 6789 is not authorized to access this endpoint.",
-    "response": "User does not have permission to access this endpoint"
+    "cause": "Token has expired",
+    "response": "Missing or invalid credentials provided by client"
   }
 }
 ```
 
 
+
+
 ```json
 {
   "detail": {
-    "cause": "Lightspeed Stack configuration has not been initialized.",
-    "response": "Configuration is not loaded"
+    "cause": "Invalid token signature",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Token signed by unknown key",
+    "response": "Missing or invalid credentials provided by client"
   }
 }
 ```
 
+
+
+
 ```json
 {
   "detail": {
-    "cause": "Connection error while trying to reach backend service.",
-    "response": "Unable to connect to Llama Stack"
+    "cause": "Token missing claim: user_id",
+    "response": "Missing or invalid credentials provided by client"
   }
 }
 ```
@@ -877,6 +1131,10 @@ authenticate with using their own tokens.
 
 Examples
 
+
+
+
+
 ```json
 {
   "detail": {
@@ -898,6 +1156,8 @@ Examples
 }
 ```
 
+
+
 ```json
 {
   "detail": {
@@ -907,6 +1167,9 @@ Examples
 }
 ```
 
+
+
+
 ```json
 {
   "detail": {
@@ -917,6 +1180,73 @@ Examples
 ```
 
 
+
+```json
+{
+  "detail": {
+    "cause": "User 6789 is not authorized to access this endpoint.",
+    "response": "User does not have permission to access this endpoint"
+  }
+}
+```
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Lightspeed Stack configuration has not been initialized.",
+    "response": "Configuration is not loaded"
+  }
+}
+```
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Token signed by unknown key",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Token missing claim: user_id",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
+```json
+{
+  "detail": {
+    "cause": "Invalid or expired Kubernetes token",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "Authentication key server returned invalid data",
+    "response": "Missing or invalid credentials provided by client"
+  }
+}
+```
+
 ## GET `/v1/mcp-servers`
 
 > **List Mcp Servers Handler**
@@ -948,6 +1278,7 @@ registered (via API) MCP servers.
 | 401         | Unauthorized          | [UnauthorizedResponse](#unauthorizedresponse)               |
 | 403         | Permission denied     | [ForbiddenResponse](#forbiddenresponse)                     |
 | 500         | Internal server error | [InternalServerErrorResponse](#internalservererrorresponse) |
+| 503         | Service unavailable   | [ServiceUnavailableResponse](#serviceunavailableresponse)   |
 
 Examples
 
@@ -1788,9 +2119,13 @@ returned. For example:
 - version: Optional version number (latest when omitted).
 
 ### Raises:
-- HTTPException: If configuration is not loaded, if the prompt is not
-  found, if unable to connect to Llama Stack, or if the prompts API returns
-  an error response.
+- HTTPException: with status 401 for unauthorized access.
+- HTTPException: with status 403 if permission is denied.
+- HTTPException: with status 404 if the prompt is not found.
+- HTTPException: with status 500 and a detail object containing `response`
+  and `cause` when service configuration is wrong or incomplete.
+- HTTPException: with status 503 and a detail object containing `response`
+  and `cause` when unable to connect to Llama Stack.
 
 ### Returns:
 - PromptResourceResponse: The requested prompt object.
@@ -1807,15 +2142,16 @@ returned. For example:
 
 ### ✅ Responses
 
-| Status Code | Description           | Component                                                   |
-|-------------|-----------------------|-------------------------------------------------------------|
-| 200         | Successful response   | [PromptResourceResponse](#promptresourceresponse)           |
-| 401         | Unauthorized          | [UnauthorizedResponse](#unauthorizedresponse)               |
-| 403         | Permission denied     | [ForbiddenResponse](#forbiddenresponse)                     |
-| 404         | Resource not found    | [NotFoundResponse](#notfoundresponse)                       |
-| 500         | Internal server error | [InternalServerErrorResponse](#internalservererrorresponse) |
-| 503         | Service unavailable   | [ServiceUnavailableResponse](#serviceunavailableresponse)   |
-| 422         | Validation Error      | [HTTPValidationError](#httpvalidationerror)                 |
+| Status Code | Description            | Component                                                   |
+|-------------|------------------------|-------------------------------------------------------------|
+| 200         | Successful response    | [PromptResourceResponse](#promptresourceresponse)           |
+| 400         | Invalid request format | [BadRequestResponse](#badrequestresponse)                   |
+| 401         | Unauthorized           | [UnauthorizedResponse](#unauthorizedresponse)               |
+| 403         | Permission denied      | [ForbiddenResponse](#forbiddenresponse)                     |
+| 404         | Resource not found     | [NotFoundResponse](#notfoundresponse)                       |
+| 500         | Internal server error  | [InternalServerErrorResponse](#internalservererrorresponse) |
+| 503         | Service unavailable    | [ServiceUnavailableResponse](#serviceunavailableresponse)   |
+| 422         | Validation Error       | [HTTPValidationError](#httpvalidationerror)                 |
 
 
 
@@ -1948,15 +2284,16 @@ For example:
 
 ### ✅ Responses
 
-| Status Code | Description           | Component                                                   |
-|-------------|-----------------------|-------------------------------------------------------------|
-| 200         | Successful response   | [PromptResourceResponse](#promptresourceresponse)           |
-| 401         | Unauthorized          | [UnauthorizedResponse](#unauthorizedresponse)               |
-| 403         | Permission denied     | [ForbiddenResponse](#forbiddenresponse)                     |
-| 404         | Resource not found    | [NotFoundResponse](#notfoundresponse)                       |
-| 500         | Internal server error | [InternalServerErrorResponse](#internalservererrorresponse) |
-| 503         | Service unavailable   | [ServiceUnavailableResponse](#serviceunavailableresponse)   |
-| 422         | Validation Error      | [HTTPValidationError](#httpvalidationerror)                 |
+| Status Code | Description            | Component                                                   |
+|-------------|------------------------|-------------------------------------------------------------|
+| 200         | Successful response    | [PromptResourceResponse](#promptresourceresponse)           |
+| 400         | Invalid request format | [BadRequestResponse](#badrequestresponse)                   |
+| 401         | Unauthorized           | [UnauthorizedResponse](#unauthorizedresponse)               |
+| 403         | Permission denied      | [ForbiddenResponse](#forbiddenresponse)                     |
+| 404         | Resource not found     | [NotFoundResponse](#notfoundresponse)                       |
+| 500         | Internal server error  | [InternalServerErrorResponse](#internalservererrorresponse) |
+| 503         | Service unavailable    | [ServiceUnavailableResponse](#serviceunavailableresponse)   |
+| 422         | Validation Error       | [HTTPValidationError](#httpvalidationerror)                 |
 
 Examples
 
@@ -6268,6 +6605,7 @@ Inference configuration.
 |-------|------|-------------|
 | default_model |  | Identification of default model used when no other model is specified. |
 | default_provider |  | Identification of default provider used when no other model is specified. |
+| context_windows | object | Map of fully-qualified model identifier (e.g., "openai/gpt-4o-mini") to context window size in tokens. Used by the conversation compaction trigger to decide when older turns must be summarized before the input exceeds the window. Models absent from this map have no registered window — callers fall back to their own default or skip the token-based trigger. |
 
 
 ## InfoResponse
@@ -6389,11 +6727,6 @@ Model representing a response to a liveness request.
 Attributes:
     alive: If app is alive.
 
-Example:
-    ```python
-    liveness_response = LivenessResponse(alive=True)
-    ```
-
 
 | Field | Type | Description |
 |-------|------|-------------|
@@ -6424,6 +6757,9 @@ Useful resources:
 | use_as_library_client |  | When set to true Llama Stack will be used in library mode, not in server mode (default) |
 | library_client_config_path |  | Path to configuration file used when Llama Stack is run in library mode |
 | timeout | integer | Timeout in seconds for requests to Llama Stack service. Default is 180 seconds (3 minutes) to accommodate long-running RAG queries. |
+| max_retries | integer | Maximum number of connection attempts before giving up. Used on startup to connect to Llama Stack and retrieve its version. Connection attempts are retried with a fixed delay to handle the case where Llama Stack is still starting up (e.g., when running as a sidecar in the same pod). |
+| retry_delay | integer | Delay in seconds between retry attempts. Used on startup to connect to Llama Stack and retrieve its version. Connection attempts are retried with a fixed delay to handle the case where Llama Stack is still starting up (e.g., when running as a sidecar in the same pod). |
+| allow_degraded_mode |  | If enabled, Lightspeed Core can be started even when Llama Stack is not accessible (valid for server mode only) |
 
 
 ## MCPClientAuthOptionsResponse
@@ -6431,6 +6767,9 @@ Useful resources:
 
 Response containing MCP servers that accept client-provided authorization.
 
+Attributes:
+    servers: MCP servers that declare client authentication headers.
+
 
 | Field | Type | Description |
 |-------|------|-------------|
@@ -6469,13 +6808,19 @@ Information about MCP server client authentication options.
 ## MCPServerDeleteResponse
 
 
-Response for a successful MCP server deletion.
+Response indicating the outcome of an MCP server delete operation.
+
+Attributes:
+    name: Name of the MCP server targeted for deletion.
+    deleted: Whether the server was successfully deleted (True) or not found (False).
+    response: Description of the result, e.g. "MCP server deleted successfully".
 
 
 | Field | Type | Description |
 |-------|------|-------------|
-| name | string | Deleted MCP server name |
-| message | string | Status message |
+| deleted | boolean | Whether the deletion was successful. |
+| name | string | MCP server name that was passed to delete. |
+| response | string | Human-readable outcome of the delete operation. |
 
 
 ## MCPServerInfo
@@ -6503,6 +6848,9 @@ Attributes:
 
 Response listing all registered MCP servers.
 
+Attributes:
+    servers: All registered MCP servers (static and dynamic).
+
 
 | Field | Type | Description |
 |-------|------|-------------|
@@ -6522,14 +6870,6 @@ Attributes:
     headers: Optional list of HTTP header names to forward from incoming requests.
     timeout: Optional request timeout in seconds.
 
-Example:
-    ```python
-    request = MCPServerRegistrationRequest(
-        name="my-tools",
-        url="http://localhost:8888/mcp",
-    )
-    ```
-
 
 | Field | Type | Description |
 |-------|------|-------------|
@@ -7625,11 +7965,7 @@ Attributes:
     media_type: The optional media type for response format (application/json or text/plain).
     vector_store_ids: The optional list of specific vector store IDs to query for RAG.
     shield_ids: The optional list of safety shield IDs to apply.
-
-Example:
-    ```python
-    query_request = QueryRequest(query="Tell me about Kubernetes")
-    ```
+    solr: Optional Solr inline RAG options (mode, filters) or legacy filter-only dict.
 
 
 | Field | Type | Description |
@@ -7848,21 +8184,6 @@ Attributes:
     reason: The reason for the readiness.
     providers: List of unhealthy providers in case of readiness failure.
 
-Example:
-    ```python
-    readiness_response = ReadinessResponse(
-        ready=False,
-        reason="Service is not ready",
-        providers=[
-            ProviderHealthStatus(
-                provider_id="ollama",
-                status="unhealthy",
-                message="Server is unavailable"
-            )
-        ]
-    )
-    ```
-
 
 | Field | Type | Description |
 |-------|------|-------------|
@@ -8428,14 +8749,6 @@ Attributes:
     functionality: The functionality of the service.
     status: The status of the service.
 
-Example:
-    ```python
-    status_response = StatusResponse(
-        functionality="feedback",
-        status={"enabled": True},
-    )
-    ```
-
 
 | Field | Type | Description |
 |-------|------|-------------|
@@ -8467,15 +8780,6 @@ Attributes:
     interrupted: Whether an in-progress stream was interrupted.
     message: Human-readable interruption status message.
 
-Example:
-    ```python
-    response = StreamingInterruptResponse(
-        request_id="123e4567-e89b-12d3-a456-426614174000",
-        interrupted=True,
-        message="Streaming request interrupted",
-    )
-    ```
-
 
 | Field | Type | Description |
 |-------|------|-------------|
@@ -8549,6 +8853,30 @@ Model representing a response to tools request.
 | tools | array | List of tools available from all configured MCP servers and built-in toolgroups |
 
 
+## TrustedProxyConfiguration
+
+
+Configuration for trusted-proxy auth module.
+
+
+| Field | Type | Description |
+|-------|------|-------------|
+| user_header | string | HTTP header containing the forwarded user identity. |
+| allowed_service_accounts |  | Optional allowlist of Kubernetes ServiceAccount identities permitted to act as trusted proxies. When set to null/omitted, any ServiceAccount with a valid token is accepted. When set to a non-empty list, only the listed ServiceAccounts are allowed. An empty list behaves the same as null (no restriction). |
+
+
+## TrustedProxyServiceAccount
+
+
+A Kubernetes ServiceAccount identity for trusted-proxy allowlist.
+
+
+| Field | Type | Description |
+|-------|------|-------------|
+| namespace | string | Kubernetes namespace of the ServiceAccount. |
+| name | string | Name of the Kubernetes ServiceAccount. |
+
+
 ## UnauthorizedResponse
 
 
diff --git a/src/app/endpoints/a2a.py b/src/app/endpoints/a2a.py
index c8de56e91..99c53b8d6 100644
--- a/src/app/endpoints/a2a.py
+++ b/src/app/endpoints/a2a.py
@@ -685,8 +685,17 @@ async def get_agent_card(  # pylint: disable=unused-argument
     This endpoint provides the agent card that describes Lightspeed's
     capabilities according to the A2A protocol specification.
 
-    Returns:
-        AgentCard: The agent card describing this agent's capabilities.
+    ### Parameters:
+    - auth: Authentication tuple from the auth dependency (used by middleware).
+
+    ### Raises:
+    - HTTPException: with status 500 and a detail object containing `response`
+      and `cause` when service configuration is wrong or incomplete.
+    - HTTPException: with status 503 and a detail object containing `response`
+      and `cause` when unable to connect to Llama Stack.
+
+    ### Returns:
+    - AgentCard: The agent card describing this agent's capabilities.
     """
     try:
         logger.info("Serving A2A Agent Card")
@@ -757,23 +766,29 @@ async def handle_a2a_jsonrpc_get(
     Thin wrapper that delegates to ``_handle_a2a_jsonrpc`` so GET and POST share
     the same processing path while keeping distinct OpenAPI operation metadata.
 
-    Args:
-        request: Incoming ASGI/FastAPI request (body, scope, headers).
-        auth: Resolved authentication tuple from ``auth_dependency`` (user
-            identity and bearer token used to build the per-request A2A app).
-        mcp_headers: MCP-related headers from ``mcp_headers_dependency``, forwarded
-            into the A2A executor for downstream tool/context propagation.
+    ### Parameters:
+    - request: Incoming ASGI/FastAPI request (body, scope, headers).
+    - auth: Resolved authentication tuple from ``auth_dependency`` (user
+      identity and bearer token used to build the per-request A2A app).
+    - mcp_headers: MCP-related headers from ``mcp_headers_dependency``, forwarded
+      into the A2A executor for downstream tool/context propagation.
+
+    ### Raises:
+    - HTTPException: with status 401 for unauthorized access.
+    - HTTPException: with status 403 if permission is denied.
+    - HTTPException: with status 500 and a detail object containing `response`
+      and `cause` when service configuration is wrong or incomplete.
+    - HTTPException: with status 503 and a detail object containing `response`
+      and `cause` when unable to connect to Llama Stack.
+
+    ### Returns:
+    - ``Response`` with the full buffered JSON-RPC (or HTTP)
+      payload when the request is non-streaming, or
+      ``StreamingResponse`` (SSE) when the JSON-RPC method is
+      ``message/stream`` and chunks are streamed to the client.
+      Error conditions are generally expressed as JSON-RPC or HTTP
+      responses rather than by raising from this wrapper.
 
-    Returns:
-        ``Response`` with the full buffered JSON-RPC (or HTTP) payload when the
-        request is non-streaming, or ``StreamingResponse`` (SSE) when the
-        JSON-RPC method is ``message/stream`` and chunks are streamed to the
-        client. Error conditions are generally expressed as JSON-RPC or HTTP
-        responses rather than by raising from this wrapper.
-
-    Raises:
-        HTTPException: If authentication or ``@authorize`` rejects the request
-            before or while entering the handler chain.
     """
     return await _handle_a2a_jsonrpc(request, auth, mcp_headers)
 
@@ -799,23 +814,27 @@ async def handle_a2a_jsonrpc_post(
     Thin wrapper that delegates to ``_handle_a2a_jsonrpc`` so GET and POST share
     the same processing path while keeping distinct OpenAPI operation metadata.
 
-    Args:
-        request: Incoming ASGI/FastAPI request (body, scope, headers).
-        auth: Resolved authentication tuple from ``auth_dependency`` (user
-            identity and bearer token used to build the per-request A2A app).
-        mcp_headers: MCP-related headers from ``mcp_headers_dependency``, forwarded
-            into the A2A executor for downstream tool/context propagation.
+    ### Parameters:
+    - request: Incoming ASGI/FastAPI request (body, scope, headers).
+    - auth: Resolved authentication tuple from ``auth_dependency`` (user
+      identity and bearer token used to build the per-request A2A app).
+    - mcp_headers: MCP-related headers from ``mcp_headers_dependency``, forwarded
+      into the A2A executor for downstream tool/context propagation.
+
+    ### Raises:
+    - HTTPException: with status 401 for unauthorized access.
+    - HTTPException: with status 403 if permission is denied.
+    - HTTPException: with status 503 and a detail object containing `response`
+      and `cause` when unable to connect to Llama Stack.
+
+    ### Returns:
+    - ``Response`` with the full buffered JSON-RPC (or HTTP)
+      payload when the request is non-streaming, or
+      ``StreamingResponse`` (SSE) when the JSON-RPC method is
+      ``message/stream`` and chunks are streamed to the client.
+      Error conditions are generally expressed as JSON-RPC or HTTP
+      responses rather than by raising from this wrapper.
 
-    Returns:
-        ``Response`` with the full buffered JSON-RPC (or HTTP) payload when the
-        request is non-streaming, or ``StreamingResponse`` (SSE) when the
-        JSON-RPC method is ``message/stream`` and chunks are streamed to the
-        client. Error conditions are generally expressed as JSON-RPC or HTTP
-        responses rather than by raising from this wrapper.
-
-    Raises:
-        HTTPException: If authentication or ``@authorize`` rejects the request
-            before or while entering the handler chain.
     """
     return await _handle_a2a_jsonrpc(request, auth, mcp_headers)
 
@@ -1008,8 +1027,14 @@ async def a2a_health_check() -> dict[str, str]:
     """
     Health check endpoint for A2A service.
 
-    Returns:
-        Dict with health status information.
+    ### Parameters:
+    - None
+
+    ### Raises:
+    - None
+
+    ### Returns:
+    - Dict with health status information.
     """
     return {
         "status": "healthy",
diff --git a/src/app/endpoints/prompts.py b/src/app/endpoints/prompts.py
index fc35da82f..008a97d5d 100644
--- a/src/app/endpoints/prompts.py
+++ b/src/app/endpoints/prompts.py
@@ -118,8 +118,13 @@ async def create_prompt_handler(
     - body: Prompt creation parameters.
 
     ### Raises:
-    - HTTPException: If configuration is not loaded, if unable to connect to
-      Llama Stack, or if the prompts API returns an error response.
+    - HTTPException: with status 401 for unauthorized access.
+    - HTTPException: with status 403 if permission is denied.
+    - HTTPException: with status 422 if the request body is invalid.
+    - HTTPException: with status 500 and a detail object containing `response`
+      and `cause` when service configuration is wrong or incomplete.
+    - HTTPException: with status 503 and a detail object containing `response`
+      and `cause` when unable to connect to Llama Stack.
 
     ### Returns:
     - PromptResourceResponse: The created prompt as returned by Llama Stack.
@@ -163,8 +168,12 @@ async def list_prompts_handler(
     - auth: Authentication tuple from the auth dependency (used by middleware).
 
     ### Raises:
-    - HTTPException: If configuration is not loaded, if unable to connect to
-      Llama Stack, or if the prompts API returns an error response.
+    - HTTPException: with status 401 for unauthorized access.
+    - HTTPException: with status 403 if permission is denied.
+    - HTTPException: with status 500 and a detail object containing `response`
+      and `cause` when service configuration is wrong or incomplete.
+    - HTTPException: with status 503 and a detail object containing `response`
+      and `cause` when unable to connect to Llama Stack.
 
     ### Returns:
     - PromptsListResponse: An object containing the list of prompts.
@@ -213,9 +222,13 @@ async def get_prompt_handler(
     - version: Optional version number (latest when omitted).
 
     ### Raises:
-    - HTTPException: If configuration is not loaded, if the prompt is not
-      found, if unable to connect to Llama Stack, or if the prompts API returns
-      an error response.
+    - HTTPException: with status 401 for unauthorized access.
+    - HTTPException: with status 403 if permission is denied.
+    - HTTPException: with status 404 if prompt is not found.
+    - HTTPException: with status 500 and a detail object containing `response`
+      and `cause` when service configuration is wrong or incomplete.
+    - HTTPException: with status 503 and a detail object containing `response`
+      and `cause` when unable to connect to Llama Stack.
 
     ### Returns:
     - PromptResourceResponse: The requested prompt object.
diff --git a/src/app/endpoints/providers.py b/src/app/endpoints/providers.py
index 41184a005..e6cb8ed07 100644
--- a/src/app/endpoints/providers.py
+++ b/src/app/endpoints/providers.py
@@ -136,16 +136,17 @@ async def get_provider_endpoint_handler(
     - provider_id: Provider identification string
     - auth: Authentication tuple from the auth dependency.
 
+    ### Raises:
+    - HTTPException: with status 401 for unauthorized access.
+    - HTTPException: with status 403 if permission is denied.
+    - HTTPException: with status 404 if provider is not found.
+    - HTTPException: with status 500 and a detail object containing `response`
+      and `cause` when service configuration is wrong or incomplete.
+    - HTTPException: with status 503 and a detail object containing `response`
+      and `cause` when unable to connect to Llama Stack.
+
     ### Returns:
     - ProviderResponse: Provider details.
-
-    ### Raises:
-    - HTTPException:
-    - 401: Authentication failed
-    - 403: Authorization failed
-    - 404: Provider not found
-    - 500: Lightspeed Stack configuration not loaded
-    - 503: Unable to connect to Llama Stack
     """
     # Used only by the middleware
     _ = auth