Merge pull request #1180 from tisnik/lcore-1237-updated-documentation-for-models-endpoint

tisnik · web-flow · commit c706062aa1a7 · 2026-02-19T10:45:10.000+01:00
LCORE-1237: updated documentation for models endpoint
diff --git a/README.md b/README.md
@@ -1045,6 +1045,60 @@ The liveness endpoint performs a basic health check to verify the service is ali
 }
 ```
 
+## Models endpoint
+
+**Endpoint:** `GET /v1/models`
+
+Processes GET requests and returns a list of available models from the Llama
+Stack service. It is possible to specify the "model_type" query parameter,
+which is used as a filter. For example, if model type is set to "llm", only
+LLM models will be returned:
+
+    curl "http://localhost:8080/v1/models?model_type=llm"
+
+The "model_type" query parameter is optional. When not specified, all models
+will be returned.
+
+**Response Body:**
+```json
+{
+  "models": [
+    {
+      "identifier": "sentence-transformers/.llama",
+      "metadata": {
+        "embedding_dimension": 384
+      },
+      "api_model_type": "embedding",
+      "provider_id": "sentence-transformers",
+      "type": "model",
+      "provider_resource_id": ".llama",
+      "model_type": "embedding"
+    },
+    {
+      "identifier": "openai/gpt-4o-mini",
+      "metadata": {},
+      "api_model_type": "llm",
+      "provider_id": "openai",
+      "type": "model",
+      "provider_resource_id": "gpt-4o-mini",
+      "model_type": "llm"
+    },
+    {
+      "identifier": "sentence-transformers/nomic-ai/nomic-embed-text-v1.5",
+      "metadata": {
+        "embedding_dimension": 768
+      },
+      "api_model_type": "embedding",
+      "provider_id": "sentence-transformers",
+      "type": "model",
+      "provider_resource_id": "nomic-ai/nomic-embed-text-v1.5",
+      "model_type": "embedding"
+    }
+  ]
+}
+```
+
+
 # Database structure
 
 Database structure is described on [this page](https://lightspeed-core.github.io/lightspeed-stack/DB/index.html)
diff --git a/docs/openapi.json b/docs/openapi.json
@@ -245,7 +245,7 @@
                     "models"
                 ],
                 "summary": "Models Endpoint Handler",
-                "description": "Handle requests to the /models endpoint.\n\nProcess GET requests to the /models endpoint, returning a list of available\nmodels from the Llama Stack service.\n\nParameters:\n    request: The incoming HTTP request.\n    auth: Authentication tuple from the auth dependency.\n    model_type: Optional filter to return only models matching this type.\n\nRaises:\n    HTTPException: If unable to connect to the Llama Stack server or if\n    model retrieval fails for any reason.\n\nReturns:\n    ModelsResponse: An object containing the list of available models.",
+                "description": "Handle requests to the /models endpoint.\n\nProcess GET requests to the /models endpoint, returning a list of available\nmodels from the Llama Stack service. It is possible to specify \"model_type\"\nquery parameter that is used as a filter. For example, if model type is set\nto \"llm\", only LLM models will be returned:\n\n    curl http://localhost:8080/v1/models?model_type=llm\n\nThe \"model_type\" query parameter is optional. When not specified, all models\nwill be returned.\n\n## Parameters:\n    request: The incoming HTTP request.\n    auth: Authentication tuple from the auth dependency.\n    model_type: Optional filter to return only models matching this type.\n\n## Raises:\n    HTTPException: If unable to connect to the Llama Stack server or if\n    model retrieval fails for any reason.\n\n## Returns:\n    ModelsResponse: An object containing the list of available models.",
                 "operationId": "models_endpoint_handler_v1_models_get",
                 "parameters": [
                     {
@@ -3763,6 +3763,26 @@
                             }
                         }
                     },
+                    "413": {
+                        "description": "Prompt is too long",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/PromptTooLongResponse"
+                                },
+                                "examples": {
+                                    "prompt too long": {
+                                        "value": {
+                                            "detail": {
+                                                "cause": "The prompt exceeds the maximum allowed length.",
+                                                "response": "Prompt is too long"
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    },
                     "422": {
                         "description": "Request validation failed",
                         "content": {
@@ -7201,7 +7221,7 @@
                         },
                         "type": "object",
                         "title": "Authorization headers",
-                        "description": "Headers to send to the MCP server. The map contains the header name and the path to a file containing the header value (secret). There are 2 special cases: 1. Usage of the kubernetes token in the header. To specify this use a string 'kubernetes' instead of the file path. 2. Usage of the client provided token in the header. To specify this use a string 'client' instead of the file path."
+                        "description": "Headers to send to the MCP server. The map contains the header name and the path to a file containing the header value (secret). There are 3 special cases: 1. Usage of the kubernetes token in the header. To specify this use a string 'kubernetes' instead of the file path. 2. Usage of the client-provided token in the header. To specify this use a string 'client' instead of the file path. 3. Usage of the oauth token in the header. To specify this use a string 'oauth' instead of the file path. "
                     },
                     "timeout": {
                         "anyOf": [
@@ -7565,6 +7585,33 @@
                 "title": "PostgreSQLDatabaseConfiguration",
                 "description": "PostgreSQL database configuration.\n\nPostgreSQL database is used by Lightspeed Core Stack service for storing\ninformation about conversation IDs. It can also be leveraged to store\nconversation history and information about quota usage.\n\nUseful resources:\n\n- [Psycopg: connection classes](https://www.psycopg.org/psycopg3/docs/api/connections.html)\n- [PostgreSQL connection strings](https://www.connectionstrings.com/postgresql/)\n- [How to Use PostgreSQL in Python](https://www.freecodecamp.org/news/postgresql-in-python/)"
             },
+            "PromptTooLongResponse": {
+                "properties": {
+                    "status_code": {
+                        "type": "integer",
+                        "title": "Status Code"
+                    },
+                    "detail": {
+                        "$ref": "#/components/schemas/DetailModel"
+                    }
+                },
+                "type": "object",
+                "required": [
+                    "status_code",
+                    "detail"
+                ],
+                "title": "PromptTooLongResponse",
+                "description": "413 Payload Too Large - Prompt is too long.",
+                "examples": [
+                    {
+                        "detail": {
+                            "cause": "The prompt exceeds the maximum allowed length.",
+                            "response": "Prompt is too long"
+                        },
+                        "label": "prompt too long"
+                    }
+                ]
+            },
             "ProviderHealthStatus": {
                 "properties": {
                     "provider_id": {
diff --git a/docs/openapi.md b/docs/openapi.md
@@ -247,18 +247,25 @@ Examples
 Handle requests to the /models endpoint.
 
 Process GET requests to the /models endpoint, returning a list of available
-models from the Llama Stack service.
+models from the Llama Stack service. It is possible to specify "model_type"
+query parameter that is used as a filter. For example, if model type is set
+to "llm", only LLM models will be returned:
 
-Parameters:
+    curl http://localhost:8080/v1/models?model_type=llm
+
+The "model_type" query parameter is optional. When not specified, all models
+will be returned.
+
+## Parameters:
     request: The incoming HTTP request.
     auth: Authentication tuple from the auth dependency.
     model_type: Optional filter to return only models matching this type.
 
-Raises:
+## Raises:
     HTTPException: If unable to connect to the Llama Stack server or if
     model retrieval fails for any reason.
 
-Returns:
+## Returns:
     ModelsResponse: An object containing the list of available models.
 
 
@@ -275,14 +282,14 @@ Returns:
 | Status Code | Description | Component |
 |-------------|-------------|-----------|
 | 200 | Successful response | [ModelsResponse](#modelsresponse) |
-| 401 | Unauthorized | [UnauthorizedResponse](#unauthorizedresponse)
+| 401 | Unauthorized | [UnauthorizedResponse](#unauthorizedresponse) |
+| 403 | Permission denied | [ForbiddenResponse](#forbiddenresponse) |
+| 500 | Internal server error | [InternalServerErrorResponse](#internalservererrorresponse) |
+| 503 | Service unavailable | [ServiceUnavailableResponse](#serviceunavailableresponse) |
+| 422 | Validation Error | [HTTPValidationError](#httpvalidationerror) |
 
 Examples
 
-
-
-
-
 ```json
 {
   "detail": {
@@ -292,9 +299,6 @@ Examples
 }
 ```
 
-
-
-
 ```json
 {
   "detail": {
@@ -303,14 +307,6 @@ Examples
   }
 }
 ```
- |
-| 403 | Permission denied | [ForbiddenResponse](#forbiddenresponse)
-
-Examples
-
-
-
-
 
 ```json
 {
@@ -320,14 +316,6 @@ Examples
   }
 }
 ```
- |
-| 500 | Internal server error | [InternalServerErrorResponse](#internalservererrorresponse)
-
-Examples
-
-
-
-
 
 ```json
 {
@@ -337,14 +325,6 @@ Examples
   }
 }
 ```
- |
-| 503 | Service unavailable | [ServiceUnavailableResponse](#serviceunavailableresponse)
-
-Examples
-
-
-
-
 
 ```json
 {
@@ -354,8 +334,7 @@ Examples
   }
 }
 ```
- |
-| 422 | Validation Error | [HTTPValidationError](#httpvalidationerror) |
+
 ## GET `/v1/tools`
 
 > **Tools Endpoint Handler**
@@ -3275,6 +3254,23 @@ Examples
     "response": "User does not have permission to access this endpoint"
   }
 }
+```
+ |
+| 413 | Prompt is too long | [PromptTooLongResponse](#prompttoolongresponse)
+
+Examples
+
+
+
+
+
+```json
+{
+  "detail": {
+    "cause": "The prompt exceeds the maximum allowed length.",
+    "response": "Prompt is too long"
+  }
+}
 ```
  |
 | 422 | Request validation failed | [UnprocessableEntityResponse](#unprocessableentityresponse)
@@ -4945,7 +4941,7 @@ Useful resources:
 | name | string | MCP server name that must be unique |
 | provider_id | string | MCP provider identification |
 | url | string | URL of the MCP server |
-| authorization_headers | object | Headers to send to the MCP server. The map contains the header name and the path to a file containing the header value (secret). There are 2 special cases: 1. Usage of the kubernetes token in the header. To specify this use a string 'kubernetes' instead of the file path. 2. Usage of the client provided token in the header. To specify this use a string 'client' instead of the file path. |
+| authorization_headers | object | Headers to send to the MCP server. The map contains the header name and the path to a file containing the header value (secret). There are 3 special cases: 1. Usage of the kubernetes token in the header. To specify this use a string 'kubernetes' instead of the file path. 2. Usage of the client-provided token in the header. To specify this use a string 'client' instead of the file path. 3. Usage of the oauth token in the header. To specify this use a string 'oauth' instead of the file path.  |
 | timeout |  | Timeout in seconds for requests to the MCP server. If not specified, the default timeout from Llama Stack will be used. Note: This field is reserved for future use when Llama Stack adds timeout support. |
 
 
@@ -5067,6 +5063,18 @@ Useful resources:
 | ca_cert_path |  | Path to CA certificate |
 
 
+## PromptTooLongResponse
+
+
+413 Payload Too Large - Prompt is too long.
+
+
+| Field | Type | Description |
+|-------|------|-------------|
+| status_code | integer |  |
+| detail |  |  |
+
+
 ## ProviderHealthStatus
 
 
diff --git a/src/app/endpoints/models.py b/src/app/endpoints/models.py
@@ -83,18 +83,25 @@ async def models_endpoint_handler(
     Handle requests to the /models endpoint.
 
     Process GET requests to the /models endpoint, returning a list of available
-    models from the Llama Stack service.
+    models from the Llama Stack service. It is possible to specify "model_type"
+    query parameter that is used as a filter. For example, if model type is set
+    to "llm", only LLM models will be returned:
 
-    Parameters:
+        curl http://localhost:8080/v1/models?model_type=llm
+
+    The "model_type" query parameter is optional. When not specified, all models
+    will be returned.
+
+    ## Parameters:
         request: The incoming HTTP request.
         auth: Authentication tuple from the auth dependency.
         model_type: Optional filter to return only models matching this type.
 
-    Raises:
+    ## Raises:
         HTTPException: If unable to connect to the Llama Stack server or if
         model retrieval fails for any reason.
 
-    Returns:
+    ## Returns:
         ModelsResponse: An object containing the list of available models.
     """
     # Used only by the middleware