lightspeed-core
diff --git a/‎.github/workflows/e2e_tests.yaml‎
Lines changed: 60 additions & 7 deletions b/‎.github/workflows/e2e_tests.yaml‎
Lines changed: 60 additions & 7 deletions
diff --git a/‎README.md‎
Lines changed: 4 additions & 0 deletions b/‎README.md‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎docker-compose-library.yaml‎
Lines changed: 11 additions & 3 deletions b/‎docker-compose-library.yaml‎
Lines changed: 11 additions & 3 deletions
diff --git a/‎docker-compose.yaml‎
Lines changed: 11 additions & 3 deletions b/‎docker-compose.yaml‎
Lines changed: 11 additions & 3 deletions
diff --git a/‎docs/config.html‎
Lines changed: 19 additions & 0 deletions b/‎docs/config.html‎
Lines changed: 19 additions & 0 deletions
diff --git a/‎docs/config.json‎
Lines changed: 37 additions & 4 deletions b/‎docs/config.json‎
Lines changed: 37 additions & 4 deletions
diff --git a/‎docs/config.md‎
Lines changed: 2 additions & 0 deletions b/‎docs/config.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/config.puml‎
Lines changed: 2 additions & 0 deletions b/‎docs/config.puml‎
Lines changed: 2 additions & 0 deletions
@@ -10,7 +10,7 @@ jobs:
       fail-fast: false
       matrix:
         mode: ["server", "library"]
-        environment: ["ci", "azure"]
+        environment: ["ci", "azure", "vertexai"]
 
     name: "E2E: ${{ matrix.mode }} mode / ${{ matrix.environment }}"
 
@@ -52,8 +52,7 @@ jobs:
       - name: Load lightspeed-stack.yaml configuration
         run: |
           MODE="${{ matrix.mode }}"
-          CONFIG_FILE="tests/e2e/configuration/lightspeed-stack-${MODE}-mode.yaml"
-          
+          CONFIG_FILE="tests/e2e/configuration/${MODE}-mode/lightspeed-stack.yaml"
           echo "Loading configuration for ${MODE} mode"
           echo "Source: ${CONFIG_FILE}"
           
@@ -91,6 +90,45 @@ jobs:
           echo "✅ Successfully obtained Azure access token."
           echo "AZURE_API_KEY=$ACCESS_TOKEN" >> $GITHUB_ENV
 
+      # Decodes the base64-encoded GOOGLE_SA_KEY secret into a JSON key file on
+      # the runner and exports GCP_KEYS_PATH / GOOGLE_APPLICATION_CREDENTIALS
+      # through GITHUB_ENV. Runs only for the "vertexai" matrix environment.
+      - name: Save VertexAI service account key to file
+        if: matrix.environment == 'vertexai'
+        env:
+          GOOGLE_SA_KEY: ${{ secrets.GOOGLE_SA_KEY }}
+        run: |
+          echo "Setting up Google Cloud service account credentials..."
+
+          # Fail early with a readable message when the secret is missing
+          if [ -z "$GOOGLE_SA_KEY" ]; then
+            echo "❌ GOOGLE_SA_KEY is not set. Please configure the secret in GitHub repository settings."
+            exit 1
+          fi
+
+          # Host-side directory holding the key; the compose files in this
+          # change mount it read-only at /opt/app-root/.gcp-keys
+          GCP_KEYS_PATH=./tmp/.gcp-keys
+          echo "GCP_KEYS_PATH=$GCP_KEYS_PATH" >> "$GITHUB_ENV"
+
+          mkdir -p "$GCP_KEYS_PATH"
+
+          echo "Writing service account key to file..."
+
+          # The secret is stored base64-encoded because GitHub alters the raw
+          # JSON key; decode it back to JSON here
+          printf '%s' "$GOOGLE_SA_KEY" | base64 -d > "$GCP_KEYS_PATH/gcp-key.json"
+
+          # Verify the file exists AND is non-empty: the redirection above always
+          # creates the file, and jq accepts empty input, so a plain existence
+          # check would let an empty key slip through
+          if [ ! -s "$GCP_KEYS_PATH/gcp-key.json" ]; then
+            echo "❌ Failed to create gcp-key.json file"
+            exit 1
+          fi
+
+          if ! jq empty "$GCP_KEYS_PATH/gcp-key.json" 2>/dev/null; then
+            echo "❌ gcp-key.json is not valid JSON"
+            exit 1
+          fi
+          echo "✅ gcp-key.json is valid JSON"
+
+          # Set proper permissions (readable by all, needed for container user 1001)
+          chmod 644 "$GCP_KEYS_PATH/gcp-key.json"
+
+          # Path as seen from inside the container, not on the runner
+          echo "GOOGLE_APPLICATION_CREDENTIALS=/opt/app-root/.gcp-keys/gcp-key.json" >> "$GITHUB_ENV"
+
       - name: Select and configure run.yaml
         env:
           CONFIG_ENVIRONMENT: ${{ matrix.environment || 'ci' }}
@@ -146,19 +184,30 @@ jobs:
         run: |
           echo $QUAY_ROBOT_TOKEN | docker login quay.io -u=$QUAY_ROBOT_USERNAME --password-stdin
 
+      # For non-VertexAI runs GCP_KEYS_PATH is never exported, so the compose
+      # files fall back to mounting ./tmp/.gcp-keys-dummy; create that
+      # directory up front so the mount source exists on the runner.
+      - name: Create dummy GCP keys directory
+        if: matrix.environment != 'vertexai'
+        run: |
+          echo "Creating dummy GCP keys directory for non-VertexAI environment..."
+          mkdir -p ./tmp/.gcp-keys-dummy
+          echo "✅ Dummy directory created."
+
       - name: Run services (Server Mode)
         if: matrix.mode == 'server'
-        env: 
+        env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           AZURE_API_KEY: ${{ env.AZURE_API_KEY }}
+          VERTEX_AI_LOCATION: ${{ secrets.VERTEX_AI_LOCATION }}
+          VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }}
+          GOOGLE_APPLICATION_CREDENTIALS: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }}
+          GCP_KEYS_PATH: ${{ env.GCP_KEYS_PATH }}
         run: |
           # Debug: Check if environment variable is available for docker-compose
           echo "OPENAI_API_KEY is set: $([ -n "$OPENAI_API_KEY" ] && echo 'YES' || echo 'NO')"
           echo "OPENAI_API_KEY length: ${#OPENAI_API_KEY}"
-          
+
           docker compose version
           docker compose up -d
-          
+
           # Check for errors and show logs if any services failed
           if docker compose ps | grep -E 'Exit|exited|stopped'; then
             echo "Some services failed to start - showing logs:"
@@ -173,10 +222,14 @@ jobs:
         env: 
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           AZURE_API_KEY: ${{ env.AZURE_API_KEY }}
+          VERTEX_AI_LOCATION: ${{ secrets.VERTEX_AI_LOCATION }}
+          VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }}
+          GOOGLE_APPLICATION_CREDENTIALS: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }}
+          GCP_KEYS_PATH: ${{ env.GCP_KEYS_PATH }}
         run: |
           echo "Starting service in library mode (1 container)"
           docker compose -f docker-compose-library.yaml up -d
-          
+
           if docker compose -f docker-compose-library.yaml ps | grep -E 'Exit|exited|stopped'; then
             echo "Service failed to start - showing logs:"
             docker compose -f docker-compose-library.yaml logs
 
@@ -121,6 +121,7 @@ Lightspeed Core Stack is based on the FastAPI framework (Uvicorn). The service i
   |----------------|-----------------------------------------------------------------------|
   | OpenAI         | https://platform.openai.com                                           |
   | Azure OpenAI   | https://azure.microsoft.com/en-us/products/ai-services/openai-service |
+  | Google VertexAI| https://cloud.google.com/vertex-ai |
   | RHOAI (vLLM)   | See tests/e2e-prow/rhoai/configs/run.yaml                             |
   | RHEL AI (vLLM) | See tests/e2e/configs/run-rhelai.yaml                                 |
 
@@ -175,6 +176,9 @@ __Note__: Support for individual models is dependent on the specific inference p
 | RHEL AI (vLLM)| meta-llama/Llama-3.1-8B-Instruct           | Yes          | remote::vllm   | [1](tests/e2e/configs/run-rhelai.yaml)                                     |
 | Azure    | gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3-mini, o4-mini | Yes          | remote::azure  | [1](examples/azure-run.yaml)                                               |
 | Azure    |  o1, o1-mini | No          | remote::azure  |  |
+| VertexAI    | google/gemini-2.0-flash, google/gemini-2.5-flash, google/gemini-2.5-pro [^1] | Yes          | remote::vertexai  | [1](examples/vertexai-run.yaml)                                               |
+
+[^1]: The list of models is limited by design in llama-stack; future versions will probably allow more models to be used (see [here](https://github.com/llamastack/llama-stack/blob/release-0.3.x/llama_stack/providers/remote/inference/vertexai/vertexai.py#L54)).
 
 The "provider_type" is used in the llama stack configuration file when referring to the provider.
 
 
@@ -12,20 +12,28 @@ services:
       # Mount both config files - lightspeed-stack.yaml should have library mode enabled
       - ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:Z
       - ./run.yaml:/app-root/run.yaml:Z
+      - ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro
     environment:
-      # LLM Provider API Keys
+      - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
+      - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
+      # OpenAI
       - OPENAI_API_KEY=${OPENAI_API_KEY}
       - E2E_OPENAI_MODEL=${E2E_OPENAI_MODEL:-gpt-4-turbo}
+      # Azure
       - AZURE_API_KEY=${AZURE_API_KEY:-}
-      - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
-      - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
+      # RHAIIS
       - RHAIIS_URL=${RHAIIS_URL:-}
       - RHAIIS_API_KEY=${RHAIIS_API_KEY:-}
       - RHAIIS_MODEL=${RHAIIS_MODEL:-}
+      # RHEL AI
       - RHEL_AI_URL=${RHEL_AI_URL:-}
       - RHEL_AI_PORT=${RHEL_AI_PORT:-}
       - RHEL_AI_API_KEY=${RHEL_AI_API_KEY:-}
       - RHEL_AI_MODEL=${RHEL_AI_MODEL:-}
+      # VertexAI
+      - GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-}
+      - VERTEX_AI_PROJECT=${VERTEX_AI_PROJECT:-}
+      - VERTEX_AI_LOCATION=${VERTEX_AI_LOCATION:-}
       # Enable debug logging if needed
       - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-}
     healthcheck:
 
@@ -10,22 +10,30 @@ services:
       - "8321:8321"  # Expose llama-stack on 8321 (adjust if needed)
     volumes:
       - ./run.yaml:/opt/app-root/run.yaml:Z
+      - ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro
     environment:
+      - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
+      - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
+      # OpenAI
       - OPENAI_API_KEY=${OPENAI_API_KEY}
       - E2E_OPENAI_MODEL=${E2E_OPENAI_MODEL}
+      # Azure
       - AZURE_API_KEY=${AZURE_API_KEY}
-      - BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
-      - TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
+      # RHAIIS
       - RHAIIS_URL=${RHAIIS_URL}
       - RHAIIS_API_KEY=${RHAIIS_API_KEY}
       - RHAIIS_MODEL=${RHAIIS_MODEL}
+      # RHEL AI
       - RHEL_AI_URL=${RHEL_AI_URL}
       - RHEL_AI_PORT=${RHEL_AI_PORT}
       - RHEL_AI_API_KEY=${RHEL_AI_API_KEY}
       - RHEL_AI_MODEL=${RHEL_AI_MODEL}
+      # VertexAI
+      - GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-}
+      - VERTEX_AI_PROJECT=${VERTEX_AI_PROJECT:-}
+      - VERTEX_AI_LOCATION=${VERTEX_AI_LOCATION:-}
       # Enable debug logging if needed
       - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-}
-
     networks:
       - lightspeednet
     healthcheck:
 
@@ -1116,6 +1116,11 @@ <h2 id="quotalimiterconfiguration">QuotaLimiterConfiguration</h2>
     <h2 id="quotaschedulerconfiguration">QuotaSchedulerConfiguration</h2>
     <p>Quota scheduler configuration.</p>
     <table>
+      <colgroup>
+        <col style="width: 26%"/>
+        <col style="width: 23%"/>
+        <col style="width: 50%"/>
+      </colgroup>
       <thead>
         <tr class="header">
           <th>Field</th>
@@ -1129,6 +1134,20 @@ <h2 id="quotaschedulerconfiguration">QuotaSchedulerConfiguration</h2>
           <td>integer</td>
           <td>Quota scheduler period specified in seconds</td>
         </tr>
+        <tr class="even">
+          <td>database_reconnection_count</td>
+          <td>integer</td>
+          <td>Database reconnection count on startup. When database for quota is
+not available on startup, the service tries to reconnect N times with
+specified delay.</td>
+        </tr>
+        <tr class="odd">
+          <td>database_reconnection_delay</td>
+          <td>integer</td>
+          <td>Database reconnection delay specified in seconds. When database for
+quota is not available on startup, the service tries to reconnect N
+times with specified delay.</td>
+        </tr>
       </tbody>
     </table>
     <h2 id="rhidentityconfiguration">RHIdentityConfiguration</h2>
 
@@ -6,6 +6,27 @@
   },
   "components": {
     "schemas": {
+      "APIKeyTokenConfiguration": {
+        "additionalProperties": false,
+        "description": "API Key Token configuration.",
+        "properties": {
+          "api_key": {
+            "examples": [
+              "some-api-key"
+            ],
+            "format": "password",
+            "minLength": 1,
+            "title": "API key",
+            "type": "string",
+            "writeOnly": true
+          }
+        },
+        "required": [
+          "api_key"
+        ],
+        "title": "APIKeyTokenConfiguration",
+        "type": "object"
+      },
       "AccessRule": {
         "additionalProperties": false,
         "description": "Rule defining what actions a role can perform.",
@@ -56,7 +77,8 @@
           "get_metrics",
           "get_config",
           "info",
-          "model_override"
+          "model_override",
+          "rlsapi_v1_infer"
         ],
         "title": "Action",
         "type": "string"
@@ -98,6 +120,17 @@
             ],
             "default": null
           },
+          "api_key_config": {
+            "anyOf": [
+              {
+                "$ref": "#/components/schemas/APIKeyTokenConfiguration"
+              },
+              {
+                "type": "null"
+              }
+            ],
+            "default": null
+          },
           "rh_identity_config": {
             "anyOf": [
               {
@@ -663,7 +696,7 @@
       },
       "ModelContextProtocolServer": {
         "additionalProperties": false,
-        "description": "Model context protocol server configuration.\n\nMCP (Model Context Protocol) servers provide tools and\ncapabilities to the AI agents. These are configured by this structure.\nOnly MCP servers defined in the lightspeed-stack.yaml configuration are\navailable to the agents. Tools configured in the llama-stack run.yaml\nare not accessible to lightspeed-core agents.\n\nUseful resources:\n\n- [Model Context Protocol](https://modelcontextprotocol.io/docs/getting-started/intro)\n- [MCP FAQs](https://modelcontextprotocol.io/faqs)\n- [Wikipedia article](https://en.wikipedia.org/wiki/Model_Context_Protocol)",
+        "description": "Model context protocol server configuration.\n\nMCP (Model Context Protocol) servers provide tools and capabilities to the\nAI agents. These are configured by this structure. Only MCP servers\ndefined in the lightspeed-stack.yaml configuration are available to the\nagents. Tools configured in the llama-stack run.yaml are not accessible to\nlightspeed-core agents.\n\nUseful resources:\n\n- [Model Context Protocol](https://modelcontextprotocol.io/docs/getting-started/intro)\n- [MCP FAQs](https://modelcontextprotocol.io/faqs)\n- [Wikipedia article](https://en.wikipedia.org/wiki/Model_Context_Protocol)",
         "properties": {
           "name": {
             "description": "MCP server name that must be unique",
@@ -691,7 +724,7 @@
       },
       "PostgreSQLDatabaseConfiguration": {
         "additionalProperties": false,
-        "description": "PostgreSQL database configuration.\n\nPostgreSQL database is used by Lightspeed Core Stack service for storing information about\nconversation IDs. It can also be leveraged to store conversation history and information\nabout quota usage.\n\nUseful resources:\n\n- [Psycopg: connection classes](https://www.psycopg.org/psycopg3/docs/api/connections.html)\n- [PostgreSQL connection strings](https://www.connectionstrings.com/postgresql/)\n- [How to Use PostgreSQL in Python](https://www.freecodecamp.org/news/postgresql-in-python/)",
+        "description": "PostgreSQL database configuration.\n\nPostgreSQL database is used by Lightspeed Core Stack service for storing\ninformation about conversation IDs. It can also be leveraged to store\nconversation history and information about quota usage.\n\nUseful resources:\n\n- [Psycopg: connection classes](https://www.psycopg.org/psycopg3/docs/api/connections.html)\n- [PostgreSQL connection strings](https://www.connectionstrings.com/postgresql/)\n- [How to Use PostgreSQL in Python](https://www.freecodecamp.org/news/postgresql-in-python/)",
         "properties": {
           "host": {
             "default": "localhost",
@@ -905,7 +938,7 @@
       },
       "ServiceConfiguration": {
         "additionalProperties": false,
-        "description": "Service configuration.\n\nLightspeed Core Stack is a REST API service that accepts requests\non a specified hostname and port. It is also possible to enable\nauthentication and specify the number of Uvicorn workers. When more\nworkers are specified, the service can handle requests concurrently.",
+        "description": "Service configuration.\n\nLightspeed Core Stack is a REST API service that accepts requests on a\nspecified hostname and port. It is also possible to enable authentication\nand specify the number of Uvicorn workers. When more workers are specified,\nthe service can handle requests concurrently.",
         "properties": {
           "host": {
             "default": "localhost",
 
@@ -420,6 +420,8 @@ Quota scheduler configuration.
 | Field | Type | Description |
 |-------|------|-------------|
 | period | integer | Quota scheduler period specified in seconds |
+| database_reconnection_count | integer | Database reconnection count on startup. When database for quota is not available on startup, the service tries to reconnect N times with specified delay. |
+| database_reconnection_delay | integer | Database reconnection delay specified in seconds. When database for quota is not available on startup, the service tries to reconnect N times with specified delay. |
 
 
 ## RHIdentityConfiguration
 
@@ -157,6 +157,8 @@ class "QuotaLimiterConfiguration" as src.models.config.QuotaLimiterConfiguration
   type : Literal['user_limiter', 'cluster_limiter']
 }
 class "QuotaSchedulerConfiguration" as src.models.config.QuotaSchedulerConfiguration {
+  database_reconnection_count
+  database_reconnection_delay
   period
 }
 class "RHIdentityConfiguration" as src.models.config.RHIdentityConfiguration {
Original file line number	Diff line number	Diff line change
`@@ -157,6 +157,8 @@ class "QuotaLimiterConfiguration" as src.models.config.QuotaLimiterConfiguration`
`157`	`157`	`type : Literal['user_limiter', 'cluster_limiter']`
`158`	`158`	`}`
`159`	`159`	`class "QuotaSchedulerConfiguration" as src.models.config.QuotaSchedulerConfiguration {`
	`160`	`+ database_reconnection_count`
	`161`	`+ database_reconnection_delay`
`160`	`162`	`period`
`161`	`163`	`}`
`162`	`164`	`class "RHIdentityConfiguration" as src.models.config.RHIdentityConfiguration {`