Skip to content

Commit 183e135

Browse files
authored
Merge branch 'lightspeed-core:main' into update-provider-configs-clean
2 parents f192004 + bace7c4 commit 183e135

29 files changed

Lines changed: 1058 additions & 220 deletions

.github/workflows/e2e_tests.yaml

Lines changed: 60 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ jobs:
1010
fail-fast: false
1111
matrix:
1212
mode: ["server", "library"]
13-
environment: ["ci", "azure"]
13+
environment: ["ci", "azure", "vertexai"]
1414

1515
name: "E2E: ${{ matrix.mode }} mode / ${{ matrix.environment }}"
1616

@@ -52,8 +52,7 @@ jobs:
5252
- name: Load lightspeed-stack.yaml configuration
5353
run: |
5454
MODE="${{ matrix.mode }}"
55-
CONFIG_FILE="tests/e2e/configuration/lightspeed-stack-${MODE}-mode.yaml"
56-
55+
CONFIG_FILE="tests/e2e/configuration/${MODE}-mode/lightspeed-stack.yaml"
5756
echo "Loading configuration for ${MODE} mode"
5857
echo "Source: ${CONFIG_FILE}"
5958
@@ -91,6 +90,45 @@ jobs:
9190
echo "✅ Successfully obtained Azure access token."
9291
echo "AZURE_API_KEY=$ACCESS_TOKEN" >> $GITHUB_ENV
9392
93+
- name: Save VertexAI service account key to file
94+
if: matrix.environment == 'vertexai'
95+
env:
96+
GOOGLE_SA_KEY: ${{ secrets.GOOGLE_SA_KEY }}
97+
run: |
98+
echo "Setting up Google Cloud service account credentials..."
99+
100+
if [ -z "$GOOGLE_SA_KEY" ]; then
101+
echo "❌ GOOGLE_SA_KEY is not set. Please configure the secret in GitHub repository settings."
102+
exit 1
103+
fi
104+
105+
GCP_KEYS_PATH=./tmp/.gcp-keys
106+
echo "GCP_KEYS_PATH=$GCP_KEYS_PATH" >> $GITHUB_ENV
107+
108+
mkdir -p $GCP_KEYS_PATH
109+
110+
echo "Writing service account key to file..."
111+
112+
# Decode from base64, needed because GH changes the key if using the raw key
113+
printf '%s' "$GOOGLE_SA_KEY" | base64 -d > $GCP_KEYS_PATH/gcp-key.json
114+
115+
# Verify the file was created and is valid JSON
116+
if [ ! -f "$GCP_KEYS_PATH/gcp-key.json" ]; then
117+
echo "❌ Failed to create gcp-key.json file"
118+
exit 1
119+
fi
120+
121+
if ! jq empty "$GCP_KEYS_PATH/gcp-key.json" 2>/dev/null; then
122+
echo "❌ gcp-key.json is not valid JSON"
123+
exit 1
124+
fi
125+
echo "✅ gcp-key.json is valid JSON"
126+
127+
# Set proper permissions (readable by all, needed for container user 1001)
128+
chmod 644 $GCP_KEYS_PATH/gcp-key.json
129+
130+
echo "GOOGLE_APPLICATION_CREDENTIALS=/opt/app-root/.gcp-keys/gcp-key.json" >> $GITHUB_ENV
131+
94132
- name: Select and configure run.yaml
95133
env:
96134
CONFIG_ENVIRONMENT: ${{ matrix.environment || 'ci' }}
@@ -146,19 +184,30 @@ jobs:
146184
run: |
147185
echo $QUAY_ROBOT_TOKEN | docker login quay.io -u=$QUAY_ROBOT_USERNAME --password-stdin
148186
187+
- name: Create dummy GCP keys directory
188+
if: matrix.environment != 'vertexai'
189+
run: |
190+
echo "Creating dummy GCP keys directory for non-VertexAI environment..."
191+
mkdir -p ./tmp/.gcp-keys-dummy
192+
echo "✅ Dummy directory created."
193+
149194
- name: Run services (Server Mode)
150195
if: matrix.mode == 'server'
151-
env:
196+
env:
152197
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
153198
AZURE_API_KEY: ${{ env.AZURE_API_KEY }}
199+
VERTEX_AI_LOCATION: ${{ secrets.VERTEX_AI_LOCATION }}
200+
VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }}
201+
GOOGLE_APPLICATION_CREDENTIALS: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }}
202+
GCP_KEYS_PATH: ${{ env.GCP_KEYS_PATH }}
154203
run: |
155204
# Debug: Check if environment variable is available for docker-compose
156205
echo "OPENAI_API_KEY is set: $([ -n "$OPENAI_API_KEY" ] && echo 'YES' || echo 'NO')"
157206
echo "OPENAI_API_KEY length: ${#OPENAI_API_KEY}"
158-
207+
159208
docker compose version
160209
docker compose up -d
161-
210+
162211
# Check for errors and show logs if any services failed
163212
if docker compose ps | grep -E 'Exit|exited|stopped'; then
164213
echo "Some services failed to start - showing logs:"
@@ -173,10 +222,14 @@ jobs:
173222
env:
174223
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
175224
AZURE_API_KEY: ${{ env.AZURE_API_KEY }}
225+
VERTEX_AI_LOCATION: ${{ secrets.VERTEX_AI_LOCATION }}
226+
VERTEX_AI_PROJECT: ${{ secrets.VERTEX_AI_PROJECT }}
227+
GOOGLE_APPLICATION_CREDENTIALS: ${{ env.GOOGLE_APPLICATION_CREDENTIALS }}
228+
GCP_KEYS_PATH: ${{ env.GCP_KEYS_PATH }}
176229
run: |
177230
echo "Starting service in library mode (1 container)"
178231
docker compose -f docker-compose-library.yaml up -d
179-
232+
180233
if docker compose -f docker-compose-library.yaml ps | grep -E 'Exit|exited|stopped'; then
181234
echo "Service failed to start - showing logs:"
182235
docker compose -f docker-compose-library.yaml logs

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ Lightspeed Core Stack is based on the FastAPI framework (Uvicorn). The service i
121121
|----------------|-----------------------------------------------------------------------|
122122
| OpenAI | https://platform.openai.com |
123123
| Azure OpenAI | https://azure.microsoft.com/en-us/products/ai-services/openai-service |
124+
| Google VertexAI| https://cloud.google.com/vertex-ai |
124125
| RHOAI (vLLM) | See tests/e2e-prow/rhoai/configs/run.yaml |
125126
| RHEL AI (vLLM) | See tests/e2e/configs/run-rhelai.yaml |
126127

@@ -175,6 +176,9 @@ __Note__: Support for individual models is dependent on the specific inference p
175176
| RHEL AI (vLLM)| meta-llama/Llama-3.1-8B-Instruct | Yes | remote::vllm | [1](tests/e2e/configs/run-rhelai.yaml) |
176177
| Azure | gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat, gpt-4.1, gpt-4.1-mini, gpt-4.1-nano, o3-mini, o4-mini | Yes | remote::azure | [1](examples/azure-run.yaml) |
177178
| Azure | o1, o1-mini | No | remote::azure | |
179+
| VertexAI | google/gemini-2.0-flash, google/gemini-2.5-flash, google/gemini-2.5-pro [^1] | Yes | remote::vertexai | [1](examples/vertexai-run.yaml) |
180+
181+
[^1]: The list of models is limited by design in llama-stack; future versions will probably allow more models to be used (see [here](https://github.com/llamastack/llama-stack/blob/release-0.3.x/llama_stack/providers/remote/inference/vertexai/vertexai.py#L54))
178182

179183
The "provider_type" is used in the llama stack configuration file when referring to the provider.
180184

docker-compose-library.yaml

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,28 @@ services:
1212
# Mount both config files - lightspeed-stack.yaml should have library mode enabled
1313
- ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:Z
1414
- ./run.yaml:/app-root/run.yaml:Z
15+
- ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro
1516
environment:
16-
# LLM Provider API Keys
17+
- BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
18+
- TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
19+
# OpenAI
1720
- OPENAI_API_KEY=${OPENAI_API_KEY}
1821
- E2E_OPENAI_MODEL=${E2E_OPENAI_MODEL:-gpt-4-turbo}
22+
# Azure
1923
- AZURE_API_KEY=${AZURE_API_KEY:-}
20-
- BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
21-
- TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
24+
# RHAIIS
2225
- RHAIIS_URL=${RHAIIS_URL:-}
2326
- RHAIIS_API_KEY=${RHAIIS_API_KEY:-}
2427
- RHAIIS_MODEL=${RHAIIS_MODEL:-}
28+
# RHEL AI
2529
- RHEL_AI_URL=${RHEL_AI_URL:-}
2630
- RHEL_AI_PORT=${RHEL_AI_PORT:-}
2731
- RHEL_AI_API_KEY=${RHEL_AI_API_KEY:-}
2832
- RHEL_AI_MODEL=${RHEL_AI_MODEL:-}
33+
# VertexAI
34+
- GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-}
35+
- VERTEX_AI_PROJECT=${VERTEX_AI_PROJECT:-}
36+
- VERTEX_AI_LOCATION=${VERTEX_AI_LOCATION:-}
2937
# Enable debug logging if needed
3038
- LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-}
3139
healthcheck:

docker-compose.yaml

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,30 @@ services:
1010
- "8321:8321" # Expose llama-stack on 8321 (adjust if needed)
1111
volumes:
1212
- ./run.yaml:/opt/app-root/run.yaml:Z
13+
- ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro
1314
environment:
15+
- BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
16+
- TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
17+
# OpenAI
1418
- OPENAI_API_KEY=${OPENAI_API_KEY}
1519
- E2E_OPENAI_MODEL=${E2E_OPENAI_MODEL}
20+
# Azure
1621
- AZURE_API_KEY=${AZURE_API_KEY}
17-
- BRAVE_SEARCH_API_KEY=${BRAVE_SEARCH_API_KEY:-}
18-
- TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY:-}
22+
# RHAIIS
1923
- RHAIIS_URL=${RHAIIS_URL}
2024
- RHAIIS_API_KEY=${RHAIIS_API_KEY}
2125
- RHAIIS_MODEL=${RHAIIS_MODEL}
26+
# RHEL AI
2227
- RHEL_AI_URL=${RHEL_AI_URL}
2328
- RHEL_AI_PORT=${RHEL_AI_PORT}
2429
- RHEL_AI_API_KEY=${RHEL_AI_API_KEY}
2530
- RHEL_AI_MODEL=${RHEL_AI_MODEL}
31+
# VertexAI
32+
- GOOGLE_APPLICATION_CREDENTIALS=${GOOGLE_APPLICATION_CREDENTIALS:-}
33+
- VERTEX_AI_PROJECT=${VERTEX_AI_PROJECT:-}
34+
- VERTEX_AI_LOCATION=${VERTEX_AI_LOCATION:-}
2635
# Enable debug logging if needed
2736
- LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-}
28-
2937
networks:
3038
- lightspeednet
3139
healthcheck:

docs/config.html

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1116,6 +1116,11 @@ <h2 id="quotalimiterconfiguration">QuotaLimiterConfiguration</h2>
11161116
<h2 id="quotaschedulerconfiguration">QuotaSchedulerConfiguration</h2>
11171117
<p>Quota scheduler configuration.</p>
11181118
<table>
1119+
<colgroup>
1120+
<col style="width: 26%"/>
1121+
<col style="width: 23%"/>
1122+
<col style="width: 50%"/>
1123+
</colgroup>
11191124
<thead>
11201125
<tr class="header">
11211126
<th>Field</th>
@@ -1129,6 +1134,20 @@ <h2 id="quotaschedulerconfiguration">QuotaSchedulerConfiguration</h2>
11291134
<td>integer</td>
11301135
<td>Quota scheduler period specified in seconds</td>
11311136
</tr>
1137+
<tr class="even">
1138+
<td>database_reconnection_count</td>
1139+
<td>integer</td>
1140+
<td>Database reconnection count on startup. When database for quota is
1141+
not available on startup, the service tries to reconnect N times with
1142+
specified delay.</td>
1143+
</tr>
1144+
<tr class="odd">
1145+
<td>database_reconnection_delay</td>
1146+
<td>integer</td>
1147+
<td>Database reconnection delay specified in seconds. When database for
1148+
quota is not available on startup, the service tries to reconnect N
1149+
times with specified delay.</td>
1150+
</tr>
11321151
</tbody>
11331152
</table>
11341153
<h2 id="rhidentityconfiguration">RHIdentityConfiguration</h2>

docs/config.json

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,27 @@
66
},
77
"components": {
88
"schemas": {
9+
"APIKeyTokenConfiguration": {
10+
"additionalProperties": false,
11+
"description": "API Key Token configuration.",
12+
"properties": {
13+
"api_key": {
14+
"examples": [
15+
"some-api-key"
16+
],
17+
"format": "password",
18+
"minLength": 1,
19+
"title": "API key",
20+
"type": "string",
21+
"writeOnly": true
22+
}
23+
},
24+
"required": [
25+
"api_key"
26+
],
27+
"title": "APIKeyTokenConfiguration",
28+
"type": "object"
29+
},
930
"AccessRule": {
1031
"additionalProperties": false,
1132
"description": "Rule defining what actions a role can perform.",
@@ -56,7 +77,8 @@
5677
"get_metrics",
5778
"get_config",
5879
"info",
59-
"model_override"
80+
"model_override",
81+
"rlsapi_v1_infer"
6082
],
6183
"title": "Action",
6284
"type": "string"
@@ -98,6 +120,17 @@
98120
],
99121
"default": null
100122
},
123+
"api_key_config": {
124+
"anyOf": [
125+
{
126+
"$ref": "#/components/schemas/APIKeyTokenConfiguration"
127+
},
128+
{
129+
"type": "null"
130+
}
131+
],
132+
"default": null
133+
},
101134
"rh_identity_config": {
102135
"anyOf": [
103136
{
@@ -663,7 +696,7 @@
663696
},
664697
"ModelContextProtocolServer": {
665698
"additionalProperties": false,
666-
"description": "Model context protocol server configuration.\n\nMCP (Model Context Protocol) servers provide tools and\ncapabilities to the AI agents. These are configured by this structure.\nOnly MCP servers defined in the lightspeed-stack.yaml configuration are\navailable to the agents. Tools configured in the llama-stack run.yaml\nare not accessible to lightspeed-core agents.\n\nUseful resources:\n\n- [Model Context Protocol](https://modelcontextprotocol.io/docs/getting-started/intro)\n- [MCP FAQs](https://modelcontextprotocol.io/faqs)\n- [Wikipedia article](https://en.wikipedia.org/wiki/Model_Context_Protocol)",
699+
"description": "Model context protocol server configuration.\n\nMCP (Model Context Protocol) servers provide tools and capabilities to the\nAI agents. These are configured by this structure. Only MCP servers\ndefined in the lightspeed-stack.yaml configuration are available to the\nagents. Tools configured in the llama-stack run.yaml are not accessible to\nlightspeed-core agents.\n\nUseful resources:\n\n- [Model Context Protocol](https://modelcontextprotocol.io/docs/getting-started/intro)\n- [MCP FAQs](https://modelcontextprotocol.io/faqs)\n- [Wikipedia article](https://en.wikipedia.org/wiki/Model_Context_Protocol)",
667700
"properties": {
668701
"name": {
669702
"description": "MCP server name that must be unique",
@@ -691,7 +724,7 @@
691724
},
692725
"PostgreSQLDatabaseConfiguration": {
693726
"additionalProperties": false,
694-
"description": "PostgreSQL database configuration.\n\nPostgreSQL database is used by Lightspeed Core Stack service for storing information about\nconversation IDs. It can also be leveraged to store conversation history and information\nabout quota usage.\n\nUseful resources:\n\n- [Psycopg: connection classes](https://www.psycopg.org/psycopg3/docs/api/connections.html)\n- [PostgreSQL connection strings](https://www.connectionstrings.com/postgresql/)\n- [How to Use PostgreSQL in Python](https://www.freecodecamp.org/news/postgresql-in-python/)",
727+
"description": "PostgreSQL database configuration.\n\nPostgreSQL database is used by Lightspeed Core Stack service for storing\ninformation about conversation IDs. It can also be leveraged to store\nconversation history and information about quota usage.\n\nUseful resources:\n\n- [Psycopg: connection classes](https://www.psycopg.org/psycopg3/docs/api/connections.html)\n- [PostgreSQL connection strings](https://www.connectionstrings.com/postgresql/)\n- [How to Use PostgreSQL in Python](https://www.freecodecamp.org/news/postgresql-in-python/)",
695728
"properties": {
696729
"host": {
697730
"default": "localhost",
@@ -905,7 +938,7 @@
905938
},
906939
"ServiceConfiguration": {
907940
"additionalProperties": false,
908-
"description": "Service configuration.\n\nLightspeed Core Stack is a REST API service that accepts requests\non a specified hostname and port. It is also possible to enable\nauthentication and specify the number of Uvicorn workers. When more\nworkers are specified, the service can handle requests concurrently.",
941+
"description": "Service configuration.\n\nLightspeed Core Stack is a REST API service that accepts requests on a\nspecified hostname and port. It is also possible to enable authentication\nand specify the number of Uvicorn workers. When more workers are specified,\nthe service can handle requests concurrently.",
909942
"properties": {
910943
"host": {
911944
"default": "localhost",

docs/config.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,8 @@ Quota scheduler configuration.
420420
| Field | Type | Description |
421421
|-------|------|-------------|
422422
| period | integer | Quota scheduler period specified in seconds |
423+
| database_reconnection_count | integer | Database reconnection count on startup. When the database for quota is not available on startup, the service tries to reconnect N times with the specified delay. |
424+
| database_reconnection_delay | integer | Database reconnection delay specified in seconds. When the database for quota is not available on startup, the service tries to reconnect N times with the specified delay. |
423425

424426

425427
## RHIdentityConfiguration

docs/config.puml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ class "QuotaLimiterConfiguration" as src.models.config.QuotaLimiterConfiguration
157157
type : Literal['user_limiter', 'cluster_limiter']
158158
}
159159
class "QuotaSchedulerConfiguration" as src.models.config.QuotaSchedulerConfiguration {
160+
database_reconnection_count
161+
database_reconnection_delay
160162
period
161163
}
162164
class "RHIdentityConfiguration" as src.models.config.RHIdentityConfiguration {

0 commit comments

Comments
 (0)