Skip to content

Commit 17481f2

Browse files
are-ces and claude committed
LCORE-1422: Add streaming responses test, prow config, and cleanup
- Add streaming Responses API inline RAG e2e scenario (stream: true)
- Copy inline RAG config to e2e-prow/rhoai/configs for prow compatibility
- Extract restart into explicit "The service is restarted" Gherkin step
- Replace custom _resolve_env_var with llama_stack replace_env_vars

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent a9b4d2a commit 17481f2

4 files changed

Lines changed: 62 additions & 30 deletions

File tree

src/llama_stack_configuration.py

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
"""
77

88
import os
9-
import re
109
from argparse import ArgumentParser
1110
from pathlib import Path
1211
from typing import Any, Optional
@@ -156,24 +155,6 @@ def construct_storage_backends_section(
156155
return output
157156

158157

159-
def _resolve_env_var(value: str) -> str:
160-
"""Resolve ``${env.VAR}`` and ``${env.VAR:=default}`` patterns.
161-
162-
Parameters:
163-
value: A string that may contain an environment-variable reference.
164-
165-
Returns:
166-
The resolved value if a matching env var is set, the default if one is
167-
provided, or the original string unchanged.
168-
"""
169-
match = re.match(r"^\$\{env\.(\w+)(?::=([^}]*))?\}$", value)
170-
if match:
171-
var_name = match.group(1)
172-
default = match.group(2)
173-
return os.environ.get(var_name, default if default is not None else value)
174-
return value
175-
176-
177158
def construct_vector_stores_section(
178159
ls_config: dict[str, Any], byok_rag: list[dict[str, Any]]
179160
) -> list[dict[str, Any]]:
@@ -206,7 +187,7 @@ def construct_vector_stores_section(
206187
# Resolve ${env.VAR} patterns so comparisons work when existing entries
207188
# use environment variable references and new entries have resolved values.
208189
existing_store_ids = {
209-
_resolve_env_var(vs.get("vector_store_id", "")) for vs in output
190+
replace_env_vars(vs.get("vector_store_id", "")) for vs in output
210191
}
211192
added = 0
212193
for brag in byok_rag:
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
name: Lightspeed Core Service (LCS)
2+
service:
3+
host: 0.0.0.0
4+
port: 8080
5+
auth_enabled: false
6+
workers: 1
7+
color_log: true
8+
access_log: true
9+
llama_stack:
10+
use_as_library_client: false
11+
url: http://${env.E2E_LLAMA_HOSTNAME}:8321
12+
api_key: xyzzy
13+
user_data_collection:
14+
feedback_enabled: true
15+
feedback_storage: "/tmp/data/feedback"
16+
transcripts_enabled: true
17+
transcripts_storage: "/tmp/data/transcripts"
18+
19+
conversation_cache:
20+
type: "sqlite"
21+
sqlite:
22+
db_path: "/tmp/data/conversation-cache.db"
23+
24+
authentication:
25+
module: "noop"
26+
27+
byok_rag:
28+
- rag_id: e2e-test-docs
29+
rag_type: inline::faiss
30+
embedding_model: sentence-transformers/all-mpnet-base-v2
31+
embedding_dimension: 768
32+
vector_db_id: ${env.FAISS_VECTOR_STORE_ID}
33+
db_path: ${env.KV_RAG_PATH:=~/.llama/storage/rag/kv_store.db}
34+
score_multiplier: 1.0
35+
36+
rag:
37+
inline:
38+
- e2e-test-docs

tests/e2e/features/inline_rag.feature

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@ Feature: Inline RAG (BYOK) support tests
22

33
Background:
44
Given The service is started locally
5+
And The system is in default state
6+
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
57
And REST API service prefix is /v1
68
And The service uses the lightspeed-stack-inline-rag.yaml configuration
9+
And The service is restarted
710

811
Scenario: Check if inline RAG source is registered
9-
Given The system is in default state
10-
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
1112
When I access REST API endpoint rags using HTTP GET method
1213
Then The status code of the response is 200
1314
And the body of the response has the following structure
@@ -20,8 +21,6 @@ Feature: Inline RAG (BYOK) support tests
2021
"""
2122

2223
Scenario: Query with inline RAG returns relevant content
23-
Given The system is in default state
24-
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
2524
When I use "query" to ask question with authorization header
2625
"""
2726
{"query": "What is the title of the article from Paul?", "system_prompt": "You are an assistant. Write only lowercase letters"}
@@ -33,8 +32,6 @@ Feature: Inline RAG (BYOK) support tests
3332
And The response should contain non-empty rag_chunks
3433

3534
Scenario: Inline RAG query includes referenced documents
36-
Given The system is in default state
37-
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
3835
When I use "query" to ask question with authorization header
3936
"""
4037
{"query": "What does Paul Graham say about great work?"}
@@ -43,8 +40,6 @@ Feature: Inline RAG (BYOK) support tests
4340
And The response should contain non-empty referenced_documents
4441

4542
Scenario: Streaming query with inline RAG returns relevant content
46-
Given The system is in default state
47-
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
4843
When I use "streaming_query" to ask question with authorization header
4944
"""
5045
{"query": "What is the title of the article from Paul?", "system_prompt": "You are an assistant. Write only lowercase letters"}
@@ -56,8 +51,6 @@ Feature: Inline RAG (BYOK) support tests
5651
| great work |
5752

5853
Scenario: Responses API with inline RAG returns relevant content
59-
Given The system is in default state
60-
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
6154
When I use "responses" to ask question with authorization header
6255
"""
6356
{"input": "What is the title of the article from Paul?", "model": "{PROVIDER}/{MODEL}", "stream": false, "instructions": "You are an assistant. Write only lowercase letters"}
@@ -66,3 +59,14 @@ Feature: Inline RAG (BYOK) support tests
6659
And The response should contain following fragments
6760
| Fragments in LLM response |
6861
| great work |
62+
63+
Scenario: Streaming Responses API with inline RAG returns relevant content
64+
When I use "responses" to ask question with authorization header
65+
"""
66+
{"input": "What is the title of the article from Paul?", "model": "{PROVIDER}/{MODEL}", "stream": true, "instructions": "You are an assistant. Write only lowercase letters"}
67+
"""
68+
Then The status code of the response is 200
69+
And I wait for the response to be completed
70+
And The streamed response should contain following fragments
71+
| Fragments in LLM response |
72+
| great work |

tests/e2e/features/steps/common.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,15 @@ def configure_service(context: Context, config_name: str) -> None:
5454
config_path = f"tests/e2e/configuration/{mode_dir}/{config_name}"
5555
create_config_backup("lightspeed-stack.yaml")
5656
switch_config(config_path)
57+
58+
59+
@given("The service is restarted")
60+
def restart_service(context: Context) -> None:
61+
"""Restart the lightspeed-stack container and wait for it to be healthy.
62+
63+
Parameters:
64+
context (Context): Behave context.
65+
"""
5766
restart_container("lightspeed-stack")
5867
# Library mode needs extra time to load embedding models after restart
5968
wait_for_container_health("lightspeed-stack", max_attempts=12)

0 commit comments

Comments
 (0)