Skip to content

Commit 6044d4f

Browse files
committed
Use user-facing index names from LCORE config instead of llama-stack IDs
Model changes: - RAGChunk: Change source field to contain the index name from config instead of the llama-stack filename/UUID. Add attributes field (Optional[dict[str, Any]]) to preserve document metadata from the RAG provider (filename, file_id, and all provider-supplied attributes). - ReferencedDocument: Add source field (Optional[str]) containing the index name for consistency with RAGChunk. - ResponseGeneratorContext: Add vector_store_ids and rag_id_mapping fields for passing index identification context through streaming. Configuration: - AppConfig: Add rag_id_mapping property that builds a {vector_db_id: rag_id} mapping from BYOK RAG configuration. Add resolve_index_name helper for mapping vector store IDs to user-facing names with passthrough for unmapped IDs. Extraction logic (query_v2.py): - Add _resolve_source_for_result() to determine index name per result: checks result attributes for vector_store_id, falls back to single queried store, returns None when ambiguous (multiple stores). - Add _build_chunk_attributes() to merge provider attributes with original filename and file_id for debugging. - Update extract_rag_chunks_from_file_search_item(), parse_rag_chunks_from_responses_api(), _build_tool_call_summary(), and parse_referenced_documents_from_responses_api() to accept and forward vector_store_ids and rag_id_mapping parameters. - Update prepare_tools_for_responses_api() to also return the resolved vector_store_ids alongside tool configurations. /v1/rags endpoint: - List endpoint now maps llama-stack vector_store_ids to user-facing rag_ids from BYOK config. Unmapped IDs pass through unchanged. - Detail endpoint accepts both rag_id (from config) and raw vector_store_id, resolving rag_id to vector_db_id for the llama-stack lookup. Response displays the user-facing ID. Streaming support: - Update streaming_query_v2.py retrieve_response to return vector_store_ids and rag_id_mapping. 
- Update streaming_query.py to unpack and populate ResponseGeneratorContext with index identification data. - Update a2a.py to handle the expanded return tuple. Transcript storage: - create_rag_chunks_dict() now includes the attributes field in serialized RAG chunk dictionaries. Tests: - Add TestResolveSourceForResult: 7 tests covering attribute-based resolution, single store, multi-store, empty inputs, precedence. - Add TestBuildChunkAttributes: 3 tests for attribute merging. - Add TestExtractRagChunksWithIndexMapping: 3 tests for end-to-end chunk extraction with index mapping. - Add TestParseReferencedDocumentsWithSource: 2 tests for referenced document source population. - Add BYOK RAG mapping tests for /v1/rags: list with mapping, detail with rag_id resolution, _resolve_rag_id_to_vector_db_id. - Add configuration tests: rag_id_mapping property, resolve_index_name, error when config not loaded. - Update existing tests for new field defaults and return types.
1 parent cfe7ef3 commit 6044d4f

8 files changed

Lines changed: 319 additions & 7 deletions

File tree

src/app/endpoints/rags.py

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,14 @@ async def rags_endpoint_handler(
8888
rags = await client.vector_stores.list()
8989
logger.info("List of rags: %d", len(rags.data))
9090

91-
# convert into the proper response object
92-
return RAGListResponse(rags=[rag.id for rag in rags.data])
91+
# Map llama-stack vector store IDs to user-facing rag_ids from config
92+
rag_id_mapping = configuration.rag_id_mapping
93+
rag_ids = [
94+
configuration.resolve_index_name(rag.id, rag_id_mapping)
95+
for rag in rags.data
96+
]
97+
98+
return RAGListResponse(rags=rag_ids)
9399

94100
# connection to Llama Stack server
95101
except APIConnectionError as e:
@@ -98,6 +104,30 @@ async def rags_endpoint_handler(
98104
raise HTTPException(**response.model_dump()) from e
99105

100106

107+
def _resolve_rag_id_to_vector_db_id(rag_id: str) -> str:
    """Translate a user-facing rag_id into a llama-stack vector_db_id.

    The BYOK RAG entries of the loaded configuration are searched for an
    entry whose ``rag_id`` equals the given identifier; the first match wins.
    When nothing matches, or the BYOK entries cannot be read, the identifier
    is handed back untouched on the assumption that it already is a
    llama-stack vector store ID.

    Parameters:
        rag_id: The user-provided RAG identifier.

    Returns:
        The ``vector_db_id`` of the first matching BYOK entry, or ``rag_id``
        itself when no entry matches.
    """
    try:
        entries = configuration.configuration.byok_rag
    except (AttributeError, RuntimeError):
        # BYOK section is unreachable: fall back to treating the ID as raw.
        return rag_id

    return next(
        (entry.vector_db_id for entry in entries if entry.rag_id == rag_id),
        rag_id,
    )
129+
130+
101131
@router.get("/rags/{rag_id}", responses=rag_responses)
102132
@authorize(Action.GET_RAG)
103133
async def get_rag_endpoint_handler(
@@ -107,6 +137,10 @@ async def get_rag_endpoint_handler(
107137
) -> RAGInfoResponse:
108138
"""Retrieve a single RAG by its unique ID.
109139
140+
Accepts both user-facing rag_id (from LCORE config) and llama-stack
141+
vector_store_id. If a rag_id from config is provided, it is resolved
142+
to the underlying vector_store_id for the llama-stack lookup.
143+
110144
Returns:
111145
RAGInfoResponse: A single RAG's details.
112146
@@ -129,13 +163,22 @@ async def get_rag_endpoint_handler(
129163
llama_stack_configuration = configuration.llama_stack_configuration
130164
logger.info("Llama stack config: %s", llama_stack_configuration)
131165

166+
# Resolve user-facing rag_id to llama-stack vector_db_id
167+
vector_db_id = _resolve_rag_id_to_vector_db_id(rag_id)
168+
132169
try:
133170
# try to get Llama Stack client
134171
client = AsyncLlamaStackClientHolder().get_client()
135172
# retrieve info about RAG
136-
rag_info = await client.vector_stores.retrieve(rag_id)
173+
rag_info = await client.vector_stores.retrieve(vector_db_id)
174+
175+
# Return the user-facing ID (rag_id from config if mapped, otherwise as-is)
176+
display_id = configuration.resolve_index_name(
177+
rag_info.id, configuration.rag_id_mapping
178+
)
179+
137180
return RAGInfoResponse(
138-
id=rag_info.id,
181+
id=display_id,
139182
name=rag_info.name,
140183
created_at=rag_info.created_at,
141184
last_active_at=rag_info.last_active_at,

src/configuration.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,5 +371,38 @@ def solr(self) -> Optional[SolrConfiguration]:
371371
raise LogicError("logic error: configuration is not loaded")
372372
return self._configuration.solr
373373

374+
@property
def rag_id_mapping(self) -> dict[str, str]:
    """Build the vector_db_id -> rag_id lookup from the BYOK RAG config.

    Returns:
        dict[str, str]: One item per BYOK RAG entry, keyed by the entry's
        ``vector_db_id`` with the entry's ``rag_id`` as the value. If two
        entries share a ``vector_db_id``, the later one wins.

    Raises:
        LogicError: If the configuration has not been loaded.
    """
    loaded = self._configuration
    if loaded is None:
        raise LogicError("logic error: configuration is not loaded")

    lookup: dict[str, str] = {}
    for entry in loaded.byok_rag:
        key = entry.vector_db_id
        lookup[key] = entry.rag_id
    return lookup
388+
389+
def resolve_index_name(
    self, vector_store_id: str, rag_id_mapping: Optional[dict[str, str]] = None
) -> str:
    """Map a vector store ID to its user-facing index name.

    The caller may hand in a pre-built mapping; when it is ``None`` the
    ``rag_id_mapping`` property of this object is consulted instead. An
    empty mapping is still honoured (it is not ``None``), so every ID then
    passes through unchanged.

    Parameters:
        vector_store_id: The llama-stack vector store identifier.
        rag_id_mapping: Optional pre-built mapping to avoid repeated lookups.

    Returns:
        str: The mapped index name, or ``vector_store_id`` itself when the
        mapping has no entry for it.
    """
    if rag_id_mapping is None:
        lookup = self.rag_id_mapping
    else:
        lookup = rag_id_mapping
    return lookup.get(vector_store_id, vector_store_id)
406+
374407

375408
configuration: AppConfig = AppConfig()

src/models/context.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Context objects for internal operations."""
22

3-
from dataclasses import dataclass
3+
from dataclasses import dataclass, field
4+
from typing import Any
45

56
from llama_stack_client import AsyncLlamaStackClient
67

@@ -23,6 +24,9 @@ class ResponseGeneratorContext:
2324
query_request: The query request object
2425
started_at: Timestamp when the request started (ISO 8601 format)
2526
client: The Llama Stack client for API interactions
27+
metadata_map: Dictionary for storing metadata from tool responses
28+
vector_store_ids: Vector store IDs used in the query for source resolution.
29+
rag_id_mapping: Mapping from vector_db_id to user-facing rag_id.
2630
"""
2731

2832
# Conversation & User context
@@ -39,3 +43,8 @@ class ResponseGeneratorContext:
3943

4044
# Dependencies & State
4145
client: AsyncLlamaStackClient
46+
metadata_map: dict[str, dict[str, Any]]
47+
48+
# RAG index identification
49+
vector_store_ids: list[str] = field(default_factory=list)
50+
rag_id_mapping: dict[str, str] = field(default_factory=dict)

src/utils/endpoints.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,27 @@ def check_configuration_loaded(config: AppConfig) -> None:
196196
raise HTTPException(**response.model_dump()) from e
197197

198198

199+
def create_rag_chunks_dict(summary: TurnSummary) -> list[dict[str, Any]]:
    """
    Serialize the RAG chunks of a turn summary into plain dictionaries.

    Args:
        summary: TurnSummary whose ``rag_chunks`` are serialized.

    Returns:
        One dictionary per chunk, in the original chunk order, holding the
        chunk's content, source, score, and attributes.
    """
    serialized: list[dict[str, Any]] = []
    for rag_chunk in summary.rag_chunks:
        entry = {
            "content": rag_chunk.content,
            "source": rag_chunk.source,
            "score": rag_chunk.score,
            "attributes": rag_chunk.attributes,
        }
        serialized.append(entry)
    return serialized
218+
219+
199220
def _process_http_source(
200221
src: str, doc_urls: set[str]
201222
) -> Optional[tuple[Optional[AnyUrl], str]]:

src/utils/types.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,15 @@ class RAGChunk(BaseModel):
154154
"""Model representing a RAG chunk used in the response."""
155155

156156
content: str = Field(description="The content of the chunk")
157-
source: Optional[str] = Field(default=None, description="Source document or URL")
157+
source: Optional[str] = Field(
158+
default=None,
159+
description="Index name identifying the knowledge source from configuration",
160+
)
158161
score: Optional[float] = Field(default=None, description="Relevance score")
162+
attributes: Optional[dict[str, Any]] = Field(
163+
default=None,
164+
description="Document metadata from the RAG provider (e.g., url, title, author)",
165+
)
159166

160167

161168
class ReferencedDocument(BaseModel):

tests/unit/app/endpoints/test_rags.py

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
"""Unit tests for the /rags REST API endpoints."""
22

3+
from pathlib import Path
4+
from typing import Any
5+
36
import pytest
47
from fastapi import HTTPException, Request, status
58
from llama_stack_client import APIConnectionError, BadRequestError
69
from pytest_mock import MockerFixture
710

811
from app.endpoints.rags import (
12+
_resolve_rag_id_to_vector_db_id,
913
get_rag_endpoint_handler,
1014
rags_endpoint_handler,
1115
)
@@ -244,3 +248,146 @@ def __init__(self) -> None:
244248
assert response.object == "faiss"
245249
assert response.status == "completed"
246250
assert response.usage_bytes == 100
251+
252+
253+
def _make_byok_config(tmp_path: Any) -> AppConfig:
    """Build an AppConfig carrying two BYOK RAG entries for testing.

    An empty ``test.db`` file is created under ``tmp_path`` and used as the
    ``db_path`` of both entries.
    """
    db_file = Path(tmp_path) / "test.db"
    db_file.touch()

    # (rag_id, vector_db_id) pairs; everything else is shared by the entries.
    id_pairs = (
        ("ocp-4.18-docs", "vs_abc123"),
        ("company-kb", "vs_def456"),
    )
    byok_entries = [
        {
            "rag_id": rag_id,
            "rag_type": "inline::faiss",
            "embedding_model": "all-MiniLM-L6-v2",
            "embedding_dimension": 384,
            "vector_db_id": vector_db_id,
            "db_path": str(db_file),
        }
        for rag_id, vector_db_id in id_pairs
    ]

    settings = {
        "name": "test",
        "service": {"host": "localhost", "port": 8080},
        "llama_stack": {
            "api_key": "test-key",
            "url": "http://test.com:1234",
            "use_as_library_client": False,
        },
        "user_data_collection": {},
        "authentication": {"module": "noop"},
        "authorization": {"access_rules": []},
        "byok_rag": byok_entries,
    }

    cfg = AppConfig()
    cfg.init_from_dict(settings)
    return cfg
291+
292+
293+
@pytest.mark.asyncio
async def test_rags_endpoint_returns_rag_ids_from_config(
    mocker: MockerFixture, tmp_path: Path
) -> None:
    """Test that /rags endpoint maps llama-stack IDs to user-facing rag_ids.

    The llama-stack client is mocked to list two vector stores present in the
    BYOK config and one that is not; the unmapped one must pass through as-is.
    """
    # pytest's tmp_path fixture yields a pathlib.Path; the helper wraps its
    # argument in Path() itself, so no str() round-trip is needed.
    byok_config = _make_byok_config(tmp_path)
    mocker.patch("app.endpoints.rags.configuration", byok_config)

    # pylint: disable=R0903
    class RagInfo:
        """RagInfo mock."""

        def __init__(self, rag_id: str) -> None:
            """Initialize with ID."""
            self.id = rag_id

    # pylint: disable=R0903
    class RagList:
        """List of RAGs mock."""

        def __init__(self) -> None:
            """Initialize with mapped and unmapped entries."""
            self.data = [
                RagInfo("vs_abc123"),  # mapped to ocp-4.18-docs
                RagInfo("vs_def456"),  # mapped to company-kb
                RagInfo("vs_unmapped"),  # not in config, passed through
            ]

    mock_client = mocker.AsyncMock()
    mock_client.vector_stores.list.return_value = RagList()
    mocker.patch(
        "app.endpoints.rags.AsyncLlamaStackClientHolder"
    ).return_value.get_client.return_value = mock_client

    request = Request(scope={"type": "http"})
    auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")

    response = await rags_endpoint_handler(request=request, auth=auth)
    assert response.rags == ["ocp-4.18-docs", "company-kb", "vs_unmapped"]
332+
333+
334+
@pytest.mark.asyncio
async def test_rag_info_endpoint_accepts_rag_id_from_config(
    mocker: MockerFixture, tmp_path: Path
) -> None:
    """Test that /rags/{rag_id} accepts a user-facing rag_id and resolves it.

    The handler is called with the config-level rag_id; the mocked client
    must be queried with the underlying vector store ID, and the response
    must show the user-facing ID again.
    """
    # pytest's tmp_path fixture yields a pathlib.Path; the helper wraps its
    # argument in Path() itself, so no str() round-trip is needed.
    byok_config = _make_byok_config(tmp_path)
    mocker.patch("app.endpoints.rags.configuration", byok_config)

    # pylint: disable=R0902,R0903
    class RagInfo:
        """RagInfo mock."""

        def __init__(self) -> None:
            """Initialize with test data."""
            self.id = "vs_abc123"
            self.name = "OCP 4.18 Docs"
            self.created_at = 100
            self.last_active_at = 200
            self.expires_at = 300
            self.object = "vector_store"
            self.status = "completed"
            self.usage_bytes = 500

    mock_client = mocker.AsyncMock()
    mock_client.vector_stores.retrieve.return_value = RagInfo()
    mocker.patch(
        "app.endpoints.rags.AsyncLlamaStackClientHolder"
    ).return_value.get_client.return_value = mock_client

    request = Request(scope={"type": "http"})
    auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")

    # Pass the user-facing rag_id, not the vector_store_id
    response = await get_rag_endpoint_handler(
        request=request, auth=auth, rag_id="ocp-4.18-docs"
    )

    # The endpoint should resolve ocp-4.18-docs -> vs_abc123 for the lookup
    mock_client.vector_stores.retrieve.assert_called_once_with("vs_abc123")
    # The response should show the user-facing ID
    assert response.id == "ocp-4.18-docs"
375+
376+
377+
def test_resolve_rag_id_to_vector_db_id_with_mapping(
    mocker: MockerFixture, tmp_path: Path
) -> None:
    """Test that _resolve_rag_id_to_vector_db_id maps rag_id to vector_db_id."""
    # pytest's tmp_path fixture yields a pathlib.Path; the helper wraps its
    # argument in Path() itself, so no str() round-trip is needed.
    byok_config = _make_byok_config(tmp_path)
    mocker.patch("app.endpoints.rags.configuration", byok_config)
    assert _resolve_rag_id_to_vector_db_id("ocp-4.18-docs") == "vs_abc123"
    assert _resolve_rag_id_to_vector_db_id("company-kb") == "vs_def456"
385+
386+
387+
def test_resolve_rag_id_to_vector_db_id_passthrough(
    mocker: MockerFixture, tmp_path: Path
) -> None:
    """Test that unmapped IDs are passed through unchanged."""
    # pytest's tmp_path fixture yields a pathlib.Path; the helper wraps its
    # argument in Path() itself, so no str() round-trip is needed.
    byok_config = _make_byok_config(tmp_path)
    mocker.patch("app.endpoints.rags.configuration", byok_config)
    assert _resolve_rag_id_to_vector_db_id("vs_unknown") == "vs_unknown"

tests/unit/cache/test_postgres_cache.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -599,7 +599,7 @@ def test_insert_and_get_with_referenced_documents(
599599
inserted_json_str = sql_params[-3]
600600

601601
assert json.loads(inserted_json_str) == [
602-
{"doc_url": "http://example.com/", "doc_title": "Test Doc"}
602+
{"doc_url": "http://example.com/", "doc_title": "Test Doc", "source": None}
603603
]
604604

605605
# Simulate the database returning that data

0 commit comments

Comments
 (0)