Skip to content

Commit b7422c5

Browse files
committed
Use user-facing index names from LCORE config instead of llama-stack IDs
Model changes:
- RAGChunk: Change source field to contain the index name from config instead of the llama-stack filename/UUID. Add attributes field (Optional[dict[str, Any]]) to preserve document metadata from the RAG provider (filename, file_id, and all provider-supplied attributes).
- ReferencedDocument: Add source field (Optional[str]) containing the index name for consistency with RAGChunk.
- ResponseGeneratorContext: Add vector_store_ids and rag_id_mapping fields for passing index identification context through streaming.

Configuration:
- AppConfig: Add rag_id_mapping property that builds a {vector_db_id: rag_id} mapping from BYOK RAG configuration. Add resolve_index_name helper for mapping vector store IDs to user-facing names with passthrough for unmapped IDs.

Extraction logic (query_v2.py):
- Add _resolve_source_for_result() to determine index name per result: checks result attributes for vector_store_id, falls back to single queried store, returns None when ambiguous (multiple stores).
- Add _build_chunk_attributes() to merge provider attributes with original filename and file_id for debugging.
- Update extract_rag_chunks_from_file_search_item(), parse_rag_chunks_from_responses_api(), _build_tool_call_summary(), and parse_referenced_documents_from_responses_api() to accept and forward vector_store_ids and rag_id_mapping parameters.
- Update prepare_tools_for_responses_api() to also return the resolved vector_store_ids alongside tool configurations.

/v1/rags endpoint:
- List endpoint now maps llama-stack vector_store_ids to user-facing rag_ids from BYOK config. Unmapped IDs pass through unchanged.
- Detail endpoint accepts both rag_id (from config) and raw vector_store_id, resolving rag_id to vector_db_id for the llama-stack lookup. Response displays the user-facing ID.

Streaming support:
- Update streaming_query_v2.py retrieve_response to return vector_store_ids and rag_id_mapping.
- Update streaming_query.py to unpack and populate ResponseGeneratorContext with index identification data.
- Update a2a.py to handle the expanded return tuple.

Transcript storage:
- create_rag_chunks_dict() now includes the attributes field in serialized RAG chunk dictionaries.

Tests:
- Add TestResolveSourceForResult: 7 tests covering attribute-based resolution, single store, multi-store, empty inputs, precedence.
- Add TestBuildChunkAttributes: 3 tests for attribute merging.
- Add TestExtractRagChunksWithIndexMapping: 3 tests for end-to-end chunk extraction with index mapping.
- Add TestParseReferencedDocumentsWithSource: 2 tests for referenced document source population.
- Add BYOK RAG mapping tests for /v1/rags: list with mapping, detail with rag_id resolution, _resolve_rag_id_to_vector_db_id.
- Add configuration tests: rag_id_mapping property, resolve_index_name, error when config not loaded.
- Update existing tests for new field defaults and return types.
1 parent 15754b4 commit b7422c5

15 files changed

Lines changed: 661 additions & 43 deletions

src/app/endpoints/a2a.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -327,7 +327,7 @@ async def _process_task_streaming( # pylint: disable=too-many-locals
327327
)
328328

329329
# Stream response from LLM using the Responses API
330-
stream, conversation_id = await retrieve_response(
330+
stream, conversation_id, _vs_ids, _mapping = await retrieve_response(
331331
client,
332332
llama_stack_model_id,
333333
query_request,

src/app/endpoints/query_v2.py

Lines changed: 124 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@
8585
def _build_tool_call_summary( # pylint: disable=too-many-return-statements,too-many-branches
8686
output_item: OpenAIResponseOutput,
8787
rag_chunks: list[RAGChunk],
88+
vector_store_ids: Optional[list[str]] = None,
89+
rag_id_mapping: Optional[dict[str, str]] = None,
8890
) -> tuple[Optional[ToolCallSummary], Optional[ToolResultSummary]]:
8991
"""Translate Responses API tool outputs into ToolCallSummary and ToolResultSummary records.
9092
@@ -94,6 +96,8 @@ def _build_tool_call_summary( # pylint: disable=too-many-return-statements,too-
9496
Args:
9597
output_item: An OpenAIResponseOutput item from the response.output array
9698
rag_chunks: List to append extracted RAG chunks to (from file_search_call items)
99+
vector_store_ids: Vector store IDs used in the query for source resolution.
100+
rag_id_mapping: Mapping from vector_db_id to user-facing rag_id.
97101
Returns:
98102
A tuple of (ToolCallSummary, ToolResultSummary) one of them possibly None
99103
if current llama stack Responses API does not provide the information.
@@ -125,7 +129,9 @@ def _build_tool_call_summary( # pylint: disable=too-many-return-statements,too-
125129
file_search_item = cast(
126130
OpenAIResponseOutputMessageFileSearchToolCall, output_item
127131
)
128-
extract_rag_chunks_from_file_search_item(file_search_item, rag_chunks)
132+
extract_rag_chunks_from_file_search_item(
133+
file_search_item, rag_chunks, vector_store_ids, rag_id_mapping
134+
)
129135
response_payload: Optional[dict[str, Any]] = None
130136
if file_search_item.results is not None:
131137
response_payload = {
@@ -365,9 +371,10 @@ async def retrieve_response( # pylint: disable=too-many-locals,too-many-branche
365371
validate_attachments_metadata(query_request.attachments)
366372

367373
# Prepare tools for responses API
368-
toolgroups = await prepare_tools_for_responses_api(
374+
toolgroups, vector_store_ids = await prepare_tools_for_responses_api(
369375
client, query_request, token, configuration, mcp_headers
370376
)
377+
rag_id_mapping = configuration.rag_id_mapping
371378

372379
# Prepare input for Responses API
373380
# Convert attachments to text and concatenate with query
@@ -450,7 +457,9 @@ async def retrieve_response( # pylint: disable=too-many-locals,too-many-branche
450457
if message_text:
451458
llm_response += message_text
452459

453-
tool_call, tool_result = _build_tool_call_summary(output_item, rag_chunks)
460+
tool_call, tool_result = _build_tool_call_summary(
461+
output_item, rag_chunks, vector_store_ids, rag_id_mapping
462+
)
454463
if tool_call:
455464
tool_calls.append(tool_call)
456465
if tool_result:
@@ -470,7 +479,9 @@ async def retrieve_response( # pylint: disable=too-many-locals,too-many-branche
470479
)
471480

472481
# Extract referenced documents and token usage from Responses API response
473-
referenced_documents = parse_referenced_documents_from_responses_api(response)
482+
referenced_documents = parse_referenced_documents_from_responses_api(
483+
response, vector_store_ids, rag_id_mapping
484+
)
474485
model_label = model_id.split("/", 1)[1] if "/" in model_id else model_id
475486
token_usage = extract_token_usage_from_responses_api(
476487
response, model_label, provider_id, system_prompt
@@ -490,63 +501,150 @@ async def retrieve_response( # pylint: disable=too-many-locals,too-many-branche
490501
)
491502

492503

504+
def _resolve_source_for_result(
505+
result_attributes: dict[str, Any],
506+
vector_store_ids: list[str],
507+
rag_id_mapping: dict[str, str],
508+
) -> Optional[str]:
509+
"""Resolve the index name for a single file search result.
510+
511+
Attempts to determine the knowledge source index name by checking, in order:
512+
1. A ``vector_store_id`` key in the result's attributes dict
513+
2. The sole vector store when exactly one was queried
514+
515+
Parameters:
516+
result_attributes: The attributes dict from a file search result.
517+
vector_store_ids: The vector store IDs used in the query.
518+
rag_id_mapping: Mapping from vector_db_id to user-facing rag_id.
519+
520+
Returns:
521+
The resolved index name, or None if it cannot be determined.
522+
"""
523+
vs_id = result_attributes.get("vector_store_id")
524+
if vs_id:
525+
return rag_id_mapping.get(vs_id, vs_id)
526+
527+
if len(vector_store_ids) == 1:
528+
vs_id = vector_store_ids[0]
529+
return rag_id_mapping.get(vs_id, vs_id)
530+
531+
return None
532+
533+
534+
def _build_chunk_attributes(result: Any) -> dict[str, Any]:
535+
"""Build the attributes dict for a RAGChunk from a file search result.
536+
537+
Preserves the original result metadata (filename, file_id) alongside
538+
any provider-supplied attributes for debugging and downstream use.
539+
540+
Parameters:
541+
result: A file search result object from llama-stack.
542+
543+
Returns:
544+
A merged attributes dict.
545+
"""
546+
provider_attrs: dict[str, Any] = {}
547+
raw = getattr(result, "attributes", None)
548+
if isinstance(raw, dict):
549+
provider_attrs = dict(raw)
550+
551+
attrs: dict[str, Any] = {**provider_attrs}
552+
553+
filename = getattr(result, "filename", None)
554+
if filename is not None:
555+
attrs["filename"] = filename
556+
557+
file_id = getattr(result, "file_id", None)
558+
if file_id is not None:
559+
attrs["file_id"] = file_id
560+
561+
return attrs
562+
563+
493564
def extract_rag_chunks_from_file_search_item(
494565
item: OpenAIResponseOutputMessageFileSearchToolCall,
495566
rag_chunks: list[RAGChunk],
567+
vector_store_ids: Optional[list[str]] = None,
568+
rag_id_mapping: Optional[dict[str, str]] = None,
496569
) -> None:
497570
"""Extract RAG chunks from a file search tool call item and append to rag_chunks.
498571
499572
Args:
500573
item: The file search tool call item.
501574
rag_chunks: List to append extracted RAG chunks to.
575+
vector_store_ids: Vector store IDs used in the query for source resolution.
576+
rag_id_mapping: Mapping from vector_db_id to user-facing rag_id.
502577
"""
503-
if item.results is not None:
504-
for result in item.results:
505-
rag_chunk = RAGChunk(
506-
content=result.text, source=result.filename, score=result.score
507-
)
508-
rag_chunks.append(rag_chunk)
578+
if item.results is None:
579+
return
580+
581+
vs_ids = vector_store_ids or []
582+
mapping = rag_id_mapping or {}
583+
584+
for result in item.results:
585+
attrs = _build_chunk_attributes(result)
586+
source = _resolve_source_for_result(attrs, vs_ids, mapping)
587+
rag_chunk = RAGChunk(
588+
content=result.text,
589+
source=source,
590+
score=result.score,
591+
attributes=attrs,
592+
)
593+
rag_chunks.append(rag_chunk)
509594

510595

511596
def parse_rag_chunks_from_responses_api(
512597
response_obj: OpenAIResponseObject,
598+
vector_store_ids: Optional[list[str]] = None,
599+
rag_id_mapping: Optional[dict[str, str]] = None,
513600
) -> list[RAGChunk]:
514601
"""
515602
Extract rag_chunks from the llama-stack OpenAI response.
516603
517604
Args:
518605
response_obj: The ResponseObject from OpenAI compatible response API in llama-stack.
606+
vector_store_ids: Vector store IDs used in the query for source resolution.
607+
rag_id_mapping: Mapping from vector_db_id to user-facing rag_id.
519608
520609
Returns:
521-
List of RAGChunk with content, source, score
610+
List of RAGChunk with content, source, score, and attributes
522611
"""
523612
rag_chunks: list[RAGChunk] = []
524613

525614
for output_item in response_obj.output:
526615
item_type = getattr(output_item, "type", None)
527616
if item_type == "file_search_call":
528617
item = cast(OpenAIResponseOutputMessageFileSearchToolCall, output_item)
529-
extract_rag_chunks_from_file_search_item(item, rag_chunks)
618+
extract_rag_chunks_from_file_search_item(
619+
item, rag_chunks, vector_store_ids, rag_id_mapping
620+
)
530621

531622
return rag_chunks
532623

533624

534625
def parse_referenced_documents_from_responses_api(
535626
response: OpenAIResponseObject, # pylint: disable=unused-argument
627+
vector_store_ids: Optional[list[str]] = None,
628+
rag_id_mapping: Optional[dict[str, str]] = None,
536629
) -> list[ReferencedDocument]:
537630
"""
538631
Parse referenced documents from OpenAI Responses API response.
539632
540633
Args:
541634
response: The OpenAI Response API response object
635+
vector_store_ids: Vector store IDs used in the query for source resolution.
636+
rag_id_mapping: Mapping from vector_db_id to user-facing rag_id.
542637
543638
Returns:
544-
list[ReferencedDocument]: List of referenced documents with doc_url and doc_title
639+
list[ReferencedDocument]: List of referenced documents with doc_url, doc_title, and source
545640
"""
546641
documents: list[ReferencedDocument] = []
547642
# Use a set to track unique documents by (doc_url, doc_title) tuple
548643
seen_docs: set[tuple[Optional[str], Optional[str]]] = set()
549644

645+
vs_ids = vector_store_ids or []
646+
mapping = rag_id_mapping or {}
647+
550648
# Handle None response (e.g., when agent fails)
551649
if response is None or not response.output:
552650
return documents
@@ -574,12 +672,18 @@ def parse_referenced_documents_from_responses_api(
574672
)
575673
doc_title = attributes.get("title")
576674

675+
source = _resolve_source_for_result(attributes, vs_ids, mapping)
676+
577677
if doc_title or doc_url:
578678
# Treat empty string as None for URL to satisfy Optional[AnyUrl]
579679
final_url = doc_url if doc_url else None
580680
if (final_url, doc_title) not in seen_docs:
581681
documents.append(
582-
ReferencedDocument(doc_url=final_url, doc_title=doc_title)
682+
ReferencedDocument(
683+
doc_url=final_url,
684+
doc_title=doc_title,
685+
source=source,
686+
)
583687
)
584688
seen_docs.add((final_url, doc_title))
585689

@@ -809,7 +913,7 @@ async def prepare_tools_for_responses_api(
809913
token: str,
810914
config: AppConfig,
811915
mcp_headers: Optional[dict[str, dict[str, str]]] = None,
812-
) -> Optional[list[dict[str, Any]]]:
916+
) -> tuple[Optional[list[dict[str, Any]]], list[str]]:
813917
"""
814918
Prepare tools for Responses API including RAG and MCP tools.
815919
@@ -824,11 +928,11 @@ async def prepare_tools_for_responses_api(
824928
mcp_headers: Per-request headers for MCP servers
825929
826930
Returns:
827-
Optional[list[dict[str, Any]]]: List of tool configurations for the
828-
Responses API, or None if no_tools is True or no tools are available
931+
tuple[Optional[list[dict[str, Any]]], list[str]]: A tuple of the tool
932+
configurations list (or None if no tools) and the vector store IDs used.
829933
"""
830934
if query_request.no_tools:
831-
return None
935+
return None, []
832936

833937
toolgroups = []
834938
# Get vector stores for RAG tools - use specified ones or fetch all
@@ -855,6 +959,6 @@ async def prepare_tools_for_responses_api(
855959
)
856960
# Convert empty list to None for consistency with existing behavior
857961
if not toolgroups:
858-
return None
962+
return None, vector_store_ids
859963

860-
return toolgroups
964+
return toolgroups, vector_store_ids

src/app/endpoints/rags.py

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,14 @@ async def rags_endpoint_handler(
8888
rags = await client.vector_stores.list()
8989
logger.info("List of rags: %d", len(rags.data))
9090

91-
# convert into the proper response object
92-
return RAGListResponse(rags=[rag.id for rag in rags.data])
91+
# Map llama-stack vector store IDs to user-facing rag_ids from config
92+
rag_id_mapping = configuration.rag_id_mapping
93+
rag_ids = [
94+
configuration.resolve_index_name(rag.id, rag_id_mapping)
95+
for rag in rags.data
96+
]
97+
98+
return RAGListResponse(rags=rag_ids)
9399

94100
# connection to Llama Stack server
95101
except APIConnectionError as e:
@@ -98,6 +104,30 @@ async def rags_endpoint_handler(
98104
raise HTTPException(**response.model_dump()) from e
99105

100106

107+
def _resolve_rag_id_to_vector_db_id(rag_id: str) -> str:
    """Translate a user-facing rag_id into the llama-stack vector_db_id.

    Looks for a BYOK RAG entry whose ``rag_id`` equals the given identifier
    and returns that entry's ``vector_db_id``. When no entry matches, the
    identifier is returned as-is, on the assumption that it already is a
    llama-stack vector store ID.

    Parameters:
        rag_id: The user-provided RAG identifier.

    Returns:
        The llama-stack vector_db_id of the first matching entry, or the
        original ID if no mapping is found or the BYOK RAG list cannot be
        read.
    """
    try:
        configured_rags = configuration.configuration.byok_rag
    except (AttributeError, RuntimeError):
        # The BYOK RAG section could not be read: pass the ID through.
        return rag_id

    # First matching entry wins; fall back to the ID unchanged.
    return next(
        (
            entry.vector_db_id
            for entry in configured_rags
            if entry.rag_id == rag_id
        ),
        rag_id,
    )
129+
130+
101131
@router.get("/rags/{rag_id}", responses=rag_responses)
102132
@authorize(Action.GET_RAG)
103133
async def get_rag_endpoint_handler(
@@ -107,6 +137,10 @@ async def get_rag_endpoint_handler(
107137
) -> RAGInfoResponse:
108138
"""Retrieve a single RAG by its unique ID.
109139
140+
Accepts both user-facing rag_id (from LCORE config) and llama-stack
141+
vector_store_id. If a rag_id from config is provided, it is resolved
142+
to the underlying vector_store_id for the llama-stack lookup.
143+
110144
Returns:
111145
RAGInfoResponse: A single RAG's details.
112146
@@ -129,13 +163,22 @@ async def get_rag_endpoint_handler(
129163
llama_stack_configuration = configuration.llama_stack_configuration
130164
logger.info("Llama stack config: %s", llama_stack_configuration)
131165

166+
# Resolve user-facing rag_id to llama-stack vector_db_id
167+
vector_db_id = _resolve_rag_id_to_vector_db_id(rag_id)
168+
132169
try:
133170
# try to get Llama Stack client
134171
client = AsyncLlamaStackClientHolder().get_client()
135172
# retrieve info about RAG
136-
rag_info = await client.vector_stores.retrieve(rag_id)
173+
rag_info = await client.vector_stores.retrieve(vector_db_id)
174+
175+
# Return the user-facing ID (rag_id from config if mapped, otherwise as-is)
176+
display_id = configuration.resolve_index_name(
177+
rag_info.id, configuration.rag_id_mapping
178+
)
179+
137180
return RAGInfoResponse(
138-
id=rag_info.id,
181+
id=display_id,
139182
name=rag_info.name,
140183
created_at=rag_info.created_at,
141184
last_active_at=rag_info.last_active_at,

0 commit comments

Comments
 (0)