lightspeed-core
diff --git a/src/app/endpoints/a2a.py b/src/app/endpoints/a2a.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/app/endpoints/query_v2.py b/src/app/endpoints/query_v2.py
Lines changed: 124 additions & 20 deletions
diff --git a/src/app/endpoints/rags.py b/src/app/endpoints/rags.py
Lines changed: 47 additions & 4 deletions
@@ -327,7 +327,7 @@ async def _process_task_streaming(  # pylint: disable=too-many-locals
             )
 
             # Stream response from LLM using the Responses API
-            stream, conversation_id = await retrieve_response(
+            stream, conversation_id, _vs_ids, _mapping = await retrieve_response(
                 client,
                 llama_stack_model_id,
                 query_request,
 
@@ -85,6 +85,8 @@
 def _build_tool_call_summary(  # pylint: disable=too-many-return-statements,too-many-branches
     output_item: OpenAIResponseOutput,
     rag_chunks: list[RAGChunk],
+    vector_store_ids: Optional[list[str]] = None,
+    rag_id_mapping: Optional[dict[str, str]] = None,
 ) -> tuple[Optional[ToolCallSummary], Optional[ToolResultSummary]]:
     """Translate Responses API tool outputs into ToolCallSummary and ToolResultSummary records.
 
@@ -94,6 +96,8 @@ def _build_tool_call_summary(  # pylint: disable=too-many-return-statements,too-
     Args:
         output_item: An OpenAIResponseOutput item from the response.output array
         rag_chunks: List to append extracted RAG chunks to (from file_search_call items)
+        vector_store_ids: Vector store IDs used in the query for source resolution.
+        rag_id_mapping: Mapping from vector_db_id to user-facing rag_id.
     Returns:
         A tuple of (ToolCallSummary, ToolResultSummary) one of them possibly None
         if current llama stack Responses API does not provide the information.
@@ -125,7 +129,9 @@ def _build_tool_call_summary(  # pylint: disable=too-many-return-statements,too-
         file_search_item = cast(
             OpenAIResponseOutputMessageFileSearchToolCall, output_item
         )
-        extract_rag_chunks_from_file_search_item(file_search_item, rag_chunks)
+        extract_rag_chunks_from_file_search_item(
+            file_search_item, rag_chunks, vector_store_ids, rag_id_mapping
+        )
         response_payload: Optional[dict[str, Any]] = None
         if file_search_item.results is not None:
             response_payload = {
@@ -365,9 +371,10 @@ async def retrieve_response(  # pylint: disable=too-many-locals,too-many-branche
         validate_attachments_metadata(query_request.attachments)
 
     # Prepare tools for responses API
-    toolgroups = await prepare_tools_for_responses_api(
+    toolgroups, vector_store_ids = await prepare_tools_for_responses_api(
         client, query_request, token, configuration, mcp_headers
     )
+    rag_id_mapping = configuration.rag_id_mapping
 
     # Prepare input for Responses API
     # Convert attachments to text and concatenate with query
@@ -450,7 +457,9 @@ async def retrieve_response(  # pylint: disable=too-many-locals,too-many-branche
         if message_text:
             llm_response += message_text
 
-        tool_call, tool_result = _build_tool_call_summary(output_item, rag_chunks)
+        tool_call, tool_result = _build_tool_call_summary(
+            output_item, rag_chunks, vector_store_ids, rag_id_mapping
+        )
         if tool_call:
             tool_calls.append(tool_call)
         if tool_result:
@@ -470,7 +479,9 @@ async def retrieve_response(  # pylint: disable=too-many-locals,too-many-branche
     )
 
     # Extract referenced documents and token usage from Responses API response
-    referenced_documents = parse_referenced_documents_from_responses_api(response)
+    referenced_documents = parse_referenced_documents_from_responses_api(
+        response, vector_store_ids, rag_id_mapping
+    )
     model_label = model_id.split("/", 1)[1] if "/" in model_id else model_id
     token_usage = extract_token_usage_from_responses_api(
         response, model_label, provider_id, system_prompt
@@ -490,63 +501,150 @@ async def retrieve_response(  # pylint: disable=too-many-locals,too-many-branche
     )
 
 
+def _resolve_source_for_result(
+    result_attributes: dict[str, Any],
+    vector_store_ids: list[str],
+    rag_id_mapping: dict[str, str],
+) -> Optional[str]:
+    """Resolve the index name for a single file search result.
+
+    Attempts to determine the knowledge source index name by checking, in order:
+    1. A ``vector_store_id`` key in the result's attributes dict
+    2. The sole vector store when exactly one was queried
+
+    Parameters:
+        result_attributes: The attributes dict from a file search result.
+        vector_store_ids: The vector store IDs used in the query.
+        rag_id_mapping: Mapping from vector_db_id to user-facing rag_id.
+
+    Returns:
+        The resolved index name, or None if it cannot be determined.
+    """
+    vs_id = result_attributes.get("vector_store_id")
+    if vs_id:  # truthiness test: a missing, None, or empty value falls through
+        return rag_id_mapping.get(vs_id, vs_id)  # unmapped IDs are returned unchanged
+
+    if len(vector_store_ids) == 1:  # only unambiguous when a single store was queried
+        vs_id = vector_store_ids[0]
+        return rag_id_mapping.get(vs_id, vs_id)  # same raw-ID fallback as above
+
+    return None  # no per-result hint and zero or several stores queried
+
+
+def _build_chunk_attributes(result: Any) -> dict[str, Any]:
+    """Build the attributes dict for a RAGChunk from a file search result.
+
+    Preserves the original result metadata (filename, file_id) alongside
+    any provider-supplied attributes for debugging and downstream use.
+
+    Parameters:
+        result: A file search result object from llama-stack.
+
+    Returns:
+        A merged attributes dict.
+    """
+    provider_attrs: dict[str, Any] = {}
+    raw = getattr(result, "attributes", None)
+    if isinstance(raw, dict):  # an absent or non-dict attributes value is ignored
+        provider_attrs = dict(raw)  # shallow copy so the result's own dict is not mutated
+
+    attrs: dict[str, Any] = {**provider_attrs}
+
+    filename = getattr(result, "filename", None)
+    if filename is not None:
+        attrs["filename"] = filename  # overrides a provider-supplied "filename" key
+
+    file_id = getattr(result, "file_id", None)
+    if file_id is not None:
+        attrs["file_id"] = file_id  # overrides a provider-supplied "file_id" key
+
+    return attrs
+
+
 def extract_rag_chunks_from_file_search_item(
     item: OpenAIResponseOutputMessageFileSearchToolCall,
     rag_chunks: list[RAGChunk],
+    vector_store_ids: Optional[list[str]] = None,
+    rag_id_mapping: Optional[dict[str, str]] = None,
 ) -> None:
     """Extract RAG chunks from a file search tool call item and append to rag_chunks.
 
     Args:
         item: The file search tool call item.
         rag_chunks: List to append extracted RAG chunks to.
+        vector_store_ids: Vector store IDs used in the query for source resolution.
+        rag_id_mapping: Mapping from vector_db_id to user-facing rag_id.
     """
-    if item.results is not None:
-        for result in item.results:
-            rag_chunk = RAGChunk(
-                content=result.text, source=result.filename, score=result.score
-            )
-            rag_chunks.append(rag_chunk)
+    if item.results is None:
+        return
+
+    vs_ids = vector_store_ids or []  # None becomes an empty list for the resolver
+    mapping = rag_id_mapping or {}  # None becomes an empty mapping for the resolver
+
+    for result in item.results:
+        attrs = _build_chunk_attributes(result)
+        source = _resolve_source_for_result(attrs, vs_ids, mapping)  # may be None
+        rag_chunk = RAGChunk(
+            content=result.text,
+            source=source,  # resolved index name; no longer result.filename
+            score=result.score,
+            attributes=attrs,  # carries filename/file_id when the result has them
+        )
+        rag_chunks.append(rag_chunk)
 
 
 def parse_rag_chunks_from_responses_api(
     response_obj: OpenAIResponseObject,
+    vector_store_ids: Optional[list[str]] = None,
+    rag_id_mapping: Optional[dict[str, str]] = None,
 ) -> list[RAGChunk]:
     """
     Extract rag_chunks from the llama-stack OpenAI response.
 
     Args:
         response_obj: The ResponseObject from OpenAI compatible response API in llama-stack.
+        vector_store_ids: Vector store IDs used in the query for source resolution.
+        rag_id_mapping: Mapping from vector_db_id to user-facing rag_id.
 
     Returns:
-        List of RAGChunk with content, source, score
+        List of RAGChunk with content, source, score, and attributes
     """
     rag_chunks: list[RAGChunk] = []
 
     for output_item in response_obj.output:
         item_type = getattr(output_item, "type", None)
         if item_type == "file_search_call":
             item = cast(OpenAIResponseOutputMessageFileSearchToolCall, output_item)
-            extract_rag_chunks_from_file_search_item(item, rag_chunks)
+            extract_rag_chunks_from_file_search_item(
+                item, rag_chunks, vector_store_ids, rag_id_mapping  # forwarded as-is
+            )
 
     return rag_chunks
 
 
 def parse_referenced_documents_from_responses_api(
     response: OpenAIResponseObject,  # pylint: disable=unused-argument
+    vector_store_ids: Optional[list[str]] = None,
+    rag_id_mapping: Optional[dict[str, str]] = None,
 ) -> list[ReferencedDocument]:
     """
     Parse referenced documents from OpenAI Responses API response.
 
     Args:
         response: The OpenAI Response API response object
+        vector_store_ids: Vector store IDs used in the query for source resolution.
+        rag_id_mapping: Mapping from vector_db_id to user-facing rag_id.
 
     Returns:
-        list[ReferencedDocument]: List of referenced documents with doc_url and doc_title
+        list[ReferencedDocument]: List of referenced documents with doc_url, doc_title, and source
     """
     documents: list[ReferencedDocument] = []
     # Use a set to track unique documents by (doc_url, doc_title) tuple
     seen_docs: set[tuple[Optional[str], Optional[str]]] = set()
 
+    vs_ids = vector_store_ids or []
+    mapping = rag_id_mapping or {}
+
     # Handle None response (e.g., when agent fails)
     if response is None or not response.output:
         return documents
@@ -574,12 +672,18 @@ def parse_referenced_documents_from_responses_api(
                 )
                 doc_title = attributes.get("title")
 
+                source = _resolve_source_for_result(attributes, vs_ids, mapping)
+
                 if doc_title or doc_url:
                     # Treat empty string as None for URL to satisfy Optional[AnyUrl]
                     final_url = doc_url if doc_url else None
                     if (final_url, doc_title) not in seen_docs:
                         documents.append(
-                            ReferencedDocument(doc_url=final_url, doc_title=doc_title)
+                            ReferencedDocument(
+                                doc_url=final_url,
+                                doc_title=doc_title,
+                                source=source,
+                            )
                         )
                         seen_docs.add((final_url, doc_title))
 
@@ -809,7 +913,7 @@ async def prepare_tools_for_responses_api(
     token: str,
     config: AppConfig,
     mcp_headers: Optional[dict[str, dict[str, str]]] = None,
-) -> Optional[list[dict[str, Any]]]:
+) -> tuple[Optional[list[dict[str, Any]]], list[str]]:
     """
     Prepare tools for Responses API including RAG and MCP tools.
 
@@ -824,11 +928,11 @@ async def prepare_tools_for_responses_api(
         mcp_headers: Per-request headers for MCP servers
 
     Returns:
-        Optional[list[dict[str, Any]]]: List of tool configurations for the
-        Responses API, or None if no_tools is True or no tools are available
+        tuple[Optional[list[dict[str, Any]]], list[str]]: A tuple of the tool
+        configurations list (or None if no tools) and the vector store IDs used.
     """
     if query_request.no_tools:
-        return None
+        return None, []
 
     toolgroups = []
     # Get vector stores for RAG tools - use specified ones or fetch all
@@ -855,6 +959,6 @@ async def prepare_tools_for_responses_api(
         )
     # Convert empty list to None for consistency with existing behavior
     if not toolgroups:
-        return None
+        return None, vector_store_ids
 
-    return toolgroups
+    return toolgroups, vector_store_ids
@@ -88,8 +88,14 @@ async def rags_endpoint_handler(
         rags = await client.vector_stores.list()
         logger.info("List of rags: %d", len(rags.data))
 
-        # convert into the proper response object
-        return RAGListResponse(rags=[rag.id for rag in rags.data])
+        # Map llama-stack vector store IDs to user-facing rag_ids from config
+        rag_id_mapping = configuration.rag_id_mapping
+        rag_ids = [
+            configuration.resolve_index_name(rag.id, rag_id_mapping)
+            for rag in rags.data
+        ]
+
+        return RAGListResponse(rags=rag_ids)
 
     # connection to Llama Stack server
     except APIConnectionError as e:
@@ -98,6 +104,30 @@ async def rags_endpoint_handler(
         raise HTTPException(**response.model_dump()) from e
 
 
+def _resolve_rag_id_to_vector_db_id(rag_id: str) -> str:
+    """Resolve a user-facing rag_id to the llama-stack vector_db_id.
+
+    Checks if the given ID matches a rag_id in the BYOK config and returns
+    the corresponding vector_db_id. If no match, returns the ID unchanged
+    (assuming it is already a llama-stack vector store ID).
+
+    Parameters:
+        rag_id: The user-provided RAG identifier.
+
+    Returns:
+        The llama-stack vector_db_id, or the original ID if no mapping found.
+    """
+    try:
+        byok_rags = configuration.configuration.byok_rag
+    except (AttributeError, RuntimeError):  # lookup failed; pass the ID through unchanged
+        return rag_id
+
+    for brag in byok_rags:  # NOTE(review): assumes byok_rag is iterable, not None - confirm
+        if brag.rag_id == rag_id:
+            return brag.vector_db_id  # first matching entry wins
+    return rag_id
+
+
 @router.get("/rags/{rag_id}", responses=rag_responses)
 @authorize(Action.GET_RAG)
 async def get_rag_endpoint_handler(
@@ -107,6 +137,10 @@ async def get_rag_endpoint_handler(
 ) -> RAGInfoResponse:
     """Retrieve a single RAG by its unique ID.
 
+    Accepts both user-facing rag_id (from LCORE config) and llama-stack
+    vector_store_id. If a rag_id from config is provided, it is resolved
+    to the underlying vector_store_id for the llama-stack lookup.
+
     Returns:
         RAGInfoResponse: A single RAG's details.
 
@@ -129,13 +163,22 @@ async def get_rag_endpoint_handler(
     llama_stack_configuration = configuration.llama_stack_configuration
     logger.info("Llama stack config: %s", llama_stack_configuration)
 
+    # Resolve user-facing rag_id to llama-stack vector_db_id
+    vector_db_id = _resolve_rag_id_to_vector_db_id(rag_id)
+
     try:
         # try to get Llama Stack client
         client = AsyncLlamaStackClientHolder().get_client()
         # retrieve info about RAG
-        rag_info = await client.vector_stores.retrieve(rag_id)
+        rag_info = await client.vector_stores.retrieve(vector_db_id)
+
+        # Return the user-facing ID (rag_id from config if mapped, otherwise as-is)
+        display_id = configuration.resolve_index_name(
+            rag_info.id, configuration.rag_id_mapping
+        )
+
         return RAGInfoResponse(
-            id=rag_info.id,
+            id=display_id,
             name=rag_info.name,
             created_at=rag_info.created_at,
             last_active_at=rag_info.last_active_at,
Original file line number	Diff line number	Diff line change
`@@ -327,7 +327,7 @@ async def _process_task_streaming( # pylint: disable=too-many-locals`
`327`	`327`	`)`
`328`	`328`
`329`	`329`	`# Stream response from LLM using the Responses API`
`330`		`- stream, conversation_id = await retrieve_response(`
	`330`	`+ stream, conversation_id, _vs_ids, _mapping = await retrieve_response(`
`331`	`331`	`client,`
`332`	`332`	`llama_stack_model_id,`
`333`	`333`	`query_request,`