Use user-facing index names from LCORE config instead of llama-stack IDs

max-svistunov · max-svistunov · commit 6044d4fffe5c · 2026-02-16T15:14:56.000+01:00
Model changes:
- RAGChunk: Change source field to contain the index name from config
  instead of the llama-stack filename/UUID. Add attributes field
  (Optional[dict[str, Any]]) to preserve document metadata from the
  RAG provider (filename, file_id, and all provider-supplied attributes).
- ReferencedDocument: Add source field (Optional[str]) containing the
  index name for consistency with RAGChunk.
- ResponseGeneratorContext: Add vector_store_ids and rag_id_mapping
  fields for passing index identification context through streaming.

Configuration:
- AppConfig: Add rag_id_mapping property that builds a
  {vector_db_id: rag_id} mapping from BYOK RAG configuration.
  Add resolve_index_name helper for mapping vector store IDs to
  user-facing names with passthrough for unmapped IDs.

Extraction logic (query_v2.py):
- Add _resolve_source_for_result() to determine index name per result:
  checks result attributes for vector_store_id, falls back to single
  queried store, returns None when ambiguous (multiple stores).
- Add _build_chunk_attributes() to merge provider attributes with
  original filename and file_id for debugging.
- Update extract_rag_chunks_from_file_search_item(),
  parse_rag_chunks_from_responses_api(), _build_tool_call_summary(),
  and parse_referenced_documents_from_responses_api() to accept and
  forward vector_store_ids and rag_id_mapping parameters.
- Update prepare_tools_for_responses_api() to also return the resolved
  vector_store_ids alongside tool configurations.

/v1/rags endpoint:
- List endpoint now maps llama-stack vector_store_ids to user-facing
  rag_ids from BYOK config. Unmapped IDs pass through unchanged.
- Detail endpoint accepts both rag_id (from config) and raw
  vector_store_id, resolving rag_id to vector_db_id for the
  llama-stack lookup. Response displays the user-facing ID.

Streaming support:
- Update streaming_query_v2.py retrieve_response to return
  vector_store_ids and rag_id_mapping.
- Update streaming_query.py to unpack and populate
  ResponseGeneratorContext with index identification data.
- Update a2a.py to handle the expanded return tuple.

Transcript storage:
- create_rag_chunks_dict() now includes the attributes field in
  serialized RAG chunk dictionaries.

Tests:
- Add TestResolveSourceForResult: 7 tests covering attribute-based
  resolution, single store, multi-store, empty inputs, precedence.
- Add TestBuildChunkAttributes: 3 tests for attribute merging.
- Add TestExtractRagChunksWithIndexMapping: 3 tests for end-to-end
  chunk extraction with index mapping.
- Add TestParseReferencedDocumentsWithSource: 2 tests for referenced
  document source population.
- Add BYOK RAG mapping tests for /v1/rags: list with mapping, detail
  with rag_id resolution, _resolve_rag_id_to_vector_db_id.
- Add configuration tests: rag_id_mapping property, resolve_index_name,
  error when config not loaded.
- Update existing tests for new field defaults and return types.
diff --git a/src/app/endpoints/rags.py b/src/app/endpoints/rags.py
@@ -88,8 +88,14 @@ async def rags_endpoint_handler(
         rags = await client.vector_stores.list()
         logger.info("List of rags: %d", len(rags.data))
 
-        # convert into the proper response object
-        return RAGListResponse(rags=[rag.id for rag in rags.data])
+        # Map llama-stack vector store IDs to user-facing rag_ids from config
+        rag_id_mapping = configuration.rag_id_mapping
+        rag_ids = [
+            configuration.resolve_index_name(rag.id, rag_id_mapping)
+            for rag in rags.data
+        ]
+
+        return RAGListResponse(rags=rag_ids)
 
     # connection to Llama Stack server
     except APIConnectionError as e:
@@ -98,6 +104,30 @@ async def rags_endpoint_handler(
         raise HTTPException(**response.model_dump()) from e
 
 
+def _resolve_rag_id_to_vector_db_id(rag_id: str) -> str:
+    """Resolve a user-facing rag_id to the llama-stack vector_db_id.
+
+    Returns the vector_db_id of the first BYOK RAG entry whose rag_id equals
+    the given ID. If no entry matches, or reading the BYOK config raises
+    AttributeError or RuntimeError, the ID is returned unchanged.
+
+    Parameters:
+        rag_id: The user-provided RAG identifier.
+
+    Returns:
+        The llama-stack vector_db_id, or the original ID if no mapping found.
+    """
+    try:
+        byok_rags = configuration.configuration.byok_rag
+    except (AttributeError, RuntimeError):  # NOTE(review): is LogicError covered?
+        return rag_id  # config unreadable: pass the ID through unchanged
+
+    for brag in byok_rags:
+        if brag.rag_id == rag_id:
+            return brag.vector_db_id
+    return rag_id
+
+
 @router.get("/rags/{rag_id}", responses=rag_responses)
 @authorize(Action.GET_RAG)
 async def get_rag_endpoint_handler(
@@ -107,6 +137,10 @@ async def get_rag_endpoint_handler(
 ) -> RAGInfoResponse:
     """Retrieve a single RAG by its unique ID.
 
+    Accepts both user-facing rag_id (from LCORE config) and llama-stack
+    vector_store_id. If a rag_id from config is provided, it is resolved
+    to the underlying vector_store_id for the llama-stack lookup.
+
     Returns:
         RAGInfoResponse: A single RAG's details.
 
@@ -129,13 +163,22 @@ async def get_rag_endpoint_handler(
     llama_stack_configuration = configuration.llama_stack_configuration
     logger.info("Llama stack config: %s", llama_stack_configuration)
 
+    # Resolve user-facing rag_id to llama-stack vector_db_id
+    vector_db_id = _resolve_rag_id_to_vector_db_id(rag_id)
+
     try:
         # try to get Llama Stack client
         client = AsyncLlamaStackClientHolder().get_client()
         # retrieve info about RAG
-        rag_info = await client.vector_stores.retrieve(rag_id)
+        rag_info = await client.vector_stores.retrieve(vector_db_id)
+
+        # Return the user-facing ID (rag_id from config if mapped, otherwise as-is)
+        display_id = configuration.resolve_index_name(
+            rag_info.id, configuration.rag_id_mapping
+        )
+
         return RAGInfoResponse(
-            id=rag_info.id,
+            id=display_id,
             name=rag_info.name,
             created_at=rag_info.created_at,
             last_active_at=rag_info.last_active_at,
diff --git a/src/configuration.py b/src/configuration.py
@@ -371,5 +371,38 @@ def solr(self) -> Optional[SolrConfiguration]:
             raise LogicError("logic error: configuration is not loaded")
         return self._configuration.solr
 
+    @property
+    def rag_id_mapping(self) -> dict[str, str]:
+        """Build the vector_db_id -> rag_id mapping from the BYOK RAG config.
+
+        Returns:
+            dict[str, str]: A new dict built on each access; keys are vector_db_ids,
+            values are rag_ids. For a repeated vector_db_id the last entry wins.
+
+        Raises:
+            LogicError: If the configuration has not been loaded.
+        """
+        if self._configuration is None:
+            raise LogicError("logic error: configuration is not loaded")
+        return {brag.vector_db_id: brag.rag_id for brag in self._configuration.byok_rag}
+
+    def resolve_index_name(
+        self, vector_store_id: str, rag_id_mapping: Optional[dict[str, str]] = None
+    ) -> str:
+        """Resolve a vector store ID to its user-facing index name.
+
+        Uses the provided mapping; only when it is None is self.rag_id_mapping
+        read (which raises LogicError if the configuration is not loaded).
+
+        Parameters:
+            vector_store_id: The llama-stack vector store identifier.
+            rag_id_mapping: Pre-built mapping; an empty dict is used as given.
+
+        Returns:
+            str: The mapped index name, or vector_store_id if it has no entry.
+        """
+        mapping = rag_id_mapping if rag_id_mapping is not None else self.rag_id_mapping
+        return mapping.get(vector_store_id, vector_store_id)
+
 
 configuration: AppConfig = AppConfig()
diff --git a/src/models/context.py b/src/models/context.py
@@ -1,6 +1,7 @@
 """Context objects for internal operations."""
 
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+from typing import Any
 
 from llama_stack_client import AsyncLlamaStackClient
 
@@ -23,6 +24,9 @@ class ResponseGeneratorContext:
         query_request: The query request object
         started_at: Timestamp when the request started (ISO 8601 format)
         client: The Llama Stack client for API interactions
+        metadata_map: Dictionary for storing metadata from tool responses
+        vector_store_ids: Vector store IDs used in the query for source resolution.
+        rag_id_mapping: Mapping from vector_db_id to user-facing rag_id.
     """
 
     # Conversation & User context
@@ -39,3 +43,8 @@ class ResponseGeneratorContext:
 
     # Dependencies & State
     client: AsyncLlamaStackClient
+    metadata_map: dict[str, dict[str, Any]]
+
+    # RAG index identification
+    vector_store_ids: list[str] = field(default_factory=list)
+    rag_id_mapping: dict[str, str] = field(default_factory=dict)
diff --git a/src/utils/endpoints.py b/src/utils/endpoints.py
@@ -196,6 +196,27 @@ def check_configuration_loaded(config: AppConfig) -> None:
         raise HTTPException(**response.model_dump()) from e
 
 
+def create_rag_chunks_dict(summary: TurnSummary) -> list[dict[str, Any]]:
+    """
+    Convert the RAG chunks of a turn summary into plain dictionaries.
+
+    Args:
+        summary: TurnSummary whose rag_chunks are read, in order.
+
+    Returns:
+        One dict per chunk with keys content, source, score, and attributes.
+    """
+    return [
+        {
+            "content": chunk.content,
+            "source": chunk.source,
+            "score": chunk.score,
+            "attributes": chunk.attributes,
+        }
+        for chunk in summary.rag_chunks
+    ]
+
+
 def _process_http_source(
     src: str, doc_urls: set[str]
 ) -> Optional[tuple[Optional[AnyUrl], str]]:
diff --git a/src/utils/types.py b/src/utils/types.py
@@ -154,8 +154,15 @@ class RAGChunk(BaseModel):
     """Model representing a RAG chunk used in the response."""
 
     content: str = Field(description="The content of the chunk")
-    source: Optional[str] = Field(default=None, description="Source document or URL")
+    source: Optional[str] = Field(
+        default=None,
+        description="Index name identifying the knowledge source from configuration",
+    )
     score: Optional[float] = Field(default=None, description="Relevance score")
+    attributes: Optional[dict[str, Any]] = Field(
+        default=None,
+        description="Document metadata from the RAG provider (e.g., url, title, author)",
+    )
 
 
 class ReferencedDocument(BaseModel):
diff --git a/tests/unit/app/endpoints/test_rags.py b/tests/unit/app/endpoints/test_rags.py
@@ -1,11 +1,15 @@
 """Unit tests for the /rags REST API endpoints."""
 
+from pathlib import Path
+from typing import Any
+
 import pytest
 from fastapi import HTTPException, Request, status
 from llama_stack_client import APIConnectionError, BadRequestError
 from pytest_mock import MockerFixture
 
 from app.endpoints.rags import (
+    _resolve_rag_id_to_vector_db_id,
     get_rag_endpoint_handler,
     rags_endpoint_handler,
 )
@@ -244,3 +248,146 @@ def __init__(self) -> None:
     assert response.object == "faiss"
     assert response.status == "completed"
     assert response.usage_bytes == 100
+
+
+def _make_byok_config(tmp_path: Any) -> AppConfig:
+    """Create an AppConfig with two BYOK RAG entries backed by one db file."""
+    db_file = Path(tmp_path) / "test.db"
+    db_file.touch()  # presumably db_path must exist for config validation - confirm
+    cfg = AppConfig()
+    cfg.init_from_dict(
+        {
+            "name": "test",
+            "service": {"host": "localhost", "port": 8080},
+            "llama_stack": {
+                "api_key": "test-key",
+                "url": "http://test.com:1234",
+                "use_as_library_client": False,
+            },
+            "user_data_collection": {},
+            "authentication": {"module": "noop"},
+            "authorization": {"access_rules": []},
+            "byok_rag": [
+                {
+                    "rag_id": "ocp-4.18-docs",
+                    "rag_type": "inline::faiss",
+                    "embedding_model": "all-MiniLM-L6-v2",
+                    "embedding_dimension": 384,
+                    "vector_db_id": "vs_abc123",
+                    "db_path": str(db_file),
+                },
+                {
+                    "rag_id": "company-kb",
+                    "rag_type": "inline::faiss",
+                    "embedding_model": "all-MiniLM-L6-v2",
+                    "embedding_dimension": 384,
+                    "vector_db_id": "vs_def456",
+                    "db_path": str(db_file),
+                },
+            ],
+        }
+    )
+    return cfg
+
+
+@pytest.mark.asyncio
+async def test_rags_endpoint_returns_rag_ids_from_config(
+    mocker: MockerFixture, tmp_path: Path
+) -> None:
+    """Test that /rags endpoint maps llama-stack IDs to user-facing rag_ids."""
+    byok_config = _make_byok_config(str(tmp_path))
+    mocker.patch("app.endpoints.rags.configuration", byok_config)
+
+    # pylint: disable=R0903
+    class RagInfo:
+        """RagInfo mock."""
+
+        def __init__(self, rag_id: str) -> None:
+            """Initialize with ID."""
+            self.id = rag_id
+
+    # pylint: disable=R0903
+    class RagList:
+        """List of RAGs mock."""
+
+        def __init__(self) -> None:
+            """Initialize with mapped and unmapped entries."""
+            self.data = [
+                RagInfo("vs_abc123"),  # mapped to ocp-4.18-docs
+                RagInfo("vs_def456"),  # mapped to company-kb
+                RagInfo("vs_unmapped"),  # not in config, passed through
+            ]
+
+    mock_client = mocker.AsyncMock()
+    mock_client.vector_stores.list.return_value = RagList()
+    mocker.patch(
+        "app.endpoints.rags.AsyncLlamaStackClientHolder"
+    ).return_value.get_client.return_value = mock_client
+
+    request = Request(scope={"type": "http"})
+    auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")
+
+    response = await rags_endpoint_handler(request=request, auth=auth)
+    assert response.rags == ["ocp-4.18-docs", "company-kb", "vs_unmapped"]
+
+
+@pytest.mark.asyncio
+async def test_rag_info_endpoint_accepts_rag_id_from_config(
+    mocker: MockerFixture, tmp_path: Path
+) -> None:
+    """Test that /rags/{rag_id} accepts a user-facing rag_id and resolves it."""
+    byok_config = _make_byok_config(str(tmp_path))
+    mocker.patch("app.endpoints.rags.configuration", byok_config)
+
+    # pylint: disable=R0902,R0903
+    class RagInfo:
+        """RagInfo mock."""
+
+        def __init__(self) -> None:
+            """Initialize with test data."""
+            self.id = "vs_abc123"
+            self.name = "OCP 4.18 Docs"
+            self.created_at = 100
+            self.last_active_at = 200
+            self.expires_at = 300
+            self.object = "vector_store"
+            self.status = "completed"
+            self.usage_bytes = 500
+
+    mock_client = mocker.AsyncMock()
+    mock_client.vector_stores.retrieve.return_value = RagInfo()
+    mocker.patch(
+        "app.endpoints.rags.AsyncLlamaStackClientHolder"
+    ).return_value.get_client.return_value = mock_client
+
+    request = Request(scope={"type": "http"})
+    auth: AuthTuple = ("test_user_id", "test_user", True, "test_token")
+
+    # Pass the user-facing rag_id, not the vector_store_id
+    response = await get_rag_endpoint_handler(
+        request=request, auth=auth, rag_id="ocp-4.18-docs"
+    )
+
+    # The endpoint should resolve ocp-4.18-docs -> vs_abc123 for the lookup
+    mock_client.vector_stores.retrieve.assert_called_once_with("vs_abc123")
+    # The response should show the user-facing ID
+    assert response.id == "ocp-4.18-docs"
+
+
+def test_resolve_rag_id_to_vector_db_id_with_mapping(
+    mocker: MockerFixture, tmp_path: Path
+) -> None:
+    """Test that _resolve_rag_id_to_vector_db_id maps rag_id to vector_db_id."""
+    byok_config = _make_byok_config(str(tmp_path))
+    mocker.patch("app.endpoints.rags.configuration", byok_config)
+    assert _resolve_rag_id_to_vector_db_id("ocp-4.18-docs") == "vs_abc123"
+    assert _resolve_rag_id_to_vector_db_id("company-kb") == "vs_def456"
+
+
+def test_resolve_rag_id_to_vector_db_id_passthrough(
+    mocker: MockerFixture, tmp_path: Path
+) -> None:
+    """Test that an ID matching no configured rag_id is returned unchanged."""
+    byok_config = _make_byok_config(str(tmp_path))
+    mocker.patch("app.endpoints.rags.configuration", byok_config)
+    assert _resolve_rag_id_to_vector_db_id("vs_unknown") == "vs_unknown"
diff --git a/tests/unit/cache/test_postgres_cache.py b/tests/unit/cache/test_postgres_cache.py
@@ -599,7 +599,7 @@ def test_insert_and_get_with_referenced_documents(
     inserted_json_str = sql_params[-3]
 
     assert json.loads(inserted_json_str) == [
-        {"doc_url": "http://example.com/", "doc_title": "Test Doc"}
+        {"doc_url": "http://example.com/", "doc_title": "Test Doc", "source": None}
     ]
 
     # Simulate the database returning that data
diff --git a/tests/unit/test_configuration.py b/tests/unit/test_configuration.py

Original file line number	Diff line number	Diff line change
`@@ -599,7 +599,7 @@ def test_insert_and_get_with_referenced_documents(`
`599`	`599`	`inserted_json_str = sql_params[-3]`
`600`	`600`
`601`	`601`	`assert json.loads(inserted_json_str) == [`
`602`		`- {"doc_url": "http://example.com/", "doc_title": "Test Doc"}`
	`602`	`+ {"doc_url": "http://example.com/", "doc_title": "Test Doc", "source": None}`
`603`	`603`	`]`
`604`	`604`
`605`	`605`	`# Simulate the database returning that data`