Skip to content

Commit 5375422

Browse files
committed
Add translation of rag_ids to vector_store_ids in inline BYOK
1 parent 620837e commit 5375422

2 files changed

Lines changed: 101 additions & 17 deletions

File tree

src/utils/vector_search.py

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,7 @@ def _process_solr_chunks_for_documents(
317317
async def _fetch_byok_rag(
318318
client: AsyncLlamaStackClient,
319319
query: str,
320-
vector_store_ids: Optional[list[str]] = None,
320+
vector_store_ids: Optional[list[str]] = None,  # User-facing rag_ids; translated to llama-stack vector store ids below
321321
) -> tuple[list[RAGChunk], list[ReferencedDocument]]:
322322
"""Fetch chunks and documents from BYOK RAG sources.
323323
@@ -339,22 +339,23 @@ async def _fetch_byok_rag(
339339

340340
# Determine which BYOK vector stores to query for inline RAG.
341341
# Per-request override takes precedence; otherwise use config-based inline list.
342-
if vector_store_ids is not None:
343-
# Request-level override: filter out Solr store, use the rest
344-
vector_store_ids_to_query = [
345-
vs_id
346-
for vs_id in vector_store_ids
347-
if vs_id != constants.SOLR_DEFAULT_VECTOR_STORE_ID
348-
]
349-
else:
350-
inline_rag_ids = [
351-
rid
352-
for rid in configuration.configuration.rag.inline
353-
if rid != constants.OKP_RAG_ID
354-
]
355-
vector_store_ids_to_query = resolve_vector_store_ids(
356-
inline_rag_ids, configuration.configuration.byok_rag
357-
)
342+
rag_ids_to_query = (
343+
configuration.configuration.rag.inline
344+
if vector_store_ids is None
345+
else vector_store_ids
346+
)
347+
348+
# Translate user-facing rag_ids to llama-stack ids
349+
vector_store_ids_to_query: list[str] = resolve_vector_store_ids(
350+
rag_ids_to_query, configuration.configuration.byok_rag
351+
)
352+
353+
# Filter out the Solr store (applies to both request-level and config-based ids); use the rest
354+
vector_store_ids_to_query = [
355+
vs_id
356+
for vs_id in vector_store_ids_to_query
357+
if vs_id != constants.SOLR_DEFAULT_VECTOR_STORE_ID
358+
]
358359

359360
# If inline byok stores are not defined, we disable the inline RAG for backward compatibility
360361
if not vector_store_ids_to_query:

tests/unit/utils/test_vector_search.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,89 @@ async def test_byok_enabled_success(self, mocker) -> None: # type: ignore[no-un
400400
assert rag_chunks[0].content == "Test content"
401401
assert len(referenced_docs) > 0
402402

403+
@pytest.mark.asyncio
404+
async def test_user_facing_ids_translated_to_internal_ids( # type: ignore[no-untyped-def]
405+
self, mocker
406+
) -> None:
407+
"""Test that user-facing rag_ids (vector_store_ids) are translated to llama-stack ids."""
408+
config_mock = mocker.Mock(spec=AppConfig)
409+
byok_rag_mock = mocker.Mock()
410+
byok_rag_mock.rag_id = "my-kb"
411+
byok_rag_mock.vector_db_id = "vs-internal-001"
412+
config_mock.configuration.byok_rag = [byok_rag_mock]
413+
config_mock.score_multiplier_mapping = {"vs-internal-001": 1.0}
414+
config_mock.rag_id_mapping = {"vs-internal-001": "my-kb"}
415+
mocker.patch("utils.vector_search.configuration", config_mock)
416+
417+
chunk_mock = mocker.Mock()
418+
chunk_mock.content = "Test content"
419+
chunk_mock.chunk_id = "chunk_1"
420+
chunk_mock.metadata = {"document_id": "doc_1"}
421+
422+
search_response = mocker.Mock()
423+
search_response.chunks = [chunk_mock]
424+
search_response.scores = [0.9]
425+
426+
client_mock = mocker.AsyncMock()
427+
client_mock.vector_io.query.return_value = search_response
428+
429+
# Pass user-facing rag_id "my-kb"
430+
await _fetch_byok_rag(client_mock, "test query", vector_store_ids=["my-kb"])
431+
432+
# Must be called with the internal llama-stack ID, not the user-facing "my-kb"
433+
client_mock.vector_io.query.assert_called_once_with(
434+
vector_store_id="vs-internal-001",
435+
query="test query",
436+
params={"max_chunks": constants.BYOK_RAG_MAX_CHUNKS, "mode": "vector"},
437+
)
438+
439+
@pytest.mark.asyncio
440+
async def test_multiple_user_facing_ids_each_translated( # type: ignore[no-untyped-def]
441+
self, mocker
442+
) -> None:
443+
"""Test that multiple user-facing rag_ids are each translated to their vector_store_id."""
444+
config_mock = mocker.Mock(spec=AppConfig)
445+
byok_rag_1 = mocker.Mock()
446+
byok_rag_1.rag_id = "kb-part1"
447+
byok_rag_1.vector_db_id = "vs-aaa-111"
448+
byok_rag_2 = mocker.Mock()
449+
byok_rag_2.rag_id = "kb-part2"
450+
byok_rag_2.vector_db_id = "vs-bbb-222"
451+
config_mock.configuration.byok_rag = [byok_rag_1, byok_rag_2]
452+
config_mock.score_multiplier_mapping = {"vs-aaa-111": 1.0, "vs-bbb-222": 1.0}
453+
config_mock.rag_id_mapping = {
454+
"vs-aaa-111": "kb-part1",
455+
"vs-bbb-222": "kb-part2",
456+
}
457+
mocker.patch("utils.vector_search.configuration", config_mock)
458+
459+
chunk_mock = mocker.Mock()
460+
chunk_mock.content = "Content"
461+
chunk_mock.chunk_id = "chunk_1"
462+
chunk_mock.metadata = {}
463+
464+
search_response = mocker.Mock()
465+
search_response.chunks = [chunk_mock]
466+
search_response.scores = [0.8]
467+
468+
client_mock = mocker.AsyncMock()
469+
client_mock.vector_io.query.return_value = search_response
470+
471+
# Pass two user-facing rag_ids
472+
await _fetch_byok_rag(
473+
client_mock, "test query", vector_store_ids=["kb-part1", "kb-part2"]
474+
)
475+
476+
# Each call must use the internal ID, not the user-facing name
477+
call_args = [
478+
call.kwargs["vector_store_id"]
479+
for call in client_mock.vector_io.query.call_args_list
480+
]
481+
assert "vs-aaa-111" in call_args
482+
assert "vs-bbb-222" in call_args
483+
assert "kb-part1" not in call_args
484+
assert "kb-part2" not in call_args
485+
403486

404487
class TestFetchSolrRag:
405488
"""Tests for _fetch_solr_rag async function."""

0 commit comments

Comments
 (0)