Skip to content

Commit 44afc1f

Browse files
authored
Merge pull request #1407 from are-ces/LCORE-1500-inline-rag-config-filtering
LCORE-1500: Fix inline RAG triggered by unconfigured vector_store_ids
2 parents: f4b8006 + 9a74b0b · commit 44afc1f

4 files changed

Lines changed: 72 additions & 37 deletions

File tree

src/utils/vector_search.py

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -352,12 +352,17 @@ async def _fetch_byok_rag(
352352
referenced_documents: list[ReferencedDocument] = []
353353

354354
# Determine which BYOK vector stores to query for inline RAG.
355-
# Per-request override takes precedence; otherwise use config-based inline list.
356-
rag_ids_to_query = (
357-
configuration.configuration.rag.inline
358-
if vector_store_ids is None
359-
else vector_store_ids
360-
)
355+
# Config is the source of truth: only rag_ids registered in rag.inline are eligible.
356+
# Per-request IDs are intersected with the config to prevent triggering inline RAG
357+
# for stores not explicitly configured for inline use.
358+
if vector_store_ids is None:
359+
rag_ids_to_query = configuration.configuration.rag.inline
360+
else:
361+
rag_ids_to_query = [
362+
v
363+
for v in vector_store_ids
364+
if v in set(configuration.configuration.rag.inline)
365+
]
361366

362367
# Translate user-facing rag_ids to llama-stack ids
363368
vector_store_ids_to_query: list[str] = resolve_vector_store_ids(

tests/integration/endpoints/test_query_byok_integration.py

Lines changed: 10 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -404,16 +404,14 @@ async def test_query_byok_inline_rag_with_request_vector_store_ids(
404404
test_request: Request,
405405
test_auth: AuthTuple,
406406
) -> None:
407-
"""Test that per-request vector_store_ids override config-based inline RAG.
407+
"""Test that per-request vector_store_ids not in rag.inline are filtered out.
408408
409409
Config has rag.inline = ["source-a"] (resolves to vs-source-a).
410-
Request passes vector_store_ids = ["vs-source-b"].
411-
Only vs-source-b should be queried, proving the override works.
412-
(passing vector_store_ids overrides config)
410+
Request passes vector_store_ids = ["source-b"] which is NOT in rag.inline.
411+
No inline RAG should be triggered because config is the source of truth.
413412
414413
Verifies:
415-
- vector_io.query is called with the request-specified store, not config
416-
- The config-based store is NOT queried
414+
- vector_io.query is NOT called (source-b is not in rag.inline)
417415
"""
418416
entry_a = mocker.MagicMock()
419417
entry_a.rag_id = "source-a"
@@ -441,10 +439,10 @@ async def test_query_byok_inline_rag_with_request_vector_store_ids(
441439

442440
mock_holder_class.return_value.get_client.return_value = mock_client
443441

444-
# Override: request specifies vs-source-b, not the config's vs-source-a
442+
# Request specifies source-b which is NOT in rag.inline config
445443
query_request = QueryRequest(
446444
query="What is OpenShift?",
447-
vector_store_ids=["vs-source-b"],
445+
vector_store_ids=["source-b"],
448446
)
449447

450448
await query_endpoint_handler(
@@ -454,12 +452,8 @@ async def test_query_byok_inline_rag_with_request_vector_store_ids(
454452
mcp_headers={},
455453
)
456454

457-
# Verify only vs-source-b was queried (not the config's vs-source-a)
458-
assert mock_client.vector_io.query.call_count == 1
459-
# call_args.kwargs holds the keyword arguments of the most recent call to vector_io.query.
460-
# e.g. "vector_store_id" is the store queried, "query" is the search text.
461-
call_kwargs = mock_client.vector_io.query.call_args.kwargs
462-
assert call_kwargs["vector_store_id"] == "vs-source-b"
455+
# source-b is not in rag.inline, so no inline RAG should be triggered
456+
assert mock_client.vector_io.query.call_count == 0
463457

464458

465459
@pytest.mark.asyncio
@@ -506,10 +500,10 @@ async def test_query_byok_request_vector_store_ids_filters_configured_stores(
506500

507501
mock_holder_class.return_value.get_client.return_value = mock_client
508502

509-
# Request narrows down to only vs-source-a
503+
# Request narrows down to only source-a (using rag_id, not vector_db_id)
510504
query_request = QueryRequest(
511505
query="What is OpenShift?",
512-
vector_store_ids=["vs-source-a"],
506+
vector_store_ids=["source-a"],
513507
)
514508

515509
response = await query_endpoint_handler(

tests/integration/endpoints/test_streaming_query_byok_integration.py

Lines changed: 9 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -324,16 +324,14 @@ async def test_streaming_query_byok_inline_rag_with_request_vector_store_ids(
324324
test_request: Request,
325325
test_auth: AuthTuple,
326326
) -> None:
327-
"""Test that per-request vector_store_ids override config for streaming query.
327+
"""Test that per-request vector_store_ids not in rag.inline are filtered out.
328328
329329
Config has rag.inline = ["source-a"] (resolves to vs-source-a).
330-
Request passes vector_store_ids = ["vs-source-b"].
331-
Only vs-source-b should be queried, proving the override works.
332-
(passing vector_store_ids overrides config)
330+
Request passes vector_store_ids = ["source-b"] which is NOT in rag.inline.
331+
No inline RAG should be triggered because config is the source of truth.
333332
334333
Verifies:
335-
- vector_io.query is called with the request-specified store, not config
336-
- The config-based store is NOT queried
334+
- vector_io.query is NOT called (source-b is not in rag.inline)
337335
"""
338336
entry_a = mocker.MagicMock()
339337
entry_a.rag_id = "source-a"
@@ -363,10 +361,10 @@ async def test_streaming_query_byok_inline_rag_with_request_vector_store_ids(
363361

364362
mock_holder_class.return_value.get_client.return_value = mock_client
365363

366-
# Override: request specifies vs-source-b, not the config's vs-source-a
364+
# Request specifies source-b which is NOT in rag.inline config
367365
query_request = QueryRequest(
368366
query="What is OpenShift?",
369-
vector_store_ids=["vs-source-b"],
367+
vector_store_ids=["source-b"],
370368
)
371369

372370
response = await streaming_query_endpoint_handler(
@@ -378,12 +376,8 @@ async def test_streaming_query_byok_inline_rag_with_request_vector_store_ids(
378376

379377
assert isinstance(response, StreamingResponse)
380378

381-
# Verify only vs-source-b was queried (not the config's vs-source-a)
382-
assert mock_client.vector_io.query.call_count == 1
383-
# call_args.kwargs holds the keyword arguments of the most recent call to vector_io.query.
384-
# e.g. "vector_store_id" is the store queried, "query" is the search text.
385-
call_kwargs = mock_client.vector_io.query.call_args.kwargs
386-
assert call_kwargs["vector_store_id"] == "vs-source-b"
379+
# source-b is not in rag.inline, so no inline RAG should be triggered
380+
assert mock_client.vector_io.query.call_count == 0
387381

388382

389383
@pytest.mark.asyncio
@@ -433,7 +427,7 @@ async def test_streaming_query_byok_request_vector_store_ids_filters_configured_
433427

434428
query_request = QueryRequest(
435429
query="What is OpenShift?",
436-
vector_store_ids=["vs-source-a"],
430+
vector_store_ids=["source-a"],
437431
)
438432

439433
response = await streaming_query_endpoint_handler(

tests/unit/utils/test_vector_search.py

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -440,6 +440,7 @@ async def test_user_facing_ids_translated_to_internal_ids(
440440
byok_rag_mock.rag_id = "my-kb"
441441
byok_rag_mock.vector_db_id = "vs-internal-001"
442442
config_mock.configuration.byok_rag = [byok_rag_mock]
443+
config_mock.configuration.rag.inline = ["my-kb"]
443444
config_mock.score_multiplier_mapping = {"vs-internal-001": 1.0}
444445
config_mock.rag_id_mapping = {"vs-internal-001": "my-kb"}
445446
mocker.patch("utils.vector_search.configuration", config_mock)
@@ -479,6 +480,7 @@ async def test_multiple_user_facing_ids_each_translated(
479480
byok_rag_2.rag_id = "kb-part2"
480481
byok_rag_2.vector_db_id = "vs-bbb-222"
481482
config_mock.configuration.byok_rag = [byok_rag_1, byok_rag_2]
483+
config_mock.configuration.rag.inline = ["kb-part1", "kb-part2"]
482484
config_mock.score_multiplier_mapping = {"vs-aaa-111": 1.0, "vs-bbb-222": 1.0}
483485
config_mock.rag_id_mapping = {
484486
"vs-aaa-111": "kb-part1",
@@ -513,6 +515,46 @@ async def test_multiple_user_facing_ids_each_translated(
513515
assert "kb-part1" not in call_args
514516
assert "kb-part2" not in call_args
515517

518+
@pytest.mark.asyncio
519+
async def test_no_inline_rag_configured_skips_byok(
520+
self, mocker: MockerFixture
521+
) -> None:
522+
"""Test that BYOK inline RAG is skipped when rag.inline is empty."""
523+
config_mock = mocker.Mock(spec=AppConfig)
524+
config_mock.configuration.rag.inline = []
525+
config_mock.configuration.byok_rag = []
526+
mocker.patch("utils.vector_search.configuration", config_mock)
527+
528+
client_mock = mocker.AsyncMock()
529+
530+
rag_chunks, referenced_docs = await _fetch_byok_rag(
531+
client_mock, "test query", vector_store_ids=["some-id"]
532+
)
533+
534+
assert rag_chunks == []
535+
assert referenced_docs == []
536+
client_mock.vector_io.query.assert_not_called()
537+
538+
@pytest.mark.asyncio
539+
async def test_request_id_not_in_inline_config_skips_byok(
540+
self, mocker: MockerFixture
541+
) -> None:
542+
"""Test that a request vector_store_id not registered in rag.inline is filtered out."""
543+
config_mock = mocker.Mock(spec=AppConfig)
544+
config_mock.configuration.rag.inline = ["registered-id"]
545+
config_mock.configuration.byok_rag = []
546+
mocker.patch("utils.vector_search.configuration", config_mock)
547+
548+
client_mock = mocker.AsyncMock()
549+
550+
rag_chunks, referenced_docs = await _fetch_byok_rag(
551+
client_mock, "test query", vector_store_ids=["unregistered-id"]
552+
)
553+
554+
assert rag_chunks == []
555+
assert referenced_docs == []
556+
client_mock.vector_io.query.assert_not_called()
557+
516558

517559
class TestFetchSolrRag:
518560
"""Tests for _fetch_solr_rag async function."""

0 commit comments

Comments
 (0)