Skip to content

Commit b2f54cf

Browse files
authored
Merge pull request #1326 from are-ces/fix-default-rag-tool
LCORE-1473: Tool RAG is always enabled
2 parents 3350016 + c7704b1 commit b2f54cf

7 files changed

Lines changed: 158 additions & 36 deletions

File tree

docs/byok_guide.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ rag:
307307
- okp # include OKP context inline
308308
309309
# Tool RAG: the LLM can call file_search to retrieve context on demand
310-
# Omit to use all registered BYOK stores (backward compatibility)
310+
# If omitted, tool RAG is disabled. If both tool and inline are omitted, all registered stores are used as fallback
311311
tool:
312312
- my-docs # expose this BYOK store as the file_search tool
313313
- okp # expose OKP as the file_search tool

docs/openapi.json

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11033,25 +11033,18 @@
1103311033
"description": "RAG IDs whose sources are injected as context before the LLM call. Use 'okp' to enable OKP inline RAG. Empty by default (no inline RAG)."
1103411034
},
1103511035
"tool": {
11036-
"anyOf": [
11037-
{
11038-
"items": {
11039-
"type": "string"
11040-
},
11041-
"type": "array"
11042-
},
11043-
{
11044-
"type": "null"
11045-
}
11046-
],
11036+
"items": {
11037+
"type": "string"
11038+
},
11039+
"type": "array",
1104711040
"title": "Tool RAG IDs",
1104811041
"description": "RAG IDs made available to the LLM as a file_search tool. Use 'okp' to include the OKP vector store. When omitted, all registered BYOK vector stores are used (backward compatibility)."
1104911042
}
1105011043
},
1105111044
"additionalProperties": false,
1105211045
"type": "object",
1105311046
"title": "RagConfiguration",
11054-
"description": "RAG strategy configuration.\n\nControls which RAG sources are used for inline and tool-based retrieval.\n\nEach strategy lists RAG IDs to include. The special ID ``\"okp\"`` defined in constants,\nactivates the OKP provider; all other IDs refer to entries in ``byok_rag``.\n\nBackward compatibility:\n - ``inline`` defaults to ``[]`` (no inline RAG).\n - ``tool`` defaults to ``None`` which means all registered vector stores\n are used (identical to the previous ``tool.byok.enabled = True`` default)."
11047+
"description": "RAG strategy configuration.\n\nControls which RAG sources are used for inline and tool-based retrieval.\n\nEach strategy lists RAG IDs to include. The special ID ``\"okp\"`` defined in constants,\nactivates the OKP provider; all other IDs refer to entries in ``byok_rag``.\n\nBackward compatibility:\n - ``inline`` defaults to ``[]`` (no inline RAG).\n - ``tool`` defaults to ``[]`` (no tool RAG).\n\nIf no RAG strategy is defined (inline and tool are empty),\nthe RAG tool will register all stores available to llama-stack."
1105511048
},
1105611049
"ReadinessResponse": {
1105711050
"properties": {

src/models/config.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
from enum import Enum
77
from functools import cached_property
88
from pathlib import Path
9-
from typing import Any, Optional, Literal, Self
109
from re import Pattern
10+
from typing import Any, Literal, Optional, Self
1111

1212
import jsonpath_ng
1313
import yaml
@@ -1704,8 +1704,10 @@ class RagConfiguration(ConfigurationBase):
17041704
17051705
Backward compatibility:
17061706
- ``inline`` defaults to ``[]`` (no inline RAG).
1707-
- ``tool`` defaults to ``None`` which means all registered vector stores
1708-
are used (identical to the previous ``tool.byok.enabled = True`` default).
1707+
- ``tool`` defaults to ``[]`` (no tool RAG).
1708+
1709+
If no RAG strategy is defined (inline and tool are empty),
1710+
the RAG tool will register all stores available to llama-stack.
17091711
"""
17101712

17111713
inline: list[str] = Field(
@@ -1715,8 +1717,8 @@ class RagConfiguration(ConfigurationBase):
17151717
f"Use '{constants.OKP_RAG_ID}' to enable OKP inline RAG. Empty by default (no inline RAG).",
17161718
)
17171719

1718-
tool: Optional[list[str]] = Field(
1719-
default=None,
1720+
tool: list[str] = Field(
1721+
default_factory=list,
17201722
title="Tool RAG IDs",
17211723
description="RAG IDs made available to the LLM as a file_search tool. "
17221724
f"Use '{constants.OKP_RAG_ID}' to include the OKP vector store. "

src/utils/responses.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -174,18 +174,26 @@ async def prepare_tools( # pylint: disable=too-many-arguments,too-many-position
174174
return None
175175

176176
toolgroups: list[InputTool] = []
177-
178-
# Priority: per-request IDs > rag.tool config > all registered stores.
179-
# In all cases, customer-facing rag_ids are translated to internal vector_db_ids.
180-
# IDs fetched from llama-stack are already internal and need no translation.
177+
effective_ids: list[str] = []
178+
179+
# Vector store ID resolution priority:
180+
# 1. Per-request IDs: highest prio; customer-facing rag_ids are translated to vector_db_ids.
181+
# 2. rag.tool config IDs: used when no per-request IDs provided, and rag.tool is configured.
182+
# If rag.inline is configured, but not rag.tool, tool RAG is disabled.
183+
# 3. All registered vector DBs: fallback when neither rag.tool nor rag.inline are configured.
184+
# IDs fetched from llama-stack are already internal and need no translation.
181185
byok_rags = configuration.configuration.byok_rag
186+
187+
is_tool_rag_enabled = len(configuration.configuration.rag.tool) > 0
188+
is_inline_rag_enabled = len(configuration.configuration.rag.inline) > 0
189+
182190
if vector_store_ids is not None:
183-
effective_ids: list[str] = resolve_vector_store_ids(vector_store_ids, byok_rags)
184-
elif configuration.configuration.rag.tool is not None:
191+
effective_ids = resolve_vector_store_ids(vector_store_ids, byok_rags)
192+
elif is_tool_rag_enabled:
185193
effective_ids = resolve_vector_store_ids(
186194
configuration.configuration.rag.tool, byok_rags
187195
)
188-
else:
196+
elif not is_inline_rag_enabled:
189197
effective_ids = await get_vector_store_ids(client, None)
190198

191199
# Add RAG tools if vector stores are available

tests/unit/models/config/test_dump_configuration.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ def test_dump_configuration(tmp_path: Path) -> None:
208208
"azure_entra_id": None,
209209
"rag": {
210210
"inline": [],
211-
"tool": None,
211+
"tool": [],
212212
},
213213
"okp": {
214214
"offline": True,
@@ -559,7 +559,7 @@ def test_dump_configuration_with_quota_limiters(tmp_path: Path) -> None:
559559
"azure_entra_id": None,
560560
"rag": {
561561
"inline": [],
562-
"tool": None,
562+
"tool": [],
563563
},
564564
"okp": {
565565
"offline": True,
@@ -788,7 +788,7 @@ def test_dump_configuration_with_quota_limiters_different_values(
788788
"azure_entra_id": None,
789789
"rag": {
790790
"inline": [],
791-
"tool": None,
791+
"tool": [],
792792
},
793793
"okp": {
794794
"offline": True,
@@ -992,7 +992,7 @@ def test_dump_configuration_byok(tmp_path: Path) -> None:
992992
"azure_entra_id": None,
993993
"rag": {
994994
"inline": [],
995-
"tool": None,
995+
"tool": [],
996996
},
997997
"okp": {
998998
"offline": True,
@@ -1181,7 +1181,7 @@ def test_dump_configuration_pg_namespace(tmp_path: Path) -> None:
11811181
"azure_entra_id": None,
11821182
"rag": {
11831183
"inline": [],
1184-
"tool": None,
1184+
"tool": [],
11851185
},
11861186
"okp": {
11871187
"offline": True,

tests/unit/models/config/test_rag_configuration.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,13 @@ def test_default_values(self) -> None:
1717
"""Test that RagConfiguration has correct default values."""
1818
config = RagConfiguration()
1919
assert config.inline == []
20-
assert config.tool is None
20+
assert config.tool == []
2121

2222
def test_inline_with_byok_ids(self) -> None:
2323
"""Test inline list with BYOK rag IDs."""
2424
config = RagConfiguration(inline=["store-1", "store-2"])
2525
assert config.inline == ["store-1", "store-2"]
26-
assert config.tool is None
26+
assert config.tool == []
2727

2828
def test_inline_with_okp_rag(self) -> None:
2929
"""Test inline list including the special OKP ID."""
@@ -45,10 +45,10 @@ def test_tool_empty_list(self) -> None:
4545
config = RagConfiguration(tool=[])
4646
assert config.tool == []
4747

48-
def test_tool_none_means_all_stores(self) -> None:
49-
"""Test that tool=None (default) means all registered stores are used."""
48+
def test_tool_default_is_empty_list(self) -> None:
49+
"""Test that tool defaults to an empty list."""
5050
config = RagConfiguration()
51-
assert config.tool is None
51+
assert config.tool == []
5252

5353
def test_no_unknown_fields_allowed(self) -> None:
5454
"""Test that RagConfiguration rejects unknown fields."""

tests/unit/utils/test_responses.py

Lines changed: 120 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,8 @@ async def test_translates_byok_ids_in_prepare_tools(
10261026
mock_byok_rag.vector_db_id = "vs-001"
10271027
mock_config = mocker.Mock()
10281028
mock_config.configuration.byok_rag = [mock_byok_rag]
1029+
mock_config.configuration.rag.tool = []
1030+
mock_config.configuration.rag.inline = []
10291031
mocker.patch("utils.responses.configuration", mock_config)
10301032

10311033
result = await prepare_tools(mock_client, ["ocp_docs"], False, "token")
@@ -1045,6 +1047,8 @@ async def test_passes_through_unknown_ids_in_prepare_tools(
10451047
# Configure empty BYOK RAG
10461048
mock_config = mocker.Mock()
10471049
mock_config.configuration.byok_rag = []
1050+
mock_config.configuration.rag.tool = []
1051+
mock_config.configuration.rag.inline = []
10481052
mocker.patch("utils.responses.configuration", mock_config)
10491053

10501054
result = await prepare_tools(mock_client, ["raw-internal-id"], False, "token")
@@ -1073,7 +1077,8 @@ async def test_does_not_translate_when_ids_fetched_from_llama_stack(
10731077
mock_byok_rag.vector_db_id = "vs-translated"
10741078
mock_config = mocker.Mock()
10751079
mock_config.configuration.byok_rag = [mock_byok_rag]
1076-
mock_config.configuration.rag.tool = None
1080+
mock_config.configuration.rag.tool = []
1081+
mock_config.configuration.rag.inline = []
10771082
mocker.patch("utils.responses.configuration", mock_config)
10781083

10791084
result = await prepare_tools(mock_client, None, False, "token")
@@ -1082,6 +1087,120 @@ async def test_does_not_translate_when_ids_fetched_from_llama_stack(
10821087
assert result[0].vector_store_ids == ["vs-internal"]
10831088

10841089

1090+
class TestPrepareToolsVectorStoreResolution:
1091+
"""Tests for vector store ID resolution priority in prepare_tools."""
1092+
1093+
@pytest.mark.asyncio
1094+
async def test_uses_rag_tool_config_when_no_per_request_ids(
1095+
self, mocker: MockerFixture
1096+
) -> None:
1097+
"""Test that rag.tool config IDs are used when no per-request IDs are provided."""
1098+
mock_client = mocker.AsyncMock()
1099+
mocker.patch("utils.responses.get_mcp_tools", return_value=None)
1100+
1101+
mock_config = mocker.Mock()
1102+
mock_config.configuration.byok_rag = []
1103+
mock_config.configuration.rag.tool = ["rag-tool-id-1", "rag-tool-id-2"]
1104+
mock_config.configuration.rag.inline = []
1105+
mocker.patch("utils.responses.configuration", mock_config)
1106+
1107+
result = await prepare_tools(mock_client, None, False, "token")
1108+
1109+
assert result is not None
1110+
assert len(result) == 1
1111+
assert result[0].type == "file_search"
1112+
assert result[0].vector_store_ids == ["rag-tool-id-1", "rag-tool-id-2"]
1113+
mock_client.vector_stores.list.assert_not_called()
1114+
1115+
@pytest.mark.asyncio
1116+
async def test_rag_tool_config_ids_are_translated(
1117+
self, mocker: MockerFixture
1118+
) -> None:
1119+
"""Test that rag.tool config IDs are translated from rag_ids to vector_db_ids."""
1120+
mock_client = mocker.AsyncMock()
1121+
mocker.patch("utils.responses.get_mcp_tools", return_value=None)
1122+
1123+
mock_byok_rag = mocker.Mock()
1124+
mock_byok_rag.rag_id = "ocp_docs"
1125+
mock_byok_rag.vector_db_id = "vs-001"
1126+
mock_config = mocker.Mock()
1127+
mock_config.configuration.byok_rag = [mock_byok_rag]
1128+
mock_config.configuration.rag.tool = ["ocp_docs"]
1129+
mock_config.configuration.rag.inline = []
1130+
mocker.patch("utils.responses.configuration", mock_config)
1131+
1132+
result = await prepare_tools(mock_client, None, False, "token")
1133+
1134+
assert result is not None
1135+
assert result[0].vector_store_ids == ["vs-001"]
1136+
mock_client.vector_stores.list.assert_not_called()
1137+
1138+
@pytest.mark.asyncio
1139+
async def test_inline_rag_disables_tool_rag(self, mocker: MockerFixture) -> None:
1140+
"""Test that configuring rag.inline without rag.tool disables tool RAG."""
1141+
mock_client = mocker.AsyncMock()
1142+
mocker.patch("utils.responses.get_mcp_tools", return_value=None)
1143+
1144+
mock_config = mocker.Mock()
1145+
mock_config.configuration.byok_rag = []
1146+
mock_config.configuration.rag.tool = []
1147+
mock_config.configuration.rag.inline = [
1148+
"inline-store-id"
1149+
] # inline is configured
1150+
mocker.patch("utils.responses.configuration", mock_config)
1151+
1152+
result = await prepare_tools(mock_client, None, False, "token")
1153+
1154+
# Tool RAG should be disabled — no RAG tool in result, no llama-stack fetch
1155+
assert result is None
1156+
mock_client.vector_stores.list.assert_not_called()
1157+
1158+
@pytest.mark.asyncio
1159+
async def test_per_request_ids_override_rag_tool_config(
1160+
self, mocker: MockerFixture
1161+
) -> None:
1162+
"""Test that per-request vector_store_ids take priority over rag.tool config."""
1163+
mock_client = mocker.AsyncMock()
1164+
mocker.patch("utils.responses.get_mcp_tools", return_value=None)
1165+
1166+
mock_config = mocker.Mock()
1167+
mock_config.configuration.byok_rag = []
1168+
mock_config.configuration.rag.tool = ["config-id-1"]
1169+
mock_config.configuration.rag.inline = []
1170+
mocker.patch("utils.responses.configuration", mock_config)
1171+
1172+
result = await prepare_tools(mock_client, ["request-id-1"], False, "token")
1173+
1174+
assert result is not None
1175+
assert result[0].vector_store_ids == ["request-id-1"]
1176+
mock_client.vector_stores.list.assert_not_called()
1177+
1178+
@pytest.mark.asyncio
1179+
async def test_all_registered_dbs_used_when_neither_tool_nor_inline_configured(
1180+
self, mocker: MockerFixture
1181+
) -> None:
1182+
"""Test fallback to all registered vector DBs when neither rag.tool nor rag.inline are set."""
1183+
mock_client = mocker.AsyncMock()
1184+
mock_vs = mocker.Mock()
1185+
mock_vs.id = "vs-registered"
1186+
mock_list = mocker.Mock()
1187+
mock_list.data = [mock_vs]
1188+
mock_client.vector_stores.list = mocker.AsyncMock(return_value=mock_list)
1189+
mocker.patch("utils.responses.get_mcp_tools", return_value=None)
1190+
1191+
mock_config = mocker.Mock()
1192+
mock_config.configuration.byok_rag = []
1193+
mock_config.configuration.rag.tool = []
1194+
mock_config.configuration.rag.inline = []
1195+
mocker.patch("utils.responses.configuration", mock_config)
1196+
1197+
result = await prepare_tools(mock_client, None, False, "token")
1198+
1199+
assert result is not None
1200+
assert result[0].vector_store_ids == ["vs-registered"]
1201+
mock_client.vector_stores.list.assert_called_once()
1202+
1203+
10851204
class TestPrepareResponsesParams:
10861205
"""Tests for prepare_responses_params function."""
10871206

0 commit comments

Comments
 (0)