Skip to content

Commit 21d6bc5

Browse files
authored
Merge pull request #1998 from arin-deloatch/bug/LCORE-1613
LCORE-1613: RAG tool calls loop indefinitely
2 parents f9096d7 + 59edfd1 commit 21d6bc5

7 files changed

Lines changed: 157 additions & 9 deletions

File tree

docs/openapi.json

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13614,6 +13614,34 @@
1361413614
"type": "array",
1361513615
"title": "High-level inference providers",
1361613616
"description": "Unified-mode synthesis input (Decision S5): a high-level, backend-agnostic list of inference providers the synthesizer expands into Llama Stack provider entries. Lives at the configuration root so it survives a future backend change. A non-empty list signals unified mode. Empty (the default) leaves legacy/remote modes unaffected. The sibling default_model / default_provider keep their query-time routing meaning and are independent of this list."
13617+
},
13618+
"max_infer_iters": {
13619+
"anyOf": [
13620+
{
13621+
"type": "integer",
13622+
"exclusiveMinimum": 0.0
13623+
},
13624+
{
13625+
"type": "null"
13626+
}
13627+
],
13628+
"title": "Default max inference iterations",
13629+
"description": "Server-side default for the maximum number of inference iterations a model can perform in a single request. Prevents small models from looping indefinitely on tool calls. Per-request values take precedence over this default. Set to None to disable the limit.",
13630+
"default": 10
13631+
},
13632+
"max_tool_calls": {
13633+
"anyOf": [
13634+
{
13635+
"type": "integer",
13636+
"exclusiveMinimum": 0.0
13637+
},
13638+
{
13639+
"type": "null"
13640+
}
13641+
],
13642+
"title": "Default max tool calls",
13643+
"description": "Server-side default for the maximum number of tool calls allowed in a single response. Prevents small models from exhausting the context window with repeated tool calls. Per-request values take precedence over this default. Set to None to disable the limit.",
13644+
"default": 30
1361713645
}
1361813646
},
1361913647
"additionalProperties": false,

src/app/endpoints/responses.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -464,6 +464,11 @@ async def responses_endpoint_handler(
464464
original_request.input, inline_rag_context.context_text
465465
)
466466

467+
if "max_infer_iters" not in original_request.model_fields_set:
468+
updated_request.max_infer_iters = configuration.inference.max_infer_iters
469+
if "max_tool_calls" not in original_request.model_fields_set:
470+
updated_request.max_tool_calls = configuration.inference.max_tool_calls
471+
467472
api_params = ResponsesApiParams.model_validate(updated_request.model_dump())
468473

469474
# Compact the conversation if it is approaching the context window limit.

src/models/config.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1729,6 +1729,26 @@ class InferenceConfiguration(ConfigurationBase):
17291729
"meaning and are independent of this list.",
17301730
)
17311731

1732+
max_infer_iters: Optional[PositiveInt] = Field(
1733+
default=10,
1734+
title="Default max inference iterations",
1735+
description="Server-side default for the maximum number of inference "
1736+
"iterations a model can perform in a single request. Prevents small "
1737+
"models from looping indefinitely on tool calls. "
1738+
"Per-request values take precedence over this default. "
1739+
"Set to None to disable the limit.",
1740+
)
1741+
1742+
max_tool_calls: Optional[PositiveInt] = Field(
1743+
default=30,
1744+
title="Default max tool calls",
1745+
description="Server-side default for the maximum number of tool calls "
1746+
"allowed in a single response. Prevents small models from exhausting "
1747+
"the context window with repeated tool calls. "
1748+
"Per-request values take precedence over this default. "
1749+
"Set to None to disable the limit.",
1750+
)
1751+
17321752
@model_validator(mode="after")
17331753
def check_default_model_and_provider(self) -> Self:
17341754
"""

src/utils/responses.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -429,6 +429,8 @@ async def prepare_responses_params( # pylint: disable=too-many-arguments,too-ma
429429
stream=stream,
430430
store=store,
431431
extra_headers=extra_headers,
432+
max_infer_iters=configuration.inference.max_infer_iters,
433+
max_tool_calls=configuration.inference.max_tool_calls,
432434
)
433435

434436

tests/unit/models/config/test_dump_configuration.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,8 @@ def test_dump_configuration_minimal_cfg(tmp_path: Path) -> None:
163163
"default_model": None,
164164
"context_windows": {},
165165
"providers": [],
166+
"max_infer_iters": 10,
167+
"max_tool_calls": 30,
166168
},
167169
"database": {
168170
"sqlite": {
@@ -375,6 +377,8 @@ def test_dump_configuration_valid_values(tmp_path: Path) -> None:
375377
"default_model": "default_model",
376378
"context_windows": {},
377379
"providers": [],
380+
"max_infer_iters": 10,
381+
"max_tool_calls": 30,
378382
},
379383
"database": {
380384
"sqlite": None,
@@ -731,6 +735,8 @@ def test_dump_configuration_with_quota_limiters(tmp_path: Path) -> None:
731735
"default_model": "default_model",
732736
"context_windows": {},
733737
"providers": [],
738+
"max_infer_iters": 10,
739+
"max_tool_calls": 30,
734740
},
735741
"database": {
736742
"sqlite": None,
@@ -986,6 +992,8 @@ def test_dump_configuration_with_quota_limiters_different_values(
986992
"default_model": "default_model",
987993
"context_windows": {},
988994
"providers": [],
995+
"max_infer_iters": 10,
996+
"max_tool_calls": 30,
989997
},
990998
"database": {
991999
"sqlite": None,
@@ -1221,6 +1229,8 @@ def test_dump_configuration_byok(tmp_path: Path) -> None:
12211229
"default_model": "default_model",
12221230
"context_windows": {},
12231231
"providers": [],
1232+
"max_infer_iters": 10,
1233+
"max_tool_calls": 30,
12241234
},
12251235
"database": {
12261236
"sqlite": None,
@@ -1451,6 +1461,8 @@ def test_dump_configuration_pg_namespace(tmp_path: Path) -> None:
14511461
"default_model": "default_model",
14521462
"context_windows": {},
14531463
"providers": [],
1464+
"max_infer_iters": 10,
1465+
"max_tool_calls": 30,
14541466
},
14551467
"database": {
14561468
"sqlite": None,
@@ -1826,6 +1838,8 @@ def test_dump_configuration_allow_degraded_mode(tmp_path: Path) -> None:
18261838
"default_model": "default_model",
18271839
"context_windows": {},
18281840
"providers": [],
1841+
"max_infer_iters": 10,
1842+
"max_tool_calls": 30,
18291843
},
18301844
"database": {
18311845
"sqlite": None,
@@ -2047,6 +2061,8 @@ def test_dump_configuration_max_retries_settings(tmp_path: Path) -> None:
20472061
"default_model": "default_model",
20482062
"context_windows": {},
20492063
"providers": [],
2064+
"max_infer_iters": 10,
2065+
"max_tool_calls": 30,
20502066
},
20512067
"database": {
20522068
"sqlite": None,
@@ -2268,6 +2284,8 @@ def test_dump_configuration_retry_count_settings(tmp_path: Path) -> None:
22682284
"default_model": "default_model",
22692285
"context_windows": {},
22702286
"providers": [],
2287+
"max_infer_iters": 10,
2288+
"max_tool_calls": 30,
22712289
},
22722290
"database": {
22732291
"sqlite": None,
@@ -2496,6 +2514,8 @@ def test_dump_configuration_specific_compaction_values(tmp_path: Path) -> None:
24962514
"default_model": "default_model",
24972515
"context_windows": {},
24982516
"providers": [],
2517+
"max_infer_iters": 10,
2518+
"max_tool_calls": 30,
24992519
},
25002520
"database": {
25012521
"sqlite": None,

tests/unit/models/config/test_inference_configuration.py

Lines changed: 68 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -90,3 +90,71 @@ def test_context_windows_rejects_negative_size() -> None:
9090
InferenceConfiguration(
9191
context_windows={"openai/gpt-4o-mini": -1},
9292
) # pyright: ignore[reportCallIssue]
93+
94+
95+
def test_max_infer_iters_default() -> None:
96+
"""Test that max_infer_iters defaults to 10."""
97+
config = InferenceConfiguration() # pyright: ignore[reportCallIssue]
98+
assert config.max_infer_iters == 10
99+
100+
101+
def test_max_tool_calls_default() -> None:
102+
"""Test that max_tool_calls defaults to 30."""
103+
config = InferenceConfiguration() # pyright: ignore[reportCallIssue]
104+
assert config.max_tool_calls == 30
105+
106+
107+
def test_max_infer_iters_accepts_positive_int() -> None:
108+
"""Test that max_infer_iters accepts a positive integer."""
109+
config = InferenceConfiguration(
110+
max_infer_iters=5
111+
) # pyright: ignore[reportCallIssue]
112+
assert config.max_infer_iters == 5
113+
114+
115+
def test_max_tool_calls_accepts_positive_int() -> None:
116+
"""Test that max_tool_calls accepts a positive integer."""
117+
config = InferenceConfiguration(
118+
max_tool_calls=20
119+
) # pyright: ignore[reportCallIssue]
120+
assert config.max_tool_calls == 20
121+
122+
123+
def test_max_infer_iters_rejects_zero() -> None:
124+
"""Test that max_infer_iters rejects zero."""
125+
with pytest.raises(ValueError):
126+
InferenceConfiguration(max_infer_iters=0) # pyright: ignore[reportCallIssue]
127+
128+
129+
def test_max_infer_iters_rejects_negative() -> None:
130+
"""Test that max_infer_iters rejects a negative value."""
131+
with pytest.raises(ValueError):
132+
InferenceConfiguration(max_infer_iters=-1) # pyright: ignore[reportCallIssue]
133+
134+
135+
def test_max_tool_calls_rejects_zero() -> None:
136+
"""Test that max_tool_calls rejects zero."""
137+
with pytest.raises(ValueError):
138+
InferenceConfiguration(max_tool_calls=0) # pyright: ignore[reportCallIssue]
139+
140+
141+
def test_max_tool_calls_rejects_negative() -> None:
142+
"""Test that max_tool_calls rejects a negative value."""
143+
with pytest.raises(ValueError):
144+
InferenceConfiguration(max_tool_calls=-1) # pyright: ignore[reportCallIssue]
145+
146+
147+
def test_max_infer_iters_accepts_none() -> None:
148+
"""Test that max_infer_iters accepts None to disable the limit."""
149+
config = InferenceConfiguration(
150+
max_infer_iters=None
151+
) # pyright: ignore[reportCallIssue]
152+
assert config.max_infer_iters is None
153+
154+
155+
def test_max_tool_calls_accepts_none() -> None:
156+
"""Test that max_tool_calls accepts None to disable the limit."""
157+
config = InferenceConfiguration(
158+
max_tool_calls=None
159+
) # pyright: ignore[reportCallIssue]
160+
assert config.max_tool_calls is None

tests/unit/utils/test_responses.py

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,12 @@
5656
import constants
5757
from models.api.requests import QueryRequest
5858
from models.common.responses.types import InputTool, InputToolMCP
59-
from models.config import ApprovalFilter, ByokRag, ModelContextProtocolServer
59+
from models.config import (
60+
ApprovalFilter,
61+
ByokRag,
62+
InferenceConfiguration,
63+
ModelContextProtocolServer,
64+
)
6065
from utils.query import normalize_vertex_ai_model_id
6166
from utils.responses import (
6267
_build_chunk_attributes,
@@ -1976,7 +1981,7 @@ async def test_prepare_responses_params_with_conversation_id(
19761981
) # pyright: ignore[reportCallIssue]
19771982

19781983
mock_config = mocker.Mock()
1979-
mock_config.inference = None
1984+
mock_config.inference = InferenceConfiguration()
19801985
mocker.patch("utils.responses.configuration", mock_config)
19811986
mocker.patch("utils.responses.get_system_prompt", return_value="System prompt")
19821987
mocker.patch("utils.responses.prepare_tools", return_value=None)
@@ -2012,7 +2017,7 @@ async def test_prepare_responses_params_create_conversation(
20122017
query_request = QueryRequest(query="test") # pyright: ignore[reportCallIssue]
20132018

20142019
mock_config = mocker.Mock()
2015-
mock_config.inference = None
2020+
mock_config.inference = InferenceConfiguration()
20162021
mocker.patch("utils.responses.configuration", mock_config)
20172022
mocker.patch("utils.responses.get_system_prompt", return_value="System prompt")
20182023
mocker.patch("utils.responses.prepare_tools", return_value=None)
@@ -2038,7 +2043,7 @@ async def test_prepare_responses_params_connection_error_on_models(
20382043

20392044
query_request = QueryRequest(query="test") # pyright: ignore[reportCallIssue]
20402045
mock_config = mocker.Mock()
2041-
mock_config.inference = None
2046+
mock_config.inference = InferenceConfiguration()
20422047
mocker.patch("utils.responses.configuration", mock_config)
20432048

20442049
with pytest.raises(HTTPException) as exc_info:
@@ -2064,7 +2069,7 @@ async def test_prepare_responses_params_connection_error_on_conversation(
20642069
query_request = QueryRequest(query="test") # pyright: ignore[reportCallIssue]
20652070

20662071
mock_config = mocker.Mock()
2067-
mock_config.inference = None
2072+
mock_config.inference = InferenceConfiguration()
20682073
mocker.patch("utils.responses.configuration", mock_config)
20692074
mocker.patch("utils.responses.get_system_prompt", return_value="System prompt")
20702075
mocker.patch("utils.responses.prepare_tools", return_value=None)
@@ -2088,7 +2093,7 @@ async def test_prepare_responses_params_api_status_error_on_models(
20882093

20892094
query_request = QueryRequest(query="test") # pyright: ignore[reportCallIssue]
20902095
mock_config = mocker.Mock()
2091-
mock_config.inference = None
2096+
mock_config.inference = InferenceConfiguration()
20922097
mocker.patch("utils.responses.configuration", mock_config)
20932098

20942099
with pytest.raises(HTTPException) as exc_info:
@@ -2131,7 +2136,7 @@ async def test_prepare_responses_params_includes_mcp_provider_data_headers(
21312136
]
21322137

21332138
mock_config = mocker.Mock()
2134-
mock_config.inference = None
2139+
mock_config.inference = InferenceConfiguration()
21352140
mocker.patch("utils.responses.configuration", mock_config)
21362141
mocker.patch("utils.responses.get_system_prompt", return_value="System prompt")
21372142
mocker.patch(
@@ -2179,7 +2184,7 @@ async def test_prepare_responses_params_no_extra_headers_without_mcp_tools(
21792184
query_request = QueryRequest(query="test") # pyright: ignore[reportCallIssue]
21802185

21812186
mock_config = mocker.Mock()
2182-
mock_config.inference = None
2187+
mock_config.inference = InferenceConfiguration()
21832188
mocker.patch("utils.responses.configuration", mock_config)
21842189
mocker.patch("utils.responses.get_system_prompt", return_value="System prompt")
21852190
mocker.patch("utils.responses.prepare_tools", return_value=None)
@@ -2211,7 +2216,7 @@ async def test_prepare_responses_params_api_status_error_on_conversation(
22112216
query_request = QueryRequest(query="test") # pyright: ignore[reportCallIssue]
22122217

22132218
mock_config = mocker.Mock()
2214-
mock_config.inference = None
2219+
mock_config.inference = InferenceConfiguration()
22152220
mocker.patch("utils.responses.configuration", mock_config)
22162221
mocker.patch("utils.responses.get_system_prompt", return_value="System prompt")
22172222
mocker.patch("utils.responses.prepare_tools", return_value=None)

0 commit comments

Comments (0)