Skip to content

Commit 25d7d25

Browse files
committed
Added e2e tests for tool choices in responses endpoint
1 parent 09f03e9 commit 25d7d25

7 files changed

Lines changed: 792 additions & 55 deletions

File tree

src/app/endpoints/responses.py

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -234,13 +234,14 @@ async def responses_endpoint_handler(
234234
)
235235

236236
# Build RAG context from Inline RAG sources
237-
inline_rag_context = await build_rag_context(
238-
client,
239-
moderation_result.decision,
240-
input_text,
241-
vector_store_ids,
242-
responses_request.solr,
243-
)
237+
# inline_rag_context = await build_rag_context(
238+
# client,
239+
# moderation_result.decision,
240+
# input_text,
241+
# vector_store_ids,
242+
# responses_request.solr,
243+
# )
244+
inline_rag_context = RAGContext()
244245
if moderation_result.decision == "passed":
245246
responses_request.input = append_inline_rag_context_to_responses_input(
246247
responses_request.input, inline_rag_context.context_text
@@ -654,6 +655,7 @@ async def handle_non_streaming_response(
654655
)
655656
else:
656657
try:
658+
print("API Params: ", api_params.model_dump(exclude_none=True))
657659
api_response = cast(
658660
OpenAIResponseObject,
659661
await client.responses.create(

src/app/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,5 +229,5 @@ async def send_wrapper(message: Message) -> None:
229229
# RestApiMetricsMiddleware (registered last) is outermost. This ensures metrics
230230
# always observe a status code — including 500s synthesised by the exception
231231
# middleware — rather than seeing a raw exception with no response.
232-
app.add_middleware(GlobalExceptionMiddleware)
232+
#app.add_middleware(GlobalExceptionMiddleware)
233233
app.add_middleware(RestApiMetricsMiddleware)

src/utils/responses.py

Lines changed: 91 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,9 @@
2626
from llama_stack_api.openai_responses import (
2727
OpenAIResponseInputToolChoice as ToolChoice,
2828
)
29+
from llama_stack_api.openai_responses import (
30+
OpenAIResponseInputToolChoiceAllowedTools as AllowedTools,
31+
)
2932
from llama_stack_api.openai_responses import (
3033
OpenAIResponseInputToolChoiceMode as ToolChoiceMode,
3134
)
@@ -417,6 +420,55 @@ def extract_vector_store_ids_from_tools(
417420
return vector_store_ids
418421

419422

423+
def _tool_matches_allowed_entry(tool: InputTool, entry: dict[str, str]) -> bool:
    """Check whether a tool is selected by a single allowlist entry.

    An entry maps attribute names to expected string values (for example
    ``type``, ``server_label`` or ``name``). The tool is selected only when
    it exposes every named attribute with a non-None value that equals the
    expected value, either directly or through its ``str()`` form.

    Parameters:
        tool: A configured input tool.
        entry: One allowlist entry from ``allowed_tools.tools``.

    Returns:
        True when every key/value pair of the entry matches the tool. An
        empty entry has nothing to violate and therefore matches any tool.
    """

    def _pair_matches(attr_name: str, expected: str) -> bool:
        # A missing attribute and an attribute set to None are both mismatches.
        actual = getattr(tool, attr_name, None)
        if actual is None:
            return False
        # Fall back to the string form so non-str attributes (ints, enums
        # whose str() is the raw value, ...) can be matched by string entries.
        return not (actual != expected and str(actual) != expected)

    return all(_pair_matches(name, wanted) for name, wanted in entry.items())
446+
447+
448+
def filter_tools_by_allowed_entries(
    tools: list[InputTool],
    allowed_entries: list[dict[str, str]],
) -> list[InputTool]:
    """Restrict a tool list to the tools selected by an allowlist.

    A tool is kept when at least one allowlist entry matches it. The
    allowlist is strict: an empty ``allowed_entries`` keeps nothing.

    Parameters:
        tools: Tools to filter (typically after translation / preparation).
        allowed_entries: Entries from ``OpenAIResponseInputToolChoiceAllowedTools.tools``.

    Returns:
        A new list holding the matching tools in their original order.
    """
    selected: list[InputTool] = []
    if not allowed_entries:
        # Strict allowlist: no entries means no tool is permitted.
        return selected
    for candidate in tools:
        for allowed in allowed_entries:
            if _tool_matches_allowed_entry(candidate, allowed):
                selected.append(candidate)
                # One matching entry is enough; skip the remaining entries.
                break
    return selected
470+
471+
420472
def resolve_vector_store_ids(
421473
vector_store_ids: list[str], byok_rags: list[ByokRag]
422474
) -> list[str]:
@@ -1332,10 +1384,19 @@ async def resolve_tool_choice(
13321384
) -> tuple[Optional[list[InputTool]], Optional[ToolChoice], Optional[list[str]]]:
13331385
"""Resolve tools and tool_choice for the Responses API.
13341386
1335-
If the request includes tools, uses them as-is and derives vector_store_ids
1336-
from tool configs; otherwise loads tools via prepare_tools (using all
1337-
configured vector stores) and honors tool_choice "none" via the no_tools
1338-
flag. When no tools end up configured, tool_choice is cleared to None.
1387+
If ``tool_choice`` is ``none``, always returns ``(None, None, None)`` — no
1388+
tools are sent to Llama Stack, even when the request included explicit
1389+
``tools`` (e.g. file_search).
1390+
1391+
If ``tool_choice`` is ``allowed_tools``, it is rewritten for downstream
1392+
services: tools are filtered to those matching the allowlist entries, and
1393+
``tool_choice`` becomes ``auto`` or ``required`` per the allowlist ``mode``.
1394+
1395+
If the request includes tools and tool_choice is not ``none``, uses them
1396+
(after allowlist filtering) and derives vector_store_ids from the prepared
1397+
tools; otherwise loads tools via prepare_tools (using all configured vector
1398+
stores), then applies allowlist filtering when present. When no tools end
1399+
up configured, tool_choice is cleared to None.
13391400
13401401
Args:
13411402
tools: Tools from the request, or None to use LCORE-configured tools.
@@ -1349,35 +1410,46 @@ async def resolve_tool_choice(
13491410
prepared_tools is the list of tools to use, or None if none configured;
13501411
prepared_tool_choice is the resolved tool choice, or None when there
13511412
are no tools; vector_store_ids is extracted from tools (in user-facing format)
1352-
when provided, otherwise None.
1413+
when provided, otherwise None (also None when tool_choice is ``none``).
13531414
"""
1415+
if isinstance(tool_choice, ToolChoiceMode) and tool_choice == ToolChoiceMode.none:
1416+
return None, None, None
1417+
1418+
allowed_filters: Optional[list[dict[str, str]]] = None
1419+
if isinstance(tool_choice, AllowedTools):
1420+
allowed_filters = tool_choice.tools
1421+
tool_choice = ToolChoiceMode(tool_choice.mode)
1422+
13541423
prepared_tools: Optional[list[InputTool]] = None
1355-
client = AsyncLlamaStackClientHolder().get_client()
13561424
if tools: # explicitly specified in request
1357-
# Per-request override of vector stores (user-facing rag_ids)
1358-
vector_store_ids = extract_vector_store_ids_from_tools(tools)
1359-
# Translate user-facing rag_ids to llama-stack vector_store_ids in each file_search tool
13601425
byok_rags = configuration.configuration.byok_rag
13611426
prepared_tools = translate_tools_vector_store_ids(tools, byok_rags)
1427+
if allowed_filters is not None:
1428+
prepared_tools = filter_tools_by_allowed_entries(
1429+
prepared_tools, allowed_filters
1430+
)
1431+
if not prepared_tools:
1432+
return None, None, None
1433+
vector_store_ids_list = extract_vector_store_ids_from_tools(prepared_tools)
1434+
vector_store_ids = vector_store_ids_list if vector_store_ids_list else None
13621435
prepared_tool_choice = tool_choice or ToolChoiceMode.auto
13631436
else:
1364-
# Vector stores were not overwritten in request, use all configured vector stores
13651437
vector_store_ids = None
1366-
# Get all tools configured in LCORE (returns None or non-empty list)
1367-
no_tools = (
1368-
isinstance(tool_choice, ToolChoiceMode)
1369-
and tool_choice == ToolChoiceMode.none
1370-
)
1371-
# Vector stores are prepared in llama-stack format
1438+
client = AsyncLlamaStackClientHolder().get_client()
13721439
prepared_tools = await prepare_tools(
13731440
client=client,
1374-
vector_store_ids=vector_store_ids, # allow all configured vector stores
1375-
no_tools=no_tools,
1441+
vector_store_ids=vector_store_ids,
1442+
no_tools=False,
13761443
token=token,
13771444
mcp_headers=mcp_headers,
13781445
request_headers=request_headers,
13791446
)
1380-
# If there are no tools, tool_choice cannot be set at all - LLS implicit behavior
1447+
if allowed_filters is not None and prepared_tools:
1448+
prepared_tools = filter_tools_by_allowed_entries(
1449+
prepared_tools, allowed_filters
1450+
)
1451+
if not prepared_tools:
1452+
prepared_tools = None
13811453
prepared_tool_choice = tool_choice if prepared_tools else None
13821454

13831455
return prepared_tools, prepared_tool_choice, vector_store_ids

tests/e2e/features/responses.feature

Lines changed: 126 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ Feature: Responses endpoint API tests
55
Given The service is started locally
66
And REST API service prefix is /v1
77

8-
Scenario: Check if responses endpoint returns 200 for minimal request
8+
Scenario: Check if responses endpoint answers a minimal question
99
Given The system is in default state
1010
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
1111
When I use "responses" to ask question with authorization header
@@ -14,11 +14,134 @@ Feature: Responses endpoint API tests
1414
"""
1515
Then The status code of the response is 200
1616

17-
Scenario: Check if responses endpoint returns 200 for minimal streaming request
17+
Scenario: Check if responses endpoint streams a minimal answer
1818
Given The system is in default state
1919
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
2020
When I use "responses" to ask question with authorization header
2121
"""
2222
{"input": "Say hello", "model": "{PROVIDER}/{MODEL}", "stream": true}
2323
"""
24-
Then The status code of the response is 200
24+
Then The status code of the response is 200
25+
26+
Scenario: Check if responses endpoint with tool_choice none answers knowledge question without file search usage
27+
Given The system is in default state
28+
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
29+
And I capture the current token metrics
30+
When I use "responses" to ask question with authorization header
31+
"""
32+
{
33+
"input": "What is the title of the article from Paul?",
34+
"model": "{PROVIDER}/{MODEL}",
35+
"stream": false,
36+
"instructions": "You are an assistant. You MUST use the file_search tool to answer. Answer in lowercase.",
37+
"tool_choice": "none"
38+
}
39+
"""
40+
Then The status code of the response is 200
41+
And The responses output should not include any tool invocation item types
42+
And The token metrics should have increased
43+
44+
Scenario: Check if responses endpoint with tool_choice auto answers a knowledge question using file search
45+
Given The system is in default state
46+
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
47+
And I capture the current token metrics
48+
When I use "responses" to ask question with authorization header
49+
"""
50+
{
51+
"input": "What is the title of the article from Paul?",
52+
"model": "{PROVIDER}/{MODEL}",
53+
"stream": false,
54+
"instructions": "You are an assistant. You MUST use the file_search tool to answer. Answer in lowercase.",
55+
"tool_choice": "auto"
56+
}
57+
"""
58+
Then The status code of the response is 200
59+
And The responses output should include an item with type "file_search_call"
60+
And The responses output_text should contain following fragments
61+
| Fragments in LLM response |
62+
| great work |
63+
And The token metrics should have increased
64+
65+
Scenario: Check if responses endpoint with tool_choice required still invokes document search for a basic question
66+
Given The system is in default state
67+
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
68+
And I capture the current token metrics
69+
When I use "responses" to ask question with authorization header
70+
"""
71+
{
72+
"input": "Hello World!",
73+
"model": "{PROVIDER}/{MODEL}",
74+
"stream": false,
75+
"tool_choice": "required"
76+
}
77+
"""
78+
Then The status code of the response is 200
79+
And The responses output should include an item with type "file_search_call"
80+
And The token metrics should have increased
81+
82+
Scenario: Check if responses endpoint with file search as the chosen tool answers using file search
83+
Given The system is in default state
84+
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
85+
And I capture the current token metrics
86+
When I use "responses" to ask question with authorization header
87+
"""
88+
{
89+
"input": "What is the title of the article from Paul?",
90+
"model": "{PROVIDER}/{MODEL}",
91+
"stream": false,
92+
"instructions": "You are an assistant. You MUST use the file_search tool to answer. Answer in lowercase.",
93+
"tool_choice": {"type": "file_search"}
94+
}
95+
"""
96+
Then The status code of the response is 200
97+
And The responses output should include an item with type "file_search_call"
98+
And The responses output_text should contain following fragments
99+
| Fragments in LLM response |
100+
| great work |
101+
And The token metrics should have increased
102+
103+
Scenario: Check if responses endpoint with allowed tools in automatic mode answers knowledge question using file search
104+
Given The system is in default state
105+
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
106+
And I capture the current token metrics
107+
When I use "responses" to ask question with authorization header
108+
"""
109+
{
110+
"input": "What is the title of the article from Paul?",
111+
"model": "{PROVIDER}/{MODEL}",
112+
"stream": false,
113+
"instructions": "You are an assistant. You MUST use the file_search tool to answer. Answer in lowercase.",
114+
"tool_choice": {
115+
"type": "allowed_tools",
116+
"mode": "auto",
117+
"tools": [{"type": "file_search"}]
118+
}
119+
}
120+
"""
121+
Then The status code of the response is 200
122+
And The responses output should include an item with type "file_search_call"
123+
And The responses output_text should contain following fragments
124+
| Fragments in LLM response |
125+
| great work |
126+
And The token metrics should have increased
127+
128+
Scenario: Check if responses endpoint with allowed tools in required mode invokes file search for a basic question
129+
Given The system is in default state
130+
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
131+
And I capture the current token metrics
132+
When I use "responses" to ask question with authorization header
133+
"""
134+
{
135+
"input": "Hello world!",
136+
"model": "{PROVIDER}/{MODEL}",
137+
"stream": false,
138+
"tool_choice": {
139+
"type": "allowed_tools",
140+
"mode": "required",
141+
"tools": [{"type": "file_search"}]
142+
}
143+
}
144+
"""
145+
Then The status code of the response is 200
146+
And The responses output should include an item with type "file_search_call"
147+
And The token metrics should have increased

0 commit comments

Comments
 (0)