Skip to content

Commit 18b9a10

Browse files
authored
Merge pull request #1414 from asimurka/e2e_tests_responses_tools
LCORE-1270: E2e tests responses tools
2 parents b3d8cbb + 506a86f commit 18b9a10

2 files changed

Lines changed: 261 additions & 0 deletions

File tree

tests/e2e/features/responses.feature

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,3 +427,172 @@ Feature: Responses endpoint API tests
427427
"""
428428
Then The status code of the response is 503
429429
And The body of the response contains Unable to connect to Llama Stack
430+
431+
432+
Scenario: Responses endpoint with tool_choice none answers knowledge question without file search usage
433+
Given The system is in default state
434+
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
435+
And I capture the current token metrics
436+
When I use "responses" to ask question with authorization header
437+
"""
438+
{
439+
"input": "What is the title of the article from Paul?",
440+
"model": "{PROVIDER}/{MODEL}",
441+
"stream": false,
442+
"instructions": "You are an assistant. You MUST use the file_search tool to answer. Answer in lowercase.",
443+
"tool_choice": "none"
444+
}
445+
"""
446+
Then The status code of the response is 200
447+
And The responses output should not include any tool invocation item types
448+
And The token metrics should have increased
449+
450+
Scenario: Check if responses endpoint with tool_choice auto answers a knowledge question using file search
451+
Given The system is in default state
452+
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
453+
And I capture the current token metrics
454+
When I use "responses" to ask question with authorization header
455+
"""
456+
{
457+
"input": "What is the title of the article from Paul?",
458+
"model": "{PROVIDER}/{MODEL}",
459+
"stream": false,
460+
"instructions": "You are an assistant. You MUST use the file_search tool to answer. Answer in lowercase.",
461+
"tool_choice": "auto"
462+
}
463+
"""
464+
Then The status code of the response is 200
465+
And The responses output should include an item with type "file_search_call"
466+
And The responses output_text should contain following fragments
467+
| Fragments in LLM response |
468+
| great work |
469+
And The token metrics should have increased
470+
471+
Scenario: Check if responses endpoint with tool_choice required still invokes document search for a basic question
472+
Given The system is in default state
473+
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
474+
And I capture the current token metrics
475+
When I use "responses" to ask question with authorization header
476+
"""
477+
{
478+
"input": "Hello World!",
479+
"model": "{PROVIDER}/{MODEL}",
480+
"stream": false,
481+
"tool_choice": "required"
482+
}
483+
"""
484+
Then The status code of the response is 200
485+
And The responses output should include an item with type "file_search_call"
486+
And The token metrics should have increased
487+
488+
Scenario: Check if responses endpoint with file search as the chosen tool answers using file search
489+
Given The system is in default state
490+
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
491+
And I capture the current token metrics
492+
When I use "responses" to ask question with authorization header
493+
"""
494+
{
495+
"input": "What is the title of the article from Paul?",
496+
"model": "{PROVIDER}/{MODEL}",
497+
"stream": false,
498+
"instructions": "You are an assistant. You MUST use the file_search tool to answer. Answer in lowercase.",
499+
"tool_choice": {"type": "file_search"}
500+
}
501+
"""
502+
Then The status code of the response is 200
503+
And The responses output should include an item with type "file_search_call"
504+
And The responses output_text should contain following fragments
505+
| Fragments in LLM response |
506+
| great work |
507+
And The token metrics should have increased
508+
509+
Scenario: Check if responses endpoint with allowed tools in automatic mode answers knowledge question using file search
510+
Given The system is in default state
511+
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
512+
And I capture the current token metrics
513+
When I use "responses" to ask question with authorization header
514+
"""
515+
{
516+
"input": "What is the title of the article from Paul?",
517+
"model": "{PROVIDER}/{MODEL}",
518+
"stream": false,
519+
"instructions": "You are an assistant. You MUST use the file_search tool to answer. Answer in lowercase.",
520+
"tool_choice": {
521+
"type": "allowed_tools",
522+
"mode": "auto",
523+
"tools": [{"type": "file_search"}]
524+
}
525+
}
526+
"""
527+
Then The status code of the response is 200
528+
And The responses output should include an item with type "file_search_call"
529+
And The responses output_text should contain following fragments
530+
| Fragments in LLM response |
531+
| great work |
532+
And The token metrics should have increased
533+
534+
Scenario: Check if responses endpoint with allowed tools in required mode invokes file search for a basic question
535+
Given The system is in default state
536+
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
537+
And I capture the current token metrics
538+
When I use "responses" to ask question with authorization header
539+
"""
540+
{
541+
"input": "Hello world!",
542+
"model": "{PROVIDER}/{MODEL}",
543+
"stream": false,
544+
"tool_choice": {
545+
"type": "allowed_tools",
546+
"mode": "required",
547+
"tools": [{"type": "file_search"}]
548+
}
549+
}
550+
"""
551+
Then The status code of the response is 200
552+
And The responses output should include an item with type "file_search_call"
553+
And The token metrics should have increased
554+
555+
Scenario: Allowed tools auto mode with only MCP in allowlist does not use file search for knowledge question
556+
Given The system is in default state
557+
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
558+
And I capture the current token metrics
559+
When I use "responses" to ask question with authorization header
560+
"""
561+
{
562+
"input": "What is the title of the article from Paul?",
563+
"model": "{PROVIDER}/{MODEL}",
564+
"stream": false,
565+
"instructions": "You are an assistant. Answer in lowercase.",
566+
"tool_choice": {
567+
"type": "allowed_tools",
568+
"mode": "auto",
569+
"tools": [{"type": "mcp"}]
570+
}
571+
}
572+
"""
573+
Then The status code of the response is 200
574+
And The responses output should not include an item with type "file_search_call"
575+
And The token metrics should have increased
576+
577+
Scenario: Required allowed_tools with invalid filter returns no tool invocations on knowledge question
578+
Given The system is in default state
579+
And I set the Authorization header to Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6Ikpva
580+
And I capture the current token metrics
581+
When I use "responses" to ask question with authorization header
582+
"""
583+
{
584+
"input": "What is the title of the article from Paul?",
585+
"model": "{PROVIDER}/{MODEL}",
586+
"stream": false,
587+
"instructions": "You are an assistant. You MUST use the file_search tool to answer. Answer in lowercase.",
588+
"tools": [],
589+
"tool_choice": {
590+
"type": "allowed_tools",
591+
"mode": "required",
592+
"tools": [{"non-existing": "tool"}]
593+
}
594+
}
595+
"""
596+
Then The status code of the response is 200
597+
And The responses output should not include any tool invocation item types
598+
And The token metrics should have increased

tests/e2e/features/steps/llm_query_response.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import json
44
import os
5+
from typing import Any, cast
56

67
import requests
78
from behave import step, then # pyright: ignore[reportAttributeAccessIssue]
@@ -12,6 +13,75 @@
1213
# Longer timeout for Prow/OpenShift with CPU-based vLLM
1314
DEFAULT_LLM_TIMEOUT = 180 if os.getenv("RUNNING_PROW") else 60
1415

16+
# Responses API ``output`` item types that indicate tool listing or invocation.
17+
_RESPONSE_TOOL_OUTPUT_ITEM_TYPES = frozenset(
18+
{
19+
"file_search_call",
20+
"mcp_call",
21+
"mcp_list_tools",
22+
"function_call",
23+
"web_search_call",
24+
}
25+
)
26+
27+
28+
def _collect_output_item_types(response_body: dict[str, Any]) -> list[str]:
29+
"""Collect ``type`` from each top-level ``output`` item in a Responses API JSON body."""
30+
output = cast(list[dict[str, Any]], response_body["output"])
31+
return [item["type"] for item in output]
32+
33+
34+
@then("The responses output should not include any tool invocation item types")
def responses_output_should_not_include_tool_items(context: Context) -> None:
    """Verify the Responses JSON ``output`` array holds no tool-related items.

    An item counts as tool-related when its ``type`` is a member of
    ``_RESPONSE_TOOL_OUTPUT_ITEM_TYPES``.
    """
    assert context.response is not None, "Request needs to be performed first"
    types_found = _collect_output_item_types(
        cast(dict[str, Any], context.response.json())
    )
    # Keep every offending type (duplicates included) so the failure message
    # shows exactly what the service returned.
    bad: list[str] = []
    for output_type in types_found:
        if output_type in _RESPONSE_TOOL_OUTPUT_ITEM_TYPES:
            bad.append(output_type)
    assert not bad, (
        "Expected no tool-related output items, but found types "
        f"{bad!r} among all output types {types_found!r}"
    )
45+
46+
47+
@then('The responses output should include an item with type "{item_type}"')
def responses_output_should_include_item_type(context: Context, item_type: str) -> None:
    """Verify that some ``output`` item of the response carries ``item_type``.

    ``item_type`` is the value captured from the quoted part of the step text.
    """
    assert context.response is not None, "Request needs to be performed first"
    types_found = _collect_output_item_types(
        cast(dict[str, Any], context.response.json())
    )
    is_present = item_type in types_found
    assert is_present, (
        f"Expected output item type {item_type!r} not found; "
        f"had types {types_found!r}"
    )
57+
58+
59+
@then('The responses output should not include an item with type "{item_type}"')
def responses_output_should_not_include_item_type(
    context: Context, item_type: str
) -> None:
    """Verify that none of the response ``output`` items carries ``item_type``.

    ``item_type`` is the value captured from the quoted part of the step text.
    """
    assert context.response is not None, "Request needs to be performed first"
    types_found = _collect_output_item_types(
        cast(dict[str, Any], context.response.json())
    )
    is_absent = item_type not in types_found
    assert is_absent, (
        f"Expected output item type {item_type!r} to be absent; "
        f"but found types {types_found!r}"
    )
71+
72+
73+
@then("The responses output should include an item with one of these types")
def responses_output_should_include_one_of_types(context: Context) -> None:
    """Verify the response ``output`` contains at least one type listed in the table.

    The scenario table must provide an ``item type`` column; each cell is
    stripped of surrounding whitespace before it is compared.
    """
    assert context.response is not None, "Request needs to be performed first"
    assert context.table is not None, "Table with column 'item type' is required"
    allowed = []
    for row in context.table:
        allowed.append(row["item type"].strip())
    types_found = _collect_output_item_types(
        cast(dict[str, Any], context.response.json())
    )
    assert any(
        found in allowed for found in types_found
    ), f"Expected at least one of {allowed!r} in output types {types_found!r}"
84+
1585

1686
@step("I wait for the response to be completed")
1787
def wait_for_complete_response(context: Context) -> None:
@@ -163,6 +233,28 @@ def check_referenced_documents_present(context: Context) -> None:
163233
), "referenced_documents is empty — no documents were referenced"
164234

165235

236+
@then("The responses output_text should contain following fragments")
def check_fragments_in_responses_output_text(context: Context) -> None:
    """Verify every table fragment is a substring of the JSON ``output_text`` field.

    Intended for POST ``/v1/responses`` bodies; the query endpoint exposes its
    text under the ``response`` field instead.
    """
    assert context.response is not None, "Request needs to be performed first"

    response_json = context.response.json()
    has_output_text = "output_text" in response_json
    assert (
        has_output_text
    ), f"Expected 'output_text' in JSON body, got keys: {list(response_json.keys())}"
    output_text = response_json["output_text"]

    assert context.table is not None, "Fragments are not specified in table"

    # Rows are checked in table order; the first missing fragment fails the step.
    for row in context.table:
        expected = row["Fragments in LLM response"]
        found = expected in output_text
        assert (
            found
        ), f"Fragment '{expected}' not found in output_text: '{output_text}'"
256+
257+
166258
@then("The response should contain following fragments")
167259
def check_fragments_in_response(context: Context) -> None:
168260
"""Check that all specified fragments are present in the LLM response.

0 commit comments

Comments
 (0)