Skip to content

Commit 9cd3142

Browse files
committed
fix: surface model refusals as final output
1 parent 3a3f34f commit 9cd3142

5 files changed

Lines changed: 121 additions & 2 deletions

File tree

src/agents/items.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,19 @@ def extract_text(cls, message: TResponseOutputItem) -> str | None:
722722

723723
return text or None
724724

725+
@classmethod
def extract_refusal(cls, message: TResponseOutputItem) -> str | None:
    """Return the concatenated refusal text carried by an output message.

    Only ``ResponseOutputRefusal`` content items contribute; every other
    content item (for example plain output text) is skipped.

    Args:
        message: The output item to inspect.

    Returns:
        The refusal strings joined in content order, or ``None`` when
        ``message`` is not a ``ResponseOutputMessage`` or the joined
        refusal text is empty.
    """
    if not isinstance(message, ResponseOutputMessage):
        return None

    # A refusal item may carry a falsy ``refusal`` value; treat it as "".
    combined = "".join(
        part.refusal or ""
        for part in message.content
        if isinstance(part, ResponseOutputRefusal)
    )
    return combined or None
737+
725738
@classmethod
726739
def input_to_new_input_list(
727740
cls, input: str | list[TResponseInputItem]

src/agents/run_internal/turn_resolution.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,9 @@ async def execute_tools_and_side_effects(
671671
potential_final_output_text = (
672672
ItemHelpers.extract_text(message_items[-1].raw_item) if message_items else None
673673
)
674+
potential_final_output_refusal = (
675+
ItemHelpers.extract_refusal(message_items[-1].raw_item) if message_items else None
676+
)
674677

675678
if not processed_response.has_tools_or_approvals_to_run():
676679
has_tool_activity_without_message = not message_items and bool(
@@ -691,14 +694,33 @@ async def execute_tools_and_side_effects(
691694
tool_input_guardrail_results=tool_input_guardrail_results,
692695
tool_output_guardrail_results=tool_output_guardrail_results,
693696
)
697+
if (
698+
output_schema
699+
and not output_schema.is_plain_text()
700+
and potential_final_output_refusal
701+
):
702+
return await execute_final_output_call(
703+
public_agent=public_agent,
704+
original_input=original_input,
705+
new_response=new_response,
706+
pre_step_items=pre_step_items,
707+
new_step_items=new_step_items,
708+
final_output=potential_final_output_refusal,
709+
hooks=hooks,
710+
context_wrapper=context_wrapper,
711+
tool_input_guardrail_results=tool_input_guardrail_results,
712+
tool_output_guardrail_results=tool_output_guardrail_results,
713+
)
694714
if not output_schema or output_schema.is_plain_text():
695715
return await execute_final_output_call(
696716
public_agent=public_agent,
697717
original_input=original_input,
698718
new_response=new_response,
699719
pre_step_items=pre_step_items,
700720
new_step_items=new_step_items,
701-
final_output=potential_final_output_text or "",
721+
final_output=potential_final_output_text
722+
or potential_final_output_refusal
723+
or "",
702724
hooks=hooks,
703725
context_wrapper=context_wrapper,
704726
tool_input_guardrail_results=tool_input_guardrail_results,

tests/test_items_helpers.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,27 @@ def test_extract_text_concatenates_all_text_segments() -> None:
128128
)
129129

130130

131+
def test_extract_refusal_concatenates_all_refusal_segments() -> None:
    """Refusal parts are joined in order, text parts are skipped, and a
    non-message output item yields ``None``."""
    parts = [
        ResponseOutputRefusal(refusal="no", type="refusal"),
        ResponseOutputText(annotations=[], text="ignored", type="output_text", logprobs=[]),
        ResponseOutputRefusal(refusal=" way", type="refusal"),
    ]

    assert ItemHelpers.extract_refusal(make_message(parts)) == "no way"

    # A function tool call is not a message, so there is nothing to extract.
    tool_call = ResponseFunctionToolCall(
        id="tool123",
        arguments="{}",
        call_id="call123",
        name="func",
        type="function_call",
    )
    assert ItemHelpers.extract_refusal(tool_call) is None
150+
151+
131152
def test_extract_text_tolerates_none_text_content() -> None:
132153
"""Regression: ``content_item.text`` can be ``None`` when output items
133154
are assembled via ``model_construct`` (e.g. partial streaming responses)

tests/test_max_turns.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import json
44

55
import pytest
6+
from openai.types.responses.response_output_message import ResponseOutputMessage
7+
from openai.types.responses.response_output_refusal import ResponseOutputRefusal
68
from pydantic import BaseModel
79
from typing_extensions import TypedDict
810

@@ -21,6 +23,16 @@
2123
from .test_responses import get_function_tool, get_function_tool_call, get_text_message
2224

2325

26+
def get_refusal_message(refusal: str) -> ResponseOutputMessage:
    """Build a completed assistant message whose only content is a refusal.

    Args:
        refusal: The refusal text to place in the single content item.

    Returns:
        A ``ResponseOutputMessage`` containing one ``ResponseOutputRefusal``.
    """
    refusal_part = ResponseOutputRefusal(type="refusal", refusal=refusal)
    return ResponseOutputMessage(
        id="1",
        type="message",
        role="assistant",
        content=[refusal_part],
        status="completed",
    )
34+
35+
2436
@pytest.mark.asyncio
2537
async def test_non_streamed_max_turns():
2638
model = FakeModel()
@@ -141,6 +153,57 @@ async def test_structured_output_streamed_max_turns():
141153
pass
142154

143155

156+
@pytest.mark.asyncio
async def test_structured_output_refusal_finishes_without_retries():
    """With a structured ``output_type``, a refusal becomes the final output
    and only a single model response is recorded."""
    expected_refusal = "I can't help with that request."
    agent = Agent(
        name="test_1",
        model=FakeModel(initial_output=[get_refusal_message(expected_refusal)]),
        output_type=Foo,
    )

    result = await Runner.run(agent, input="user_message", max_turns=3)

    assert result.final_output == expected_refusal
    # Exactly one raw response: the run ended on the refusal turn.
    assert len(result.raw_responses) == 1
    first_output_item = result.raw_responses[0].output[0]
    assert ItemHelpers.extract_refusal(first_output_item) == expected_refusal
171+
172+
173+
@pytest.mark.asyncio
async def test_structured_output_refusal_streamed_finishes_without_retries():
    """Streaming variant: with a structured ``output_type``, a refusal becomes
    the final output and only a single model response is recorded."""
    expected_refusal = "I can't help with that request."
    agent = Agent(
        name="test_1",
        model=FakeModel(initial_output=[get_refusal_message(expected_refusal)]),
        output_type=Foo,
    )

    result = Runner.run_streamed(agent, input="user_message", max_turns=3)
    # Drain the event stream before inspecting the result.
    async for _event in result.stream_events():
        pass

    assert result.final_output == expected_refusal
    # Exactly one raw response: the run ended on the refusal turn.
    assert len(result.raw_responses) == 1
    first_output_item = result.raw_responses[0].output[0]
    assert ItemHelpers.extract_refusal(first_output_item) == expected_refusal
190+
191+
192+
@pytest.mark.asyncio
async def test_plain_text_refusal_finishes_as_final_output():
    """Without an ``output_type``, a refusal message is returned as the final
    output and only a single model response is recorded."""
    expected_refusal = "I can't help with that request."
    agent = Agent(
        name="test_1",
        model=FakeModel(initial_output=[get_refusal_message(expected_refusal)]),
    )

    result = await Runner.run(agent, input="user_message", max_turns=3)

    assert result.final_output == expected_refusal
    assert len(result.raw_responses) == 1
205+
206+
144207
@pytest.mark.asyncio
145208
async def test_structured_output_max_turns_handler_invalid_output():
146209
model = FakeModel()

tests/test_run_step_execution.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ async def test_plaintext_agent_hosted_shell_with_refusal_message_is_final_output
409409
assert isinstance(result.generated_items[1], ToolCallOutputItem)
410410
assert isinstance(result.generated_items[2], MessageOutputItem)
411411
assert isinstance(result.next_step, NextStepFinalOutput)
412-
assert result.next_step.output == ""
412+
assert result.next_step.output == "I cannot help with that."
413413

414414

415415
@pytest.mark.asyncio

0 commit comments

Comments
 (0)