11"""OpenAI model provider using the Responses API.
22
3- The Responses API is OpenAI's newer API that differs from the Chat Completions API in several key ways:
3+ Note: Built-in tools (web search, code interpreter, file search) are not yet supported.
44
5- 1. The Responses API can maintain conversation state server-side through "previous_response_id",
6- while Chat Completions is stateless and requires sending full conversation history each time.
7- Note: This implementation currently only implements the stateless approach.
8-
9- 2. Responses API uses "input" (list of items) instead of "messages", and system
10- prompts are passed as "instructions" rather than a system role message.
11-
12- 3. Responses API supports built-in tools (web search, code interpreter, file search)
13- Note: These are not yet implemented in this provider.
14-
15- - Docs: https://platform.openai.com/docs/api-reference/responses
5+ Docs: https://platform.openai.com/docs/api-reference/responses
166"""
177
188import base64
@@ -132,10 +122,14 @@ class OpenAIResponsesConfig(TypedDict, total=False):
132122 params: Model parameters (e.g., max_output_tokens, temperature, etc.).
133123 For a complete list of supported parameters, see
134124 https://platform.openai.com/docs/api-reference/responses/create.
125+ stateful: Whether to enable server-side conversation state management.
126+ When True, the server stores conversation history and the client does not need to
127+ send the full message history with each request. Defaults to False.
135128 """
136129
137130 model_id : str
138131 params : dict [str , Any ] | None
132+ stateful : bool
139133
140134 def __init__ (
141135 self , client_args : dict [str , Any ] | None = None , ** model_config : Unpack [OpenAIResponsesConfig ]
@@ -153,6 +147,15 @@ def __init__(
153147
154148 logger .debug ("config=<%s> | initializing" , self .config )
155149
150+ @property
151+ @override
152+ def stateful (self ) -> bool :
153+ """Whether server-side conversation storage is enabled.
154+
155+ Derived from the ``stateful`` configuration option.
156+ """
157+ return bool (self .config .get ("stateful" ))
158+
156159 @override
157160 def update_config (self , ** model_config : Unpack [OpenAIResponsesConfig ]) -> None : # type: ignore[override]
158161 """Update the OpenAI Responses API model configuration with the provided arguments.
@@ -180,6 +183,7 @@ async def stream(
180183 system_prompt : str | None = None ,
181184 * ,
182185 tool_choice : ToolChoice | None = None ,
186+ model_state : dict [str , Any ] | None = None ,
183187 ** kwargs : Any ,
184188 ) -> AsyncGenerator [StreamEvent , None ]:
185189 """Stream conversation with the OpenAI Responses API model.
@@ -189,6 +193,7 @@ async def stream(
189193 tool_specs: List of tool specifications to make available to the model.
190194 system_prompt: System prompt to provide context to the model.
191195 tool_choice: Selection strategy for tool invocation.
196+ model_state: Runtime state for model providers (e.g., server-side response ids).
192197 **kwargs: Additional keyword arguments for future extensibility.
193198
194199 Yields:
@@ -199,7 +204,7 @@ async def stream(
199204 ModelThrottledException: If the request is throttled by OpenAI (rate limits).
200205 """
201206 logger .debug ("formatting request for OpenAI Responses API" )
202- request = self ._format_request (messages , tool_specs , system_prompt , tool_choice )
207+ request = self ._format_request (messages , tool_specs , system_prompt , tool_choice , model_state )
203208 logger .debug ("formatted request=<%s>" , request )
204209
205210 logger .debug ("invoking OpenAI Responses API model" )
@@ -219,7 +224,14 @@ async def stream(
219224
220225 async for event in response :
221226 if hasattr (event , "type" ):
222- if event .type == "response.reasoning_text.delta" :
227+ if event .type == "response.created" :
228+ # Capture response id for server-side conversation chaining
229+ if hasattr (event , "response" ):
230+ response_id = getattr (event .response , "id" , None )
231+ if model_state is not None and response_id :
232+ model_state ["response_id" ] = response_id
233+
234+ elif event .type == "response.reasoning_text.delta" :
223235 # Reasoning content streaming (for o1/o3 reasoning models)
224236 chunks , data_type = self ._stream_switch_content ("reasoning_content" , data_type )
225237 for chunk in chunks :
@@ -383,6 +395,7 @@ def _format_request(
383395 tool_specs : list [ToolSpec ] | None = None ,
384396 system_prompt : str | None = None ,
385397 tool_choice : ToolChoice | None = None ,
398+ model_state : dict [str , Any ] | None = None ,
386399 ) -> dict [str , Any ]:
387400 """Format an OpenAI Responses API compatible response streaming request.
388401
@@ -391,6 +404,7 @@ def _format_request(
391404 tool_specs: List of tool specifications to make available to the model.
392405 system_prompt: System prompt to provide context to the model.
393406 tool_choice: Selection strategy for tool invocation.
407+ model_state: Runtime state for model providers (e.g., server-side response ids).
394408
395409 Returns:
396410 An OpenAI Responses API compatible response streaming request.
@@ -400,13 +414,18 @@ def _format_request(
400414 format.
401415 """
402416 input_items = self ._format_request_messages (messages )
403- request = {
417+ request : dict [ str , Any ] = {
404418 "model" : self .config ["model_id" ],
405419 "input" : input_items ,
406420 "stream" : True ,
407421 ** cast (dict [str , Any ], self .config .get ("params" , {})),
422+ "store" : self .stateful ,
408423 }
409424
425+ response_id = model_state .get ("response_id" ) if model_state else None
426+ if response_id and self .stateful :
427+ request ["previous_response_id" ] = response_id
428+
410429 if system_prompt :
411430 request ["instructions" ] = system_prompt
412431
0 commit comments