@@ -243,14 +243,17 @@ def _prepare_conversation_continuation(
243243 return trimmed_messages , cast (ModelSettings , new_settings )
244244
245245 @asynccontextmanager
246- async def request_stream (
246+ async def request_stream ( # pylint: disable=unused-argument
247247 self ,
248248 messages : list [ModelMessage ],
249249 model_settings : ModelSettings | None ,
250250 model_request_parameters : ModelRequestParameters ,
251251 run_context : RunContext [Any ] | None = None ,
252252 ) -> AsyncIterator [StreamedResponse ]:
253- """Request a streaming response, filtering Llama Stack-specific event quirks.
253+ """Request a streaming response with Llama Stack compatibility fixes.
254+
255+ Applies the same conversation continuation handling as :meth:`request`
256+ before calling the Responses API, then filters streaming tool-call events.
254257
255258 Args:
256259 messages: Model messages for the request.
@@ -262,10 +265,10 @@ async def request_stream(
262265 A StreamedResponse with the filtered event stream.
263266 """
264267 check_allow_model_requests ()
265- model_settings , model_request_parameters = self .prepare_request (
266- model_settings ,
267- model_request_parameters ,
268+ messages , model_settings = self ._prepare_conversation_continuation (
269+ messages , model_settings
268270 )
271+
269272 model_settings_cast = cast (OpenAIResponsesModelSettings , model_settings or {})
270273 response = await self ._responses_create (
271274 messages , True , model_settings_cast , model_request_parameters
0 commit comments