Skip to content

Commit afab5ad

Browse files
committed
potential fix
1 parent 9ff72ff commit afab5ad

1 file changed

Lines changed: 64 additions & 0 deletions

File tree

  • src/pydantic_ai_lightspeed/llamastack

src/pydantic_ai_lightspeed/llamastack/_model.py

Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -181,6 +181,70 @@ class LlamaStackResponsesModel(OpenAIResponsesModel):
181181
before the corresponding ``McpCall`` or ``ResponseFunctionToolCall`` item.
182182
"""
183183

184+
async def request(
    self,
    messages: list[ModelMessage],
    model_settings: ModelSettings | None,
    model_request_parameters: ModelRequestParameters,
    run_context: RunContext[Any] | None = None,
) -> Any:
    """Issue a non-streaming request, adapted for Llama Stack conversations.

    Llama Stack rejects a request that carries both ``conversation`` and
    ``previous_response_id``. Before delegating to the parent class, the
    message history and settings are passed through
    ``_prepare_conversation_continuation``, which on a continuation turn
    keeps only the new input and removes ``openai_previous_response_id``
    from the settings so that only ``conversation`` is sent.

    Args:
        messages: Message history for this run.
        model_settings: Optional settings; may contain ``extra_body`` with
            a ``conversation`` entry.
        model_request_parameters: Passed to the parent unchanged.
        run_context: Accepted for signature compatibility; this method
            does not forward it to the parent call.

    Returns:
        Whatever the parent class's ``request`` returns.
    """
    prepared = self._prepare_conversation_continuation(messages, model_settings)
    turn_messages, turn_settings = prepared
    # NOTE: ``run_context`` is intentionally left out of the parent call;
    # only the three positional arguments are forwarded.
    response = await super().request(
        turn_messages, turn_settings, model_request_parameters
    )
    return response
205+
206+
def _prepare_conversation_continuation(
    self,
    messages: list[ModelMessage],
    model_settings: ModelSettings | None,
) -> tuple[list[ModelMessage], ModelSettings | None]:
    """Reduce a continuation turn to its new input and drop previous_response_id.

    Llama Stack rejects requests that contain both ``previous_response_id``
    and ``conversation``. The inputs are returned untouched unless all of
    the following hold:

    * ``model_settings`` is a non-empty ``dict``;
    * its ``extra_body`` entry is truthy and contains ``"conversation"``;
    * ``messages`` holds at least one ``ModelResponse`` whose
      ``provider_response_id`` is truthy.

    When they do, the result is:

    1. only the messages located AFTER the last such ``ModelResponse``
       (this may be an empty list if that response is the final message);
    2. a shallow copy of the settings without the
       ``openai_previous_response_id`` key, so pydantic-ai won't resolve one.

    Llama Stack then receives ``conversation`` (for persistence) plus the
    new input items only, and is expected to rebuild prior history from the
    conversation itself. The caller's ``model_settings`` is never mutated.

    Returns:
        A ``(messages, model_settings)`` pair, either the original objects
        or the trimmed list with the copied settings.
    """
    from pydantic_ai.messages import ModelResponse  # noqa: PLC0415

    # Guard: nothing to adjust without a populated settings dict.
    if not (model_settings and isinstance(model_settings, dict)):
        return messages, model_settings

    # Guard: the fix only applies when a conversation is being sent.
    body = model_settings.get("extra_body")
    if not (body and "conversation" in body):
        return messages, model_settings

    # Walk backwards to find the most recent response that has a provider id.
    anchor = next(
        (
            pos
            for pos in reversed(range(len(messages)))
            if isinstance(messages[pos], ModelResponse)
            and messages[pos].provider_response_id
        ),
        None,
    )
    if anchor is None:
        # First turn: no earlier response, so send everything as-is.
        return messages, model_settings

    # Copy the settings while leaving out the previous-response key.
    stripped_settings = {
        key: value
        for key, value in model_settings.items()
        if key != "openai_previous_response_id"
    }
    return messages[anchor + 1:], cast(ModelSettings, stripped_settings)
247+
184248
@asynccontextmanager
185249
async def request_stream(
186250
self,

0 commit comments

Comments
 (0)