Skip to content

Commit 6782625

Browse files
committed
refactor(responses): add shared response helpers
Signed-off-by: Major Hayden <major@redhat.com>
1 parent 774964a commit 6782625

1 file changed

Lines changed: 231 additions & 2 deletions

File tree

src/app/endpoints/responses.py

Lines changed: 231 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44

55
import asyncio
66
import json
7-
from collections.abc import AsyncIterator
7+
from collections.abc import AsyncIterator, Sequence
88
from datetime import UTC, datetime
9-
from typing import Annotated, Any, Final, Optional, cast
9+
from typing import Annotated, Any, Final, NoReturn, Optional, cast
1010

1111
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request
1212
from fastapi.responses import StreamingResponse
@@ -224,6 +224,235 @@ def _queue_responses_splunk_event( # pylint: disable=too-many-arguments,too-man
224224
background_tasks.add_task(send_splunk_event, event, sourcetype)
225225

226226

227+
def _queue_responses_error_event(
228+
error: Exception,
229+
api_params: ResponsesApiParams,
230+
context: ResponsesContext,
231+
) -> None:
232+
"""Queue fire-and-forget Splunk telemetry for a Responses API error.
233+
234+
Args:
235+
error: The backend exception being converted into an HTTP error.
236+
api_params: Responses API parameters for the failed request.
237+
context: Request-scoped Responses API context.
238+
"""
239+
_queue_responses_splunk_event(
240+
background_tasks=context.background_tasks,
241+
input_text=context.input_text,
242+
response_text=str(error),
243+
conversation_id=normalize_conversation_id(api_params.conversation),
244+
model=api_params.model,
245+
rh_identity_context=context.rh_identity_context,
246+
inference_time=(datetime.now(UTC) - context.started_at).total_seconds(),
247+
sourcetype="responses_error",
248+
fire_and_forget=True,
249+
user_agent=context.user_agent,
250+
)
251+
252+
253+
def _http_exception_for_response_api_error(
254+
error: Exception,
255+
api_params: ResponsesApiParams,
256+
) -> Optional[HTTPException]:
257+
"""Map known Responses API backend errors to HTTP exceptions.
258+
259+
Args:
260+
error: The backend exception raised while creating a response.
261+
api_params: Responses API parameters for the request.
262+
263+
Returns:
264+
HTTPException for known API failures, or None for unknown RuntimeError.
265+
"""
266+
if isinstance(error, RuntimeError):
267+
if not is_context_length_error(str(error)):
268+
return None
269+
error_response = PromptTooLongResponse(model=api_params.model)
270+
elif isinstance(error, APIConnectionError):
271+
error_response = ServiceUnavailableResponse(
272+
backend_name="Llama Stack",
273+
cause=str(error),
274+
)
275+
elif isinstance(error, (LLSApiStatusError, OpenAIAPIStatusError)):
276+
error_response = handle_known_apistatus_errors(error, api_params.model)
277+
else:
278+
return None
279+
return HTTPException(**error_response.model_dump())
280+
281+
282+
def _raise_response_api_http_exception(
283+
error: Exception,
284+
api_params: ResponsesApiParams,
285+
context: ResponsesContext,
286+
) -> NoReturn:
287+
"""Queue error telemetry and raise the mapped Responses API HTTP error.
288+
289+
Args:
290+
error: The backend exception raised while creating a response.
291+
api_params: Responses API parameters for the request.
292+
context: Request-scoped Responses API context.
293+
294+
Raises:
295+
Exception: Re-raises unknown RuntimeError instances unchanged.
296+
HTTPException: Raised for known Responses API failures.
297+
"""
298+
http_exception = _http_exception_for_response_api_error(error, api_params)
299+
if http_exception is None:
300+
raise error
301+
_queue_responses_error_event(error, api_params, context)
302+
raise http_exception from error
303+
304+
305+
async def _persist_blocked_response_turn(
306+
api_params: ResponsesApiParams,
307+
context: ResponsesContext,
308+
) -> None:
309+
"""Persist a shield-blocked refusal turn when response storage is enabled.
310+
311+
Args:
312+
api_params: Responses API parameters for the blocked request.
313+
context: Request-scoped Responses API context with moderation details.
314+
"""
315+
if api_params.store:
316+
moderation_result = cast(ShieldModerationBlocked, context.moderation_result)
317+
await append_turn_items_to_conversation(
318+
client=context.client,
319+
conversation_id=api_params.conversation,
320+
user_input=api_params.input,
321+
llm_output=[moderation_result.refusal_response],
322+
)
323+
324+
325+
def _queue_blocked_response_event(
326+
api_params: ResponsesApiParams,
327+
context: ResponsesContext,
328+
response_text: str,
329+
) -> None:
330+
"""Queue Splunk telemetry for a shield-blocked Responses API request.
331+
332+
Args:
333+
api_params: Responses API parameters for the blocked request.
334+
context: Request-scoped Responses API context.
335+
response_text: Refusal text sent to the client.
336+
"""
337+
_queue_responses_splunk_event(
338+
background_tasks=context.background_tasks,
339+
input_text=context.input_text,
340+
response_text=response_text,
341+
conversation_id=normalize_conversation_id(api_params.conversation),
342+
model=api_params.model,
343+
rh_identity_context=context.rh_identity_context,
344+
inference_time=(datetime.now(UTC) - context.started_at).total_seconds(),
345+
sourcetype="responses_shield_blocked",
346+
user_agent=context.user_agent,
347+
)
348+
349+
350+
async def _append_previous_response_turn(
351+
api_params: ResponsesApiParams,
352+
context: ResponsesContext,
353+
output: Sequence[Any],
354+
) -> None:
355+
"""Append response output when continuing from a previous response id.
356+
357+
Args:
358+
api_params: Responses API parameters containing conversation details.
359+
context: Request-scoped Responses API context.
360+
output: Final output items from the Responses API object.
361+
"""
362+
if api_params.store and api_params.previous_response_id:
363+
await append_turn_items_to_conversation(
364+
context.client,
365+
api_params.conversation,
366+
api_params.input,
367+
output,
368+
)
369+
370+
371+
async def _maybe_get_topic_summary(
372+
api_params: ResponsesApiParams,
373+
context: ResponsesContext,
374+
) -> Optional[str]:
375+
"""Generate a topic summary when requested for the current response.
376+
377+
Args:
378+
api_params: Responses API parameters containing the selected model.
379+
context: Request-scoped Responses API context.
380+
381+
Returns:
382+
Generated topic summary, or None when topic summaries are disabled.
383+
"""
384+
if not context.generate_topic_summary:
385+
return None
386+
logger.debug("Generating topic summary for new conversation")
387+
return await get_topic_summary(context.input_text, context.client, api_params.model)
388+
389+
390+
def _store_response_query_results(
391+
api_params: ResponsesApiParams,
392+
context: ResponsesContext,
393+
turn_summary: TurnSummary,
394+
completed_at: datetime,
395+
topic_summary: Optional[str],
396+
) -> None:
397+
"""Persist Responses API query results when request storage is enabled.
398+
399+
Args:
400+
api_params: Responses API parameters containing conversation details.
401+
context: Request-scoped Responses API context.
402+
turn_summary: Summary of the completed model turn.
403+
completed_at: Time when response handling completed.
404+
topic_summary: Optional generated topic summary for the conversation.
405+
"""
406+
if not api_params.store:
407+
return
408+
user_id, _, skip_userid_check, _ = context.auth
409+
store_query_results(
410+
user_id=user_id,
411+
conversation_id=normalize_conversation_id(api_params.conversation),
412+
model=api_params.model,
413+
started_at=context.started_at.strftime("%Y-%m-%dT%H:%M:%SZ"),
414+
completed_at=completed_at.strftime("%Y-%m-%dT%H:%M:%SZ"),
415+
summary=turn_summary,
416+
query=context.input_text,
417+
attachments=[],
418+
skip_userid_check=skip_userid_check,
419+
topic_summary=topic_summary,
420+
)
421+
422+
423+
def _queue_completed_response_event(
424+
api_params: ResponsesApiParams,
425+
context: ResponsesContext,
426+
turn_summary: TurnSummary,
427+
completed_at: datetime,
428+
response_text: str,
429+
) -> None:
430+
"""Queue Splunk telemetry for a completed Responses API request.
431+
432+
Args:
433+
api_params: Responses API parameters for the completed request.
434+
context: Request-scoped Responses API context.
435+
turn_summary: Summary containing token usage for telemetry.
436+
completed_at: Time when response handling completed.
437+
response_text: Final text sent to the client.
438+
"""
439+
if context.moderation_result.decision != "passed":
440+
return
441+
_queue_responses_splunk_event(
442+
background_tasks=context.background_tasks,
443+
input_text=context.input_text,
444+
response_text=response_text,
445+
conversation_id=normalize_conversation_id(api_params.conversation),
446+
model=api_params.model,
447+
rh_identity_context=context.rh_identity_context,
448+
inference_time=(completed_at - context.started_at).total_seconds(),
449+
sourcetype="responses_completed",
450+
input_tokens=turn_summary.token_usage.input_tokens,
451+
output_tokens=turn_summary.token_usage.output_tokens,
452+
user_agent=context.user_agent,
453+
)
454+
455+
227456
@router.post(
228457
"/responses",
229458
responses=responses_response,

0 commit comments

Comments
 (0)