Skip to content

Commit a4c237e

Browse files
committed
fix prompt too long
1 parent 3b7855a commit a4c237e

2 files changed

Lines changed: 27 additions & 11 deletions

File tree

src/utils/agents/streaming.py

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -221,12 +221,29 @@ async def generate_agent_response(
221 221
context.query_request.conversation_id is None
222 222
and bool(context.query_request.generate_topic_summary)
223 223
)
224-
topic_summary = await maybe_get_topic_summary(
225-
generate_topic_summary=should_generate_topic_summary,
226-
input_text=context.query_request.query,
227-
client=context.client,
228-
model_id=responses_params.model,
229-
)
224+
try:
225+
topic_summary = await maybe_get_topic_summary(
226+
generate_topic_summary=should_generate_topic_summary,
227+
input_text=context.query_request.query,
228+
client=context.client,
229+
model_id=responses_params.model,
230+
)
231+
except HTTPException as exc:
232+
logger.warning(
233+
"Topic summary failed for request %s: %s",
234+
context.request_id,
235+
exc.detail,
236+
)
237+
detail: dict[str, str] = exc.detail if isinstance(exc.detail, dict) else {}
238+
yield serialize_event(
239+
ErrorStreamPayload.create(
240+
status_code=exc.status_code,
241+
response=detail.get("response", "Internal server error"),
242+
cause=detail.get("cause", str(exc.detail)),
243+
),
244+
media_type,
245+
)
246+
return
230 247
logger.info("Consuming tokens")
231 248
consume_query_tokens(
232 249
user_id=context.user_id,

src/utils/query.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -574,10 +574,9 @@ def handle_known_apistatus_errors(
574 574
Returns:
575 575
AbstractErrorResponse: The error response model.
576 576
"""
577-
if error.status_code == 400:
578-
error_message = getattr(error, "message", str(error))
579-
if is_context_length_error(error_message):
580-
return PromptTooLongResponse(model=model_id)
581-
elif error.status_code == 429:
577+
error_message = getattr(error, "message", str(error))
578+
if is_context_length_error(error_message):
579+
return PromptTooLongResponse(model=model_id)
580+
if error.status_code == 429:
582 581
return QuotaExceededResponse.model(model_id)
583 582
return InternalServerErrorResponse.generic()

0 commit comments

Comments
 (0)