4646from configuration import configuration
4747from constants import (
4848 ENDPOINT_PATH_STREAMING_QUERY ,
49- INTERRUPTED_RESPONSE_MESSAGE ,
5049 LLM_TOKEN_EVENT ,
5150 LLM_TOOL_CALL_EVENT ,
5251 LLM_TOOL_RESULT_EVENT ,
122121 validate_shield_ids_override ,
123122)
124123from utils .stream_interrupts import (
124+ build_interrupted_response ,
125125 deregister_stream ,
126126 persist_interrupted_turn ,
127127 register_interrupt_callback ,
@@ -634,16 +634,22 @@ async def generate_response( # pylint: disable=too-many-arguments,too-many-posi
634634 current_task = asyncio .current_task ()
635635 if current_task is not None :
636636 current_task .uncancel ()
637+ full_text , suffix = build_interrupted_response (turn_summary .partial_tokens )
637638 if not persist_guard [0 ]:
638639 persist_guard [0 ] = True
639- turn_summary .llm_response = INTERRUPTED_RESPONSE_MESSAGE
640+ turn_summary .llm_response = full_text
640641 await persist_interrupted_turn (
641642 context ,
642643 responses_params ,
643644 turn_summary ,
644645 _background_topic_summary_tasks ,
645646 original_input ,
646647 )
648+ yield stream_event (
649+ {"id" : turn_summary .next_chunk_id , "token" : suffix },
650+ LLM_TOKEN_EVENT ,
651+ context .query_request .media_type or MEDIA_TYPE_JSON ,
652+ )
647653 yield stream_interrupted_event (context .request_id )
648654 finally :
649655 deregister_stream (context .request_id )
@@ -765,15 +771,17 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat
765771
766772 # Content part started - emit an empty token to kick off UI streaming
767773 if event_type == "response.content_part.added" :
774+ event_id = chunk_id
775+ chunk_id += 1
776+ turn_summary .next_chunk_id = chunk_id
768777 yield stream_event (
769778 {
770- "id" : chunk_id ,
779+ "id" : event_id ,
771780 "token" : "" ,
772781 },
773782 LLM_TOKEN_EVENT ,
774783 media_type ,
775784 )
776- chunk_id += 1
777785
778786 # Store MCP call item info for later lookup when arguments.done event occurs
779787 elif event_type == "response.output_item.added" :
@@ -789,15 +797,18 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat
789797 elif event_type == "response.output_text.delta" :
790798 delta_chunk = cast (TextDeltaChunk , chunk )
791799 text_parts .append (delta_chunk .delta )
800+ turn_summary .partial_tokens .append (delta_chunk .delta )
801+ event_id = chunk_id
802+ chunk_id += 1
803+ turn_summary .next_chunk_id = chunk_id
792804 yield stream_event (
793805 {
794- "id" : chunk_id ,
806+ "id" : event_id ,
795807 "token" : delta_chunk .delta ,
796808 },
797809 LLM_TOKEN_EVENT ,
798810 media_type ,
799811 )
800- chunk_id += 1
801812
802813 # Final text of the output (capture, but emit at response.completed)
803814 elif event_type == "response.output_text.done" :
@@ -877,15 +888,17 @@ async def response_generator( # pylint: disable=too-many-branches,too-many-stat
877888 # (LCORE-1572), so the persisted turn keeps non-text output items
878889 # rather than being flattened to the response text.
879890 turn_summary .output_items = list (latest_response_object .output or [])
891+ event_id = chunk_id
892+ chunk_id += 1
893+ turn_summary .next_chunk_id = chunk_id
880894 yield stream_event (
881895 {
882- "id" : chunk_id ,
896+ "id" : event_id ,
883897 "token" : turn_summary .llm_response ,
884898 },
885899 LLM_TURN_COMPLETE_EVENT ,
886900 media_type ,
887901 )
888- chunk_id += 1
889902
890903 # Incomplete or failed response - emit error
891904 elif event_type in ("response.incomplete" , "response.failed" ):
0 commit comments