lightspeed-core
diff --git a/src/app/endpoints/streaming_query.py b/src/app/endpoints/streaming_query.py
Lines changed: 21 additions & 8 deletions
diff --git a/src/constants.py b/src/constants.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/models/common/turn_summary.py b/src/models/common/turn_summary.py
Lines changed: 10 additions & 0 deletions
diff --git a/src/utils/agents/streaming.py b/src/utils/agents/streaming.py
Lines changed: 13 additions & 2 deletions
@@ -46,7 +46,6 @@
 from configuration import configuration
 from constants import (
     ENDPOINT_PATH_STREAMING_QUERY,
-    INTERRUPTED_RESPONSE_MESSAGE,
     LLM_TOKEN_EVENT,
     LLM_TOOL_CALL_EVENT,
     LLM_TOOL_RESULT_EVENT,
@@ -122,6 +121,7 @@
     validate_shield_ids_override,
 )
 from utils.stream_interrupts import (
+    build_interrupted_response,
     deregister_stream,
     persist_interrupted_turn,
     register_interrupt_callback,
@@ -634,16 +634,22 @@ async def generate_response(  # pylint: disable=too-many-arguments,too-many-posi
         current_task = asyncio.current_task()
         if current_task is not None:
             current_task.uncancel()
+        full_text, suffix = build_interrupted_response(turn_summary.partial_tokens)
         if not persist_guard[0]:
             persist_guard[0] = True
-            turn_summary.llm_response = INTERRUPTED_RESPONSE_MESSAGE
+            turn_summary.llm_response = full_text
             await persist_interrupted_turn(
                 context,
                 responses_params,
                 turn_summary,
                 _background_topic_summary_tasks,
                 original_input,
             )
+        yield stream_event(
+            {"id": turn_summary.next_chunk_id, "token": suffix},
+            LLM_TOKEN_EVENT,
+            context.query_request.media_type or MEDIA_TYPE_JSON,
+        )
         yield stream_interrupted_event(context.request_id)
     finally:
         deregister_stream(context.request_id)
@@ -765,15 +771,17 @@ async def response_generator(  # pylint: disable=too-many-branches,too-many-stat
 
         # Content part started - emit an empty token to kick off UI streaming
         if event_type == "response.content_part.added":
+            event_id = chunk_id
+            chunk_id += 1
+            turn_summary.next_chunk_id = chunk_id
             yield stream_event(
                 {
-                    "id": chunk_id,
+                    "id": event_id,
                     "token": "",
                 },
                 LLM_TOKEN_EVENT,
                 media_type,
             )
-            chunk_id += 1
 
         # Store MCP call item info for later lookup when arguments.done event occurs
         elif event_type == "response.output_item.added":
@@ -789,15 +797,18 @@ async def response_generator(  # pylint: disable=too-many-branches,too-many-stat
         elif event_type == "response.output_text.delta":
             delta_chunk = cast(TextDeltaChunk, chunk)
             text_parts.append(delta_chunk.delta)
+            turn_summary.partial_tokens.append(delta_chunk.delta)
+            event_id = chunk_id
+            chunk_id += 1
+            turn_summary.next_chunk_id = chunk_id
             yield stream_event(
                 {
-                    "id": chunk_id,
+                    "id": event_id,
                     "token": delta_chunk.delta,
                 },
                 LLM_TOKEN_EVENT,
                 media_type,
             )
-            chunk_id += 1
 
         # Final text of the output (capture, but emit at response.completed)
         elif event_type == "response.output_text.done":
@@ -877,15 +888,17 @@ async def response_generator(  # pylint: disable=too-many-branches,too-many-stat
             # (LCORE-1572), so the persisted turn keeps non-text output items
             # rather than being flattened to the response text.
             turn_summary.output_items = list(latest_response_object.output or [])
+            event_id = chunk_id
+            chunk_id += 1
+            turn_summary.next_chunk_id = chunk_id
             yield stream_event(
                 {
-                    "id": chunk_id,
+                    "id": event_id,
                     "token": turn_summary.llm_response,
                 },
                 LLM_TURN_COMPLETE_EVENT,
                 media_type,
             )
-            chunk_id += 1
 
         # Incomplete or failed response - emit error
         elif event_type in ("response.incomplete", "response.failed"):
 
@@ -25,7 +25,7 @@
 UNABLE_TO_PROCESS_RESPONSE: Final[str] = "Unable to process this request"
 
 # Response stored in the conversation when the user interrupts a streaming request
-INTERRUPTED_RESPONSE_MESSAGE: Final[str] = "You interrupted this request."
+INTERRUPTED_RESPONSE_MESSAGE: Final[str] = "Response stopped by the user."
 
 # Max seconds to wait for topic summary in background task after interrupt persist.
 TOPIC_SUMMARY_INTERRUPT_TIMEOUT_SECONDS: Final[float] = 30.0
 
@@ -114,6 +114,16 @@ class TurnSummary(BaseModel):
         description="Structured response output items, captured for compacted-mode "
         "turn persistence (LCORE-1572). Empty on the non-compacted path.",
     )
+    partial_tokens: list[str] = Field(
+        default_factory=list,
+        description="Accumulated text deltas during streaming, used to reconstruct "
+        "partial content on interruption.",
+    )
+    next_chunk_id: int = Field(
+        default=0,
+        description="Next monotonic SSE chunk index, kept in sync with the inner "
+        "generator so the interrupt handler can emit a sequentially valid id.",
+    )
 
 
 class ToolInfoSummary(BaseModel):
 
@@ -25,7 +25,7 @@
 )
 
 from configuration import configuration
-from constants import INTERRUPTED_RESPONSE_MESSAGE, MEDIA_TYPE_JSON
+from constants import MEDIA_TYPE_JSON
 from log import get_logger
 from models.common.agents import (
     AgentTurnAccumulator,
@@ -65,6 +65,7 @@
     maybe_get_topic_summary,
 )
 from utils.stream_interrupts import (
+    build_interrupted_response,
     deregister_stream,
     persist_interrupted_turn,
     register_interrupt_callback,
@@ -197,16 +198,23 @@ async def generate_agent_response(
         current_task = asyncio.current_task()
         if current_task is not None:
             current_task.uncancel()
+        full_text, suffix = build_interrupted_response(turn_summary.partial_tokens)
         if not persist_guard[0]:
             persist_guard[0] = True
-            turn_summary.llm_response = INTERRUPTED_RESPONSE_MESSAGE
+            turn_summary.llm_response = full_text
             await persist_interrupted_turn(
                 context,
                 responses_params,
                 turn_summary,
                 background_topic_summary_tasks,
                 original_input,
             )
+        yield serialize_event(
+            TokenStreamPayload.create(
+                chunk_id=turn_summary.next_chunk_id, token=suffix
+            ),
+            media_type,
+        )
         yield serialize_event(
             InterruptedStreamPayload.create(request_id=context.request_id),
             media_type,
@@ -347,11 +355,13 @@ def _process_token(
         Token stream payload containing the emitted token chunk.
     """
     state.text_parts.append(text)
+    state.turn_summary.partial_tokens.append(text)
     payload = TokenStreamPayload.create(
         chunk_id=state.chunk_id,
         token=text,
     )
     state.chunk_id += 1
+    state.turn_summary.next_chunk_id = state.chunk_id
     return payload
 
 
@@ -402,6 +412,7 @@ def _(
         token=final_text,
     )
     state.chunk_id += 1
+    state.turn_summary.next_chunk_id = state.chunk_id
     return payload