AstrBotDevs · zouyonghe · May 27, 2026 · gemini-code-assist · May 27, 2026
diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py
@@ -655,6 +655,9 @@ async def _query_stream(
         llm_response = LLMResponse("assistant", is_chunk=True)
 
         state = ChatCompletionStreamState()
+        streamed_text_parts: list[str] = []
+        streamed_reasoning_parts: list[str] = []
+        latest_usage = None
 
         async for chunk in stream:
             choice = chunk.choices[0] if chunk.choices else None
@@ -688,20 +691,24 @@ async def _query_stream(
             llm_response.completion_text = ""
             if reasoning is not None:
                 llm_response.reasoning_content = reasoning
+                streamed_reasoning_parts.append(reasoning)
                 _y = True
             if delta and delta.content:
                 # Don't strip streaming chunks to preserve spaces between words
                 completion_text = self._normalize_content(delta.content, strip=False)
+                streamed_text_parts.append(completion_text)
                 llm_response.result_chain = MessageChain(
                     chain=[Comp.Plain(completion_text)],
                 )
                 _y = True
             if chunk.usage:
                 llm_response.usage = self._extract_usage(chunk.usage)
+                latest_usage = llm_response.usage
             elif choice and (choice_usage := getattr(choice, "usage", None)):
                 # Workaround for some providers that only return usage in choices[].usage, e.g. MoonshotAI
                 # See https://github.com/AstrBotDevs/AstrBot/issues/6614
                 llm_response.usage = self._extract_usage(choice_usage)
+                latest_usage = llm_response.usage
                 state.current_completion_snapshot.usage = choice_usage
             if _y:
                 yield llm_response
@@ -712,8 +719,15 @@ async def _query_stream(
             yield llm_response
         except Exception as e:
             logger.error("get_final_completion error: " + str(e))
-            # 流式内容已通过 yield 发出，记录错误后正常结束即可
-            return
+            if streamed_text_parts or streamed_reasoning_parts:
+                yield LLMResponse(
+                    "assistant",
+                    completion_text="".join(streamed_text_parts),
+                    reasoning_content="".join(streamed_reasoning_parts) or None,
+                    usage=latest_usage,
+                )
+                return
+            raise
 
     def _extract_reasoning_content(
         self,

diff --git a/tests/test_openai_source.py b/tests/test_openai_source.py
@@ -1325,6 +1325,65 @@ async def fake_create(**kwargs):
         await provider.terminate()
 
 
+@pytest.mark.asyncio
+async def test_query_stream_yields_final_response_when_final_completion_parse_fails(
+    monkeypatch,
+):
+    provider = _make_provider()
+    try:
+        chunks = [  # a single streamed chunk whose delta content is "hello"
+            ChatCompletionChunk.model_validate(
+                {
+                    "id": "chatcmpl-stream",
+                    "object": "chat.completion.chunk",
+                    "created": 0,
+                    "model": "gpt-4o-mini",
+                    "choices": [
+                        {
+                            "index": 0,
+                            "delta": {
+                                "role": "assistant",
+                                "content": "hello",
+                            },
+                            "finish_reason": None,
+                        }
+                    ],
+                }
+            )
+        ]
+
+        async def fake_stream():
+            for chunk in chunks:
+                yield chunk
+
+        async def fake_create(**kwargs):  # patched in below as completions.create
+            return fake_stream()
+
+        async def fake_parse_completion(completion, tools):  # stand-in that always raises
+            raise EmptyModelOutputError("final completion was empty")
+
+        monkeypatch.setattr(provider.client.chat.completions, "create", fake_create)
+        monkeypatch.setattr(provider, "_parse_openai_completion", fake_parse_completion)
+
+        responses = [
+            response
+            async for response in provider._query_stream(
+                payloads={
+                    "model": "gpt-4o-mini",
+                    "messages": [{"role": "user", "content": "hello"}],
+                },
+                tools=None,
+            )
+        ]
+
+        assert len(responses) == 2  # the streamed chunk plus one final response
+        assert responses[0].is_chunk
+        assert not responses[-1].is_chunk  # last item must be a non-chunk response
+        assert responses[-1].completion_text == "hello"  # equals the streamed delta text
+    finally:
+        await provider.terminate()  # runs even when an assertion above fails
+
+
 @pytest.mark.asyncio
 async def test_query_filters_empty_assistant_message_without_tool_calls(monkeypatch):
     """Test that empty assistant messages without tool_calls are filtered out."""