@@ -655,6 +655,9 @@ async def _query_stream(
655655 llm_response = LLMResponse ("assistant" , is_chunk = True )
656656
657657 state = ChatCompletionStreamState ()
658+ streamed_text_parts : list [str ] = []
659+ streamed_reasoning_parts : list [str ] = []
660+ latest_usage = None
658661
659662 async for chunk in stream :
660663 choice = chunk .choices [0 ] if chunk .choices else None
@@ -688,20 +691,24 @@ async def _query_stream(
688691 llm_response .completion_text = ""
689692 if reasoning is not None :
690693 llm_response .reasoning_content = reasoning
694+ streamed_reasoning_parts .append (reasoning )
691695 _y = True
692696 if delta and delta .content :
693697 # Don't strip streaming chunks to preserve spaces between words
694698 completion_text = self ._normalize_content (delta .content , strip = False )
699+ streamed_text_parts .append (completion_text )
695700 llm_response .result_chain = MessageChain (
696701 chain = [Comp .Plain (completion_text )],
697702 )
698703 _y = True
699704 if chunk .usage :
700705 llm_response .usage = self ._extract_usage (chunk .usage )
706+ latest_usage = llm_response .usage
701707 elif choice and (choice_usage := getattr (choice , "usage" , None )):
702708 # Workaround for some providers that only return usage in choices[].usage, e.g. MoonshotAI
703709 # See https://github.com/AstrBotDevs/AstrBot/issues/6614
704710 llm_response .usage = self ._extract_usage (choice_usage )
711+ latest_usage = llm_response .usage
705712 state .current_completion_snapshot .usage = choice_usage
706713 if _y :
707714 yield llm_response
@@ -712,8 +719,15 @@ async def _query_stream(
712719 yield llm_response
713720 except Exception as e :
714721 logger .error ("get_final_completion error: " + str (e ))
715- # 流式内容已通过 yield 发出,记录错误后正常结束即可
716- return
722+ if streamed_text_parts or streamed_reasoning_parts :
723+ yield LLMResponse (
724+ "assistant" ,
725+ completion_text = "" .join (streamed_text_parts ),
726+ reasoning_content = "" .join (streamed_reasoning_parts ) or None ,
727+ usage = latest_usage ,
728+ )
729+ return
730+ raise
717731
718732 def _extract_reasoning_content (
719733 self ,
0 commit comments