99from backend .adapter .cli_proxy import CLIProxy
1010from backend .core .config import resolve_model , settings
1111from backend .core .request_logging import new_request_id , request_context , update_request_context
12+ from backend .core .request_trace import log_test_prompt , prompt_tail
1213from backend .runtime import stream_presenter
1314from backend .runtime .execution import (
1415 build_tool_directive ,
1718 collect_completion_run_with_recovery ,
1819 evaluate_retry_directive ,
1920 request_max_attempts ,
21+ tool_directive_visible_text ,
2022)
2123from backend .services .auth_quota import resolve_auth_context
2224from backend .services .completion_bridge import force_fresh_chat_after_empty_response , is_empty_upstream_response
@@ -69,6 +71,7 @@ def __init__(self, *, msg_id: str, model_name: str, prompt: str):
6971 self .prompt = prompt
7072 self .pending_chunks : list [str ] = []
7173 self .answer_text_buffer : list [tuple [int , str ]] = []
74+ self .flushed_answer_text = ""
7275 self .block_index = 0
7376 self .current_block : dict [str , object ] = {"type" : None , "index" : None , "tool_call_id" : None }
7477 self .opened_tool_calls : set [str ] = set ()
@@ -142,11 +145,39 @@ def flush_answer_text(self) -> None:
142145 self .pending_chunks .append (
143146 stream_presenter .anthropic_content_block_delta (index , {"type" : "text_delta" , "text" : text_chunk })
144147 )
148+ self .flushed_answer_text += text_chunk
145149 self .answer_text_buffer = []
146150
147151 def clear_answer_text (self ) -> None :
148152 self .answer_text_buffer = []
149153
154+ def answer_text (self ) -> str :
155+ return "" .join (text_chunk for _ , text_chunk in self .answer_text_buffer )
156+
157+ def queued_answer_text (self ) -> str :
158+ return self .flushed_answer_text + self .answer_text ()
159+
160+ def buffer_missing_answer_tail (self , final_text : str ) -> None :
161+ if not final_text :
162+ return
163+ queued = self .queued_answer_text ()
164+ if queued == final_text :
165+ return
166+ if final_text .startswith (queued ):
167+ missing = final_text [len (queued ):]
168+ if missing :
169+ self .buffer_answer_text (missing )
170+ return
171+ if final_text .startswith (self .flushed_answer_text ):
172+ missing = final_text [len (self .flushed_answer_text ):]
173+ self .answer_text_buffer = []
174+ if missing :
175+ self .buffer_answer_text (missing )
176+ return
177+ if not self .flushed_answer_text :
178+ self .answer_text_buffer = []
179+ self .buffer_answer_text (final_text )
180+
150181
151182def _build_standard_request (req_data : dict ) -> StandardRequest :
152183 """浣跨敤 CLIProxy 杩涜鍗忚杞崲"""
@@ -196,7 +227,7 @@ def _visible_answer_text_length(*, directive, execution, stream_state: _Anthropi
196227 if directive .stop_reason == "tool_use" :
197228 return 0
198229 if stream_state is not None :
199- return sum ( len (text_chunk ) for _ , text_chunk in stream_state .answer_text_buffer )
230+ return len (stream_state .queued_answer_text () )
200231 return len (execution .state .answer_text )
201232
202233
@@ -319,15 +350,27 @@ async def generate():
319350 standard_request , effective_payload , model_name , qwen_model , prompt , msg_id = await prepare_locked_request (req_data )
320351 update_request_context (requested_model = model_name , resolved_model = qwen_model )
321352 tool_names = [t .get ('name' ) for t in standard_request .tools ]
353+ log_test_prompt (
354+ log ,
355+ stage = "anthropic_request" ,
356+ surface = "anthropic" ,
357+ model = qwen_model ,
358+ stream = standard_request .stream ,
359+ tools = tool_names ,
360+ prompt = prompt ,
361+ )
322362 log .info (
323- "[ANT] model=%s stream=%s tool_enabled=%s tools=%s mcp_tools=%s workspace=%s prompt_len=%s" ,
363+ "[ANT] model=%s stream=%s tool_enabled=%s tools=%s mcp_tools=%s workspace=%s prompt_len=%s prompt_tail=%r delete_expected=%s persistent_session=%s " ,
324364 qwen_model ,
325365 standard_request .stream ,
326366 standard_request .tool_enabled ,
327367 tool_names ,
328368 [name for name in tool_names if isinstance (name , str ) and name .startswith ("mcp__" )],
329369 standard_request .workspace_root or "-" ,
330370 len (prompt ),
371+ prompt_tail (prompt ),
372+ (getattr (standard_request , "chat_type" , "t2t" ) != "t2t" ) or not bool (getattr (standard_request , "persistent_session" , False )),
373+ bool (getattr (standard_request , "persistent_session" , False )),
331374 )
332375 history_messages = original_history_messages
333376 current_prompt = prompt
@@ -347,18 +390,11 @@ async def on_delta(evt, text_chunk, _):
347390 stream_state .buffer_answer_text (text_chunk )
348391 return
349392 if phase == "tool_call" :
350- extra = evt .get ("extra" , {}) or {}
351- tool_call_id = extra .get ("tool_call_id" )
352- if tool_call_id is None :
353- tool_call_id = f"tc_idx_{ extra .get ('index' , 0 )} "
354- tool_name = extra .get ("tool_name" )
355- if not tool_name :
356- return
357- stream_state .append_tool_delta (
358- tool_call_id = str (tool_call_id ),
359- tool_name = str (tool_name ),
360- partial_json = evt .get ("content" , "" ),
361- )
393+ # Buffering means no SSE chunk is sent until the
394+ # final directive is built. Emit tool blocks only
395+ # after ToolGuard has had a chance to suppress
396+ # redundant completed tool calls.
397+ return
362398
363399 execution = await collect_completion_run_with_recovery (
364400 client ,
@@ -411,15 +447,18 @@ async def on_delta(evt, text_chunk, _):
411447 stream_state .pending_chunks .append (_message_start_event (msg_id , model_name , current_prompt , execution .state .answer_text ))
412448
413449 directive = build_tool_directive (standard_request , execution .state , history_messages = history_messages )
450+ visible_text = tool_directive_visible_text (directive , execution .state .answer_text )
451+ if directive .stop_reason != "tool_use" :
452+ stream_state .buffer_missing_answer_tail (visible_text )
414453 if (
415454 directive .stop_reason != "tool_use"
416455 and not stream_state .answer_text_buffer
417- and execution . state . answer_text
456+ and visible_text
418457 ):
419458 # ToolSieve may hold short normal replies until stream end to
420459 # avoid leaking partial tool markup. If no live text delta was
421460 # emitted, replay the finalized visible answer here.
422- stream_state .buffer_answer_text (execution . state . answer_text )
461+ stream_state .buffer_answer_text (visible_text )
423462 visible_answer_length = _visible_answer_text_length (
424463 directive = directive ,
425464 execution = execution ,
@@ -501,15 +540,27 @@ async def on_delta(evt, text_chunk, _):
501540 standard_request , effective_payload , model_name , qwen_model , prompt , msg_id = await prepare_locked_request (req_data )
502541 update_request_context (requested_model = model_name , resolved_model = qwen_model )
503542 tool_names = [t .get ('name' ) for t in standard_request .tools ]
543+ log_test_prompt (
544+ log ,
545+ stage = "anthropic_request" ,
546+ surface = "anthropic" ,
547+ model = qwen_model ,
548+ stream = standard_request .stream ,
549+ tools = tool_names ,
550+ prompt = prompt ,
551+ )
504552 log .info (
505- "[ANT] model=%s stream=%s tool_enabled=%s tools=%s mcp_tools=%s workspace=%s prompt_len=%s" ,
553+ "[ANT] model=%s stream=%s tool_enabled=%s tools=%s mcp_tools=%s workspace=%s prompt_len=%s prompt_tail=%r delete_expected=%s persistent_session=%s " ,
506554 qwen_model ,
507555 standard_request .stream ,
508556 standard_request .tool_enabled ,
509557 tool_names ,
510558 [name for name in tool_names if isinstance (name , str ) and name .startswith ("mcp__" )],
511559 standard_request .workspace_root or "-" ,
512560 len (prompt ),
561+ prompt_tail (prompt ),
562+ (getattr (standard_request , "chat_type" , "t2t" ) != "t2t" ) or not bool (getattr (standard_request , "persistent_session" , False )),
563+ bool (getattr (standard_request , "persistent_session" , False )),
513564 )
514565 history_messages = original_history_messages
515566 current_prompt = prompt
@@ -552,17 +603,24 @@ async def on_delta(evt, text_chunk, _):
552603 raise RuntimeError ("empty upstream response after retries" )
553604
554605 directive = build_tool_directive (standard_request , execution .state , history_messages = history_messages )
606+ visible_text = tool_directive_visible_text (directive , execution .state .answer_text )
555607 _log_response_tool_blocks ("json_response" , directive .tool_blocks )
556608 content_blocks : list [dict ] = []
557609 if execution .state .reasoning_text :
558610 content_blocks .append ({"type" : "thinking" , "thinking" : execution .state .reasoning_text })
559611 content_blocks .extend (directive .tool_blocks )
612+ if (
613+ directive .stop_reason != "tool_use"
614+ and visible_text
615+ and not any (block .get ("type" ) == "text" for block in content_blocks )
616+ ):
617+ content_blocks .append ({"type" : "text" , "text" : visible_text })
560618
561619 await _add_used_tokens_for_prompt (
562620 users_db = users_db ,
563621 token = token ,
564622 prompt_text = current_prompt ,
565- answer_text_length = len (execution . state . answer_text ),
623+ answer_text_length = len (visible_text ),
566624 )
567625 assistant_message = build_anthropic_assistant_history_message (
568626 execution = execution ,
@@ -592,7 +650,7 @@ async def on_delta(evt, text_chunk, _):
592650 "content" : content_blocks ,
593651 "stop_reason" : directive .stop_reason ,
594652 "stop_sequence" : None ,
595- "usage" : _anthropic_usage (current_prompt , execution . state . answer_text ),
653+ "usage" : _anthropic_usage (current_prompt , visible_text ),
596654 }
597655 )
598656 except Exception as e :
0 commit comments