Skip to content

Commit c62a6f4

Browse files
authored
Merge pull request #70 from YuJunZhiXue/trae
Trae
2 parents 5245e6a + ec7866e commit c62a6f4

19 files changed

Lines changed: 1597 additions & 93 deletions

.env.example

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ REQUEST_JITTER_MIN_MS=0
99
REQUEST_JITTER_MAX_MS=0
1010
RATE_LIMIT_BASE_COOLDOWN=600
1111
RATE_LIMIT_MAX_COOLDOWN=3600
12+
CHAT_ID_PREWARM_TARGET_PER_ACCOUNT=5
13+
CHAT_ID_PREWARM_TTL_SECONDS=120
14+
TRACE_RESPONSE_FINGERPRINTS=0
15+
TRACE_RESPONSE_TAIL_CHARS=160
1216
ACCOUNTS_FILE=/workspace/data/accounts.json
1317
USERS_FILE=/workspace/data/users.json
1418
CAPTURES_FILE=/workspace/data/captures.json

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# qwen2API Enterprise Gateway
22

3+
## 加入群聊
4+
5+
Telegram 群聊: [https://t.me/qwen2api](https://t.me/qwen2api)
6+
37
[![Stars](https://img.shields.io/github/stars/YuJunZhiXue/qwen2API?style=flat-square)](https://github.com/YuJunZhiXue/qwen2API/stargazers)
48
[![Forks](https://img.shields.io/github/forks/YuJunZhiXue/qwen2API?style=flat-square)](https://github.com/YuJunZhiXue/qwen2API/network/members)
59
[![Release](https://img.shields.io/github/v/release/YuJunZhiXue/qwen2API?style=flat-square)](https://github.com/YuJunZhiXue/qwen2API/releases)

backend/adapter/cli_proxy.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,8 @@ def to_anthropic_response(execution, standard_request: StandardRequest, msg_id:
225225
Returns:
226226
dict: Claude 格式的响应
227227
"""
228+
from backend.runtime.execution import tool_directive_visible_text
229+
228230
content_blocks: list[dict] = []
229231

230232
# 添加思考内容
@@ -233,6 +235,13 @@ def to_anthropic_response(execution, standard_request: StandardRequest, msg_id:
233235

234236
# 添加工具调用块
235237
content_blocks.extend(directive.tool_blocks)
238+
visible_text = tool_directive_visible_text(directive, execution.state.answer_text)
239+
if (
240+
directive.stop_reason != "tool_use"
241+
and visible_text
242+
and not any(block.get("type") == "text" for block in content_blocks)
243+
):
244+
content_blocks.append({"type": "text", "text": visible_text})
236245

237246
return {
238247
"id": msg_id,
@@ -244,7 +253,7 @@ def to_anthropic_response(execution, standard_request: StandardRequest, msg_id:
244253
"stop_sequence": None,
245254
"usage": {
246255
"input_tokens": len(standard_request.prompt),
247-
"output_tokens": len(execution.state.answer_text),
256+
"output_tokens": len(visible_text),
248257
},
249258
}
250259

backend/api/anthropic.py

Lines changed: 77 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from backend.adapter.cli_proxy import CLIProxy
1010
from backend.core.config import resolve_model, settings
1111
from backend.core.request_logging import new_request_id, request_context, update_request_context
12+
from backend.core.request_trace import log_test_prompt, prompt_tail
1213
from backend.runtime import stream_presenter
1314
from backend.runtime.execution import (
1415
build_tool_directive,
@@ -17,6 +18,7 @@
1718
collect_completion_run_with_recovery,
1819
evaluate_retry_directive,
1920
request_max_attempts,
21+
tool_directive_visible_text,
2022
)
2123
from backend.services.auth_quota import resolve_auth_context
2224
from backend.services.completion_bridge import force_fresh_chat_after_empty_response, is_empty_upstream_response
@@ -69,6 +71,7 @@ def __init__(self, *, msg_id: str, model_name: str, prompt: str):
6971
self.prompt = prompt
7072
self.pending_chunks: list[str] = []
7173
self.answer_text_buffer: list[tuple[int, str]] = []
74+
self.flushed_answer_text = ""
7275
self.block_index = 0
7376
self.current_block: dict[str, object] = {"type": None, "index": None, "tool_call_id": None}
7477
self.opened_tool_calls: set[str] = set()
@@ -142,11 +145,39 @@ def flush_answer_text(self) -> None:
142145
self.pending_chunks.append(
143146
stream_presenter.anthropic_content_block_delta(index, {"type": "text_delta", "text": text_chunk})
144147
)
148+
self.flushed_answer_text += text_chunk
145149
self.answer_text_buffer = []
146150

147151
def clear_answer_text(self) -> None:
148152
self.answer_text_buffer = []
149153

154+
def answer_text(self) -> str:
155+
return "".join(text_chunk for _, text_chunk in self.answer_text_buffer)
156+
157+
def queued_answer_text(self) -> str:
158+
return self.flushed_answer_text + self.answer_text()
159+
160+
def buffer_missing_answer_tail(self, final_text: str) -> None:
161+
if not final_text:
162+
return
163+
queued = self.queued_answer_text()
164+
if queued == final_text:
165+
return
166+
if final_text.startswith(queued):
167+
missing = final_text[len(queued):]
168+
if missing:
169+
self.buffer_answer_text(missing)
170+
return
171+
if final_text.startswith(self.flushed_answer_text):
172+
missing = final_text[len(self.flushed_answer_text):]
173+
self.answer_text_buffer = []
174+
if missing:
175+
self.buffer_answer_text(missing)
176+
return
177+
if not self.flushed_answer_text:
178+
self.answer_text_buffer = []
179+
self.buffer_answer_text(final_text)
180+
150181

151182
def _build_standard_request(req_data: dict) -> StandardRequest:
152183
"""浣跨敤 CLIProxy 杩涜鍗忚杞崲"""
@@ -196,7 +227,7 @@ def _visible_answer_text_length(*, directive, execution, stream_state: _Anthropi
196227
if directive.stop_reason == "tool_use":
197228
return 0
198229
if stream_state is not None:
199-
return sum(len(text_chunk) for _, text_chunk in stream_state.answer_text_buffer)
230+
return len(stream_state.queued_answer_text())
200231
return len(execution.state.answer_text)
201232

202233

@@ -319,15 +350,27 @@ async def generate():
319350
standard_request, effective_payload, model_name, qwen_model, prompt, msg_id = await prepare_locked_request(req_data)
320351
update_request_context(requested_model=model_name, resolved_model=qwen_model)
321352
tool_names = [t.get('name') for t in standard_request.tools]
353+
log_test_prompt(
354+
log,
355+
stage="anthropic_request",
356+
surface="anthropic",
357+
model=qwen_model,
358+
stream=standard_request.stream,
359+
tools=tool_names,
360+
prompt=prompt,
361+
)
322362
log.info(
323-
"[ANT] model=%s stream=%s tool_enabled=%s tools=%s mcp_tools=%s workspace=%s prompt_len=%s",
363+
"[ANT] model=%s stream=%s tool_enabled=%s tools=%s mcp_tools=%s workspace=%s prompt_len=%s prompt_tail=%r delete_expected=%s persistent_session=%s",
324364
qwen_model,
325365
standard_request.stream,
326366
standard_request.tool_enabled,
327367
tool_names,
328368
[name for name in tool_names if isinstance(name, str) and name.startswith("mcp__")],
329369
standard_request.workspace_root or "-",
330370
len(prompt),
371+
prompt_tail(prompt),
372+
(getattr(standard_request, "chat_type", "t2t") != "t2t") or not bool(getattr(standard_request, "persistent_session", False)),
373+
bool(getattr(standard_request, "persistent_session", False)),
331374
)
332375
history_messages = original_history_messages
333376
current_prompt = prompt
@@ -347,18 +390,11 @@ async def on_delta(evt, text_chunk, _):
347390
stream_state.buffer_answer_text(text_chunk)
348391
return
349392
if phase == "tool_call":
350-
extra = evt.get("extra", {}) or {}
351-
tool_call_id = extra.get("tool_call_id")
352-
if tool_call_id is None:
353-
tool_call_id = f"tc_idx_{extra.get('index', 0)}"
354-
tool_name = extra.get("tool_name")
355-
if not tool_name:
356-
return
357-
stream_state.append_tool_delta(
358-
tool_call_id=str(tool_call_id),
359-
tool_name=str(tool_name),
360-
partial_json=evt.get("content", ""),
361-
)
393+
# Buffering means no SSE chunk is sent until the
394+
# final directive is built. Emit tool blocks only
395+
# after ToolGuard has had a chance to suppress
396+
# redundant completed tool calls.
397+
return
362398

363399
execution = await collect_completion_run_with_recovery(
364400
client,
@@ -411,15 +447,18 @@ async def on_delta(evt, text_chunk, _):
411447
stream_state.pending_chunks.append(_message_start_event(msg_id, model_name, current_prompt, execution.state.answer_text))
412448

413449
directive = build_tool_directive(standard_request, execution.state, history_messages=history_messages)
450+
visible_text = tool_directive_visible_text(directive, execution.state.answer_text)
451+
if directive.stop_reason != "tool_use":
452+
stream_state.buffer_missing_answer_tail(visible_text)
414453
if (
415454
directive.stop_reason != "tool_use"
416455
and not stream_state.answer_text_buffer
417-
and execution.state.answer_text
456+
and visible_text
418457
):
419458
# ToolSieve may hold short normal replies until stream end to
420459
# avoid leaking partial tool markup. If no live text delta was
421460
# emitted, replay the finalized visible answer here.
422-
stream_state.buffer_answer_text(execution.state.answer_text)
461+
stream_state.buffer_answer_text(visible_text)
423462
visible_answer_length = _visible_answer_text_length(
424463
directive=directive,
425464
execution=execution,
@@ -501,15 +540,27 @@ async def on_delta(evt, text_chunk, _):
501540
standard_request, effective_payload, model_name, qwen_model, prompt, msg_id = await prepare_locked_request(req_data)
502541
update_request_context(requested_model=model_name, resolved_model=qwen_model)
503542
tool_names = [t.get('name') for t in standard_request.tools]
543+
log_test_prompt(
544+
log,
545+
stage="anthropic_request",
546+
surface="anthropic",
547+
model=qwen_model,
548+
stream=standard_request.stream,
549+
tools=tool_names,
550+
prompt=prompt,
551+
)
504552
log.info(
505-
"[ANT] model=%s stream=%s tool_enabled=%s tools=%s mcp_tools=%s workspace=%s prompt_len=%s",
553+
"[ANT] model=%s stream=%s tool_enabled=%s tools=%s mcp_tools=%s workspace=%s prompt_len=%s prompt_tail=%r delete_expected=%s persistent_session=%s",
506554
qwen_model,
507555
standard_request.stream,
508556
standard_request.tool_enabled,
509557
tool_names,
510558
[name for name in tool_names if isinstance(name, str) and name.startswith("mcp__")],
511559
standard_request.workspace_root or "-",
512560
len(prompt),
561+
prompt_tail(prompt),
562+
(getattr(standard_request, "chat_type", "t2t") != "t2t") or not bool(getattr(standard_request, "persistent_session", False)),
563+
bool(getattr(standard_request, "persistent_session", False)),
513564
)
514565
history_messages = original_history_messages
515566
current_prompt = prompt
@@ -552,17 +603,24 @@ async def on_delta(evt, text_chunk, _):
552603
raise RuntimeError("empty upstream response after retries")
553604

554605
directive = build_tool_directive(standard_request, execution.state, history_messages=history_messages)
606+
visible_text = tool_directive_visible_text(directive, execution.state.answer_text)
555607
_log_response_tool_blocks("json_response", directive.tool_blocks)
556608
content_blocks: list[dict] = []
557609
if execution.state.reasoning_text:
558610
content_blocks.append({"type": "thinking", "thinking": execution.state.reasoning_text})
559611
content_blocks.extend(directive.tool_blocks)
612+
if (
613+
directive.stop_reason != "tool_use"
614+
and visible_text
615+
and not any(block.get("type") == "text" for block in content_blocks)
616+
):
617+
content_blocks.append({"type": "text", "text": visible_text})
560618

561619
await _add_used_tokens_for_prompt(
562620
users_db=users_db,
563621
token=token,
564622
prompt_text=current_prompt,
565-
answer_text_length=len(execution.state.answer_text),
623+
answer_text_length=len(visible_text),
566624
)
567625
assistant_message = build_anthropic_assistant_history_message(
568626
execution=execution,
@@ -592,7 +650,7 @@ async def on_delta(evt, text_chunk, _):
592650
"content": content_blocks,
593651
"stop_reason": directive.stop_reason,
594652
"stop_sequence": None,
595-
"usage": _anthropic_usage(current_prompt, execution.state.answer_text),
653+
"usage": _anthropic_usage(current_prompt, visible_text),
596654
}
597655
)
598656
except Exception as e:

0 commit comments

Comments
 (0)