Skip to content

Commit 0fb95d1

Browse files
committed
Merge branch 'codex/issue-71-long-tool-context' into trae
# Please enter a commit message to explain why this merge is necessary, # especially if it merges an updated upstream into a topic branch. # # Lines starting with '#' will be ignored, and an empty message aborts # the commit.
2 parents 533868a + fbadc0a commit 0fb95d1

18 files changed

Lines changed: 1148 additions & 116 deletions

backend/adapter/cli_proxy.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from backend.adapter.standard_request import StandardRequest, CLAUDE_CODE_OPENAI_PROFILE
99
from backend.core.config import resolve_model
10+
from backend.runtime.visible_text import sanitize_visible_text, sanitize_visible_text_blocks
1011
from backend.services.model_modes import parse_model_mode
1112
from backend.services.prompt_builder import messages_to_prompt
1213
from backend.services.workspace_context import derive_workspace_root
@@ -189,6 +190,7 @@ def to_openai_response(execution, standard_request: StandardRequest) -> dict:
189190
Returns:
190191
dict: OpenAI 格式的响应
191192
"""
193+
visible_text = sanitize_visible_text(execution.state.answer_text)
192194
return {
193195
"id": f"chatcmpl-{execution.chat_id[:12]}",
194196
"object": "chat.completion",
@@ -199,15 +201,15 @@ def to_openai_response(execution, standard_request: StandardRequest) -> dict:
199201
"index": 0,
200202
"message": {
201203
"role": "assistant",
202-
"content": execution.state.answer_text,
204+
"content": visible_text,
203205
},
204206
"finish_reason": "stop",
205207
}
206208
],
207209
"usage": {
208210
"prompt_tokens": len(standard_request.prompt),
209-
"completion_tokens": len(execution.state.answer_text),
210-
"total_tokens": len(standard_request.prompt) + len(execution.state.answer_text),
211+
"completion_tokens": len(visible_text),
212+
"total_tokens": len(standard_request.prompt) + len(visible_text),
211213
},
212214
}
213215

@@ -231,10 +233,10 @@ def to_anthropic_response(execution, standard_request: StandardRequest, msg_id:
231233

232234
# 添加思考内容
233235
if execution.state.reasoning_text:
234-
content_blocks.append({"type": "thinking", "thinking": execution.state.reasoning_text})
236+
content_blocks.append({"type": "thinking", "thinking": sanitize_visible_text(execution.state.reasoning_text)})
235237

236238
# 添加工具调用块
237-
content_blocks.extend(directive.tool_blocks)
239+
content_blocks.extend(sanitize_visible_text_blocks(directive.tool_blocks))
238240
visible_text = tool_directive_visible_text(directive, execution.state.answer_text)
239241
if (
240242
directive.stop_reason != "tool_use"
@@ -269,11 +271,12 @@ def to_gemini_response(execution, standard_request: StandardRequest) -> dict:
269271
Returns:
270272
dict: Gemini 格式的响应
271273
"""
274+
visible_text = sanitize_visible_text(execution.state.answer_text)
272275
return {
273276
"candidates": [
274277
{
275278
"content": {
276-
"parts": [{"text": execution.state.answer_text}],
279+
"parts": [{"text": visible_text}],
277280
"role": "model",
278281
},
279282
"finishReason": "STOP",
@@ -282,8 +285,8 @@ def to_gemini_response(execution, standard_request: StandardRequest) -> dict:
282285
],
283286
"usageMetadata": {
284287
"promptTokenCount": len(standard_request.prompt),
285-
"candidatesTokenCount": len(execution.state.answer_text),
286-
"totalTokenCount": len(standard_request.prompt) + len(execution.state.answer_text),
288+
"candidatesTokenCount": len(visible_text),
289+
"totalTokenCount": len(standard_request.prompt) + len(visible_text),
287290
},
288291
}
289292

backend/api/admin.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,8 +269,11 @@ async def get_settings(request: Request):
269269
"max_inflight_per_account": backend_settings.MAX_INFLIGHT_PER_ACCOUNT,
270270
"global_max_inflight": getattr(acc_pool, "global_max_inflight", 0),
271271
"max_queue_size": getattr(acc_pool, "max_queue_size", 0),
272+
"account_ready_set_threshold": backend_settings.ACCOUNT_READY_SET_THRESHOLD,
273+
"account_ready_set_enabled": getattr(acc_pool, "ready_set_enabled", False),
272274
"chat_id_pool_target": pool.target if pool else 0,
273275
"chat_id_pool_ttl_seconds": pool.ttl if pool else 0,
276+
"chat_id_pool_max_concurrency": pool.max_concurrency if pool else 0,
274277
"model_aliases": safe_map,
275278
}
276279

@@ -286,6 +289,15 @@ async def update_settings(data: dict, request: Request):
286289
pool.set_max_inflight(val)
287290
except (TypeError, ValueError):
288291
pass
292+
if "account_ready_set_threshold" in data:
293+
try:
294+
val = max(1, int(data["account_ready_set_threshold"]))
295+
settings.ACCOUNT_READY_SET_THRESHOLD = val
296+
pool = getattr(request.app.state, "account_pool", None)
297+
if pool is not None and hasattr(pool, "_reset_concurrency_limits"):
298+
pool._reset_concurrency_limits()
299+
except (TypeError, ValueError):
300+
pass
289301
if "global_max_inflight" in data:
290302
try:
291303
val = int(data["global_max_inflight"])
@@ -294,12 +306,13 @@ async def update_settings(data: dict, request: Request):
294306
pool.global_max_inflight = val
295307
except (TypeError, ValueError):
296308
pass
297-
if "chat_id_pool_target" in data or "chat_id_pool_ttl_seconds" in data:
309+
if "chat_id_pool_target" in data or "chat_id_pool_ttl_seconds" in data or "chat_id_pool_max_concurrency" in data:
298310
cp = getattr(request.app.state, "chat_id_pool", None)
299311
if cp is not None:
300312
await cp.apply_config(
301313
target=data.get("chat_id_pool_target"),
302314
ttl_seconds=data.get("chat_id_pool_ttl_seconds"),
315+
max_concurrency=data.get("chat_id_pool_max_concurrency"),
303316
)
304317
if "model_aliases" in data:
305318
MODEL_MAP.clear()

backend/api/anthropic.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
request_max_attempts,
2121
tool_directive_visible_text,
2222
)
23+
from backend.runtime.visible_text import VisibleTextSanitizer, sanitize_visible_text, sanitize_visible_text_blocks
2324
from backend.services.auth_quota import resolve_auth_context
2425
from backend.services.completion_bridge import force_fresh_chat_after_empty_response, is_empty_upstream_response
2526
from backend.services.context_attachment_manager import prepare_context_attachments, derive_session_key
@@ -75,6 +76,8 @@ def __init__(self, *, msg_id: str, model_name: str, prompt: str):
7576
self.block_index = 0
7677
self.current_block: dict[str, object] = {"type": None, "index": None, "tool_call_id": None}
7778
self.opened_tool_calls: set[str] = set()
79+
self.answer_sanitizer = VisibleTextSanitizer()
80+
self.thinking_sanitizer = VisibleTextSanitizer()
7881

7982
def ensure_message_start(self) -> None:
8083
if not self.pending_chunks:
@@ -122,12 +125,24 @@ def open_tool_block(self, tool_call_id: str, tool_name: str) -> int:
122125
return index
123126

124127
def append_thinking_delta(self, text_chunk: str) -> None:
128+
text_chunk = self.thinking_sanitizer.feed(text_chunk)
129+
if not text_chunk:
130+
return
125131
index = self.open_textual_block("thinking")
126132
self.pending_chunks.append(
127133
stream_presenter.anthropic_content_block_delta(index, {"type": "thinking_delta", "thinking": text_chunk})
128134
)
129135

130136
def buffer_answer_text(self, text_chunk: str) -> None:
137+
thinking_tail = self.thinking_sanitizer.flush()
138+
if thinking_tail:
139+
index = self.open_textual_block("thinking")
140+
self.pending_chunks.append(
141+
stream_presenter.anthropic_content_block_delta(index, {"type": "thinking_delta", "thinking": thinking_tail})
142+
)
143+
text_chunk = self.answer_sanitizer.feed(text_chunk)
144+
if not text_chunk:
145+
return
131146
index = self.open_textual_block("text")
132147
self.answer_text_buffer.append((index, text_chunk))
133148

@@ -150,6 +165,8 @@ def flush_answer_text(self) -> None:
150165

151166
def clear_answer_text(self) -> None:
152167
self.answer_text_buffer = []
168+
self.flushed_answer_text = ""
169+
self.answer_sanitizer.reset()
153170

154171
def answer_text(self) -> str:
155172
return "".join(text_chunk for _, text_chunk in self.answer_text_buffer)
@@ -178,6 +195,18 @@ def buffer_missing_answer_tail(self, final_text: str) -> None:
178195
self.answer_text_buffer = []
179196
self.buffer_answer_text(final_text)
180197

198+
def flush_text_sanitizers(self) -> None:
199+
answer_tail = self.answer_sanitizer.flush()
200+
if answer_tail:
201+
index = self.open_textual_block("text")
202+
self.answer_text_buffer.append((index, answer_tail))
203+
thinking_tail = self.thinking_sanitizer.flush()
204+
if thinking_tail:
205+
index = self.open_textual_block("thinking")
206+
self.pending_chunks.append(
207+
stream_presenter.anthropic_content_block_delta(index, {"type": "thinking_delta", "thinking": thinking_tail})
208+
)
209+
181210

182211
def _build_standard_request(req_data: dict) -> StandardRequest:
183212
"""浣跨敤 CLIProxy 杩涜鍗忚杞崲"""
@@ -448,6 +477,7 @@ async def on_delta(evt, text_chunk, _):
448477

449478
directive = build_tool_directive(standard_request, execution.state, history_messages=history_messages)
450479
visible_text = tool_directive_visible_text(directive, execution.state.answer_text)
480+
stream_state.flush_text_sanitizers()
451481
if directive.stop_reason != "tool_use":
452482
stream_state.buffer_missing_answer_tail(visible_text)
453483
if (
@@ -607,8 +637,8 @@ async def on_delta(evt, text_chunk, _):
607637
_log_response_tool_blocks("json_response", directive.tool_blocks)
608638
content_blocks: list[dict] = []
609639
if execution.state.reasoning_text:
610-
content_blocks.append({"type": "thinking", "thinking": execution.state.reasoning_text})
611-
content_blocks.extend(directive.tool_blocks)
640+
content_blocks.append({"type": "thinking", "thinking": sanitize_visible_text(execution.state.reasoning_text)})
641+
content_blocks.extend(sanitize_visible_text_blocks(directive.tool_blocks))
612642
if (
613643
directive.stop_reason != "tool_use"
614644
and visible_text

backend/api/gemini.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from backend.core.request_logging import new_request_id, request_context, update_request_context
1212
from backend.runtime import stream_presenter
1313
from backend.runtime.execution import collect_completion_run, cleanup_runtime_resources
14+
from backend.runtime.visible_text import VisibleTextSanitizer, sanitize_visible_text
1415
from backend.services.auth_quota import resolve_auth_context
1516
from backend.services.completion_bridge import force_fresh_chat_after_empty_response, is_empty_upstream_response
1617
from backend.services.token_calc import calculate_usage
@@ -74,7 +75,8 @@ async def gemini_generate_content(model: str, request: Request):
7475
log.error(f"Gemini proxy failed: {e}")
7576
raise HTTPException(status_code=500, detail=str(e))
7677

77-
usage = calculate_usage(content, execution.state.answer_text)
78+
visible_text = sanitize_visible_text(execution.state.answer_text)
79+
usage = calculate_usage(content, visible_text)
7880
users = await users_db.get()
7981
for u in users:
8082
if u["id"] == token:
@@ -83,13 +85,13 @@ async def gemini_generate_content(model: str, request: Request):
8385
await users_db.save(users)
8486
await cleanup_runtime_resources(client, execution.acc, execution.chat_id)
8587

86-
log.info(f"[Gemini] Request complete. Generated {len(execution.state.answer_text)} characters.")
88+
log.info(f"[Gemini] Request complete. Generated {len(visible_text)} visible characters.")
8789
return JSONResponse(
8890
{
8991
"candidates": [
9092
{
9193
"content": {
92-
"parts": [{"text": execution.state.answer_text}],
94+
"parts": [{"text": visible_text}],
9395
"role": "model",
9496
}
9597
}
@@ -109,10 +111,13 @@ async def gemini_stream_generate_content(model: str, request: Request):
109111

110112
async def generate():
111113
queue: asyncio.Queue[str | None] = asyncio.Queue()
114+
answer_sanitizer = VisibleTextSanitizer()
112115

113116
async def on_delta(evt, text_chunk, _):
114117
if text_chunk and evt.get("phase") == "answer":
115-
await queue.put(stream_presenter.gemini_text_chunk(text_chunk))
118+
visible_chunk = answer_sanitizer.feed(text_chunk)
119+
if visible_chunk:
120+
await queue.put(stream_presenter.gemini_text_chunk(visible_chunk))
116121

117122
async def runner():
118123
execution = None
@@ -129,18 +134,22 @@ async def runner():
129134
await cleanup_runtime_resources(client, execution.acc, execution.chat_id, preserve_chat=False)
130135
raise RuntimeError("empty upstream response")
131136

132-
usage = calculate_usage(content, execution.state.answer_text)
137+
visible_text = sanitize_visible_text(execution.state.answer_text)
138+
usage = calculate_usage(content, visible_text)
133139
users = await users_db.get()
134140
for u in users:
135141
if u["id"] == token:
136142
u["used_tokens"] += usage["total_tokens"]
137143
break
138144
await users_db.save(users)
139145
await cleanup_runtime_resources(client, execution.acc, execution.chat_id)
140-
log.info(f"[Gemini] Request complete. Generated {len(execution.state.answer_text)} characters.")
146+
log.info(f"[Gemini] Request complete. Generated {len(visible_text)} visible characters.")
141147
except Exception as e:
142148
await queue.put(json.dumps({"error": str(e)}) + "\n")
143149
finally:
150+
visible_tail = answer_sanitizer.flush()
151+
if visible_tail:
152+
await queue.put(stream_presenter.gemini_text_chunk(visible_tail))
144153
await queue.put(None)
145154

146155
task = asyncio.create_task(runner())

backend/api/responses.py

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from backend.core.request_logging import new_request_id, request_context, update_request_context
1515
from backend.core.request_trace import log_test_prompt, prompt_tail
1616
from backend.runtime.execution import build_tool_directive, build_usage_delta_factory, request_max_attempts, tool_directive_visible_text
17+
from backend.runtime.visible_text import VisibleTextSanitizer, sanitize_visible_text
1718
from backend.services.attachment_preprocessor import preprocess_attachments
1819
from backend.services.auth_quota import resolve_auth_context
1920
from backend.services.client_profiles import detect_openai_client_profile
@@ -457,7 +458,7 @@ def build_responses_payload(
457458
output_text = visible_text
458459
content: list[dict[str, Any]] = []
459460
if execution.state.reasoning_text:
460-
content.append({"type": "reasoning_text", "text": execution.state.reasoning_text})
461+
content.append({"type": "reasoning_text", "text": sanitize_visible_text(execution.state.reasoning_text)})
461462
content.append({"type": "output_text", "text": output_text, "annotations": []})
462463
payload["output"] = [{
463464
"id": f"msg_{uuid.uuid4().hex[:12]}",
@@ -490,7 +491,10 @@ def __init__(self, *, response_id: str, created: int, model_name: str, prompt: s
490491
self.started_text = False
491492
self.pending_chunks: list[str] = []
492493
self.answer_fragments: list[str] = []
494+
self.visible_answer_fragments: list[str] = []
493495
self.reasoning_fragments: list[str] = []
496+
self.answer_sanitizer = VisibleTextSanitizer()
497+
self.reasoning_sanitizer = VisibleTextSanitizer()
494498
self.tool_calls_emitted = False
495499

496500
def initial_chunks(self) -> list[str]:
@@ -523,6 +527,9 @@ def _ensure_text_item(self) -> None:
523527

524528
def on_delta(self, evt: dict[str, Any], text_chunk: str | None, tool_calls: list[dict[str, Any]] | None) -> None:
525529
if text_chunk and evt.get("phase") in ("think", "thinking_summary"):
530+
text_chunk = self.reasoning_sanitizer.feed(text_chunk)
531+
if not text_chunk:
532+
return
526533
self.reasoning_fragments.append(text_chunk)
527534
self.pending_chunks.append(_sse("response.reasoning_text.delta", {
528535
"response_id": self.response_id,
@@ -534,8 +541,12 @@ def on_delta(self, evt: dict[str, Any], text_chunk: str | None, tool_calls: list
534541
return
535542

536543
if text_chunk and evt.get("phase") == "answer":
537-
self._ensure_text_item()
538544
self.answer_fragments.append(text_chunk)
545+
text_chunk = self.answer_sanitizer.feed(text_chunk)
546+
if not text_chunk:
547+
return
548+
self._ensure_text_item()
549+
self.visible_answer_fragments.append(text_chunk)
539550
self.pending_chunks.append(_sse("response.output_text.delta", {
540551
"response_id": self.response_id,
541552
"item_id": self.message_id,
@@ -553,6 +564,29 @@ def drain_pending(self) -> list[str]:
553564
self.pending_chunks = []
554565
return chunks
555566

567+
def _flush_visible_sanitizers(self) -> None:
568+
reasoning_tail = self.reasoning_sanitizer.flush()
569+
if reasoning_tail:
570+
self.reasoning_fragments.append(reasoning_tail)
571+
self.pending_chunks.append(_sse("response.reasoning_text.delta", {
572+
"response_id": self.response_id,
573+
"item_id": self.message_id,
574+
"output_index": self.output_index,
575+
"content_index": self.content_index,
576+
"delta": reasoning_tail,
577+
}))
578+
answer_tail = self.answer_sanitizer.flush()
579+
if answer_tail:
580+
self._ensure_text_item()
581+
self.visible_answer_fragments.append(answer_tail)
582+
self.pending_chunks.append(_sse("response.output_text.delta", {
583+
"response_id": self.response_id,
584+
"item_id": self.message_id,
585+
"output_index": self.output_index,
586+
"content_index": self.content_index,
587+
"delta": answer_tail,
588+
}))
589+
556590
def emit_tool_calls(self, tool_calls: list[dict[str, Any]]) -> None:
557591
for tool_call in tool_calls:
558592
block = {
@@ -595,6 +629,7 @@ def emit_tool_calls(self, tool_calls: list[dict[str, Any]]) -> None:
595629
self.tool_calls_emitted = True
596630

597631
def finalize(self, execution, directive) -> list[str]:
632+
self._flush_visible_sanitizers()
598633
chunks = self.drain_pending()
599634
final_text = tool_directive_visible_text(
600635
directive,
@@ -621,9 +656,10 @@ def finalize(self, execution, directive) -> list[str]:
621656
"delta": final_text,
622657
}))
623658
self.answer_fragments.append(final_text)
659+
self.visible_answer_fragments.append(final_text)
624660
chunks.extend(self.drain_pending())
625661
elif directive.stop_reason != "tool_use":
626-
streamed_text = "".join(self.answer_fragments)
662+
streamed_text = "".join(self.visible_answer_fragments)
627663
if final_text.startswith(streamed_text):
628664
missing_tail = final_text[len(streamed_text):]
629665
if missing_tail:
@@ -635,6 +671,7 @@ def finalize(self, execution, directive) -> list[str]:
635671
"delta": missing_tail,
636672
}))
637673
self.answer_fragments.append(missing_tail)
674+
self.visible_answer_fragments.append(missing_tail)
638675

639676
if self.started_text:
640677
chunks.append(_sse("response.output_text.done", {

0 commit comments

Comments
 (0)