Skip to content

Commit bf7d333

Browse files
author
yang
committed
修复 prompt_builder import,补回 client_profiles.py
- 基于 5d1b6f7 基线,补回 backend/services/client_profiles.py(来自 2a5ceec)
- 在 prompt_builder.py 中 import sanitize_openclaw_user_text / looks_like_opencode_system_prompt / QWEN_CODE_OPENAI_PROFILE
- 新增 _is_heavy_tool_profile helper
- 解决 /v1/chat/completions 500 NameError
1 parent 5d1b6f7 commit bf7d333

2 files changed

Lines changed: 332 additions & 1 deletion

File tree

Lines changed: 321 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,321 @@
1+
from __future__ import annotations
2+
3+
import re
4+
from collections.abc import Mapping
5+
from typing import Any
6+
7+
# Identifiers of the client profiles this module can report.
CLAUDE_CODE_OPENAI_PROFILE = "claude_code_openai"
OPENCLAW_OPENAI_PROFILE = "openclaw_openai"
QWEN_CODE_OPENAI_PROFILE = "qwen_code_openai"

# A user message containing any of these markers is discarded entirely by
# sanitize_openclaw_user_text.
OPENCLAW_STARTUP_PATTERNS = (
    "A new session was started via /new or /reset.",
    "If runtime-provided startup context is included for this first turn",
)
# Messages starting with this prefix are reduced to their trailing
# "[...]"-introduced section by sanitize_openclaw_user_text.
OPENCLAW_UNTRUSTED_METADATA_PREFIX = "Sender (untrusted metadata):"
# Compared against the lower-cased, stripped system prompt.
OPENCODE_SYSTEM_PREFIX = "you are opencode"

# Substrings searched for in the lower-cased system prompt.
QWEN_CODE_SYSTEM_HINTS = ("qwen code", "qwen-code", "you are qwen code", "you are qwen-code")
# Compared against raw (lower-cased, not normalized) declared tool names,
# hence the underscores.
QWEN_CODE_OPENAI_TOOL_NAMES = frozenset({
    "read_file",
    "list_directory",
    "write_file",
    "run_shell_command",
})
# The sets below are compared against names passed through
# normalized_tool_name (lower-case alphanumerics only), hence no separators.
QWEN_CODE_PROFILE_TOOL_HINTS = frozenset({
    "readfile",
    "writefile",
    "editfile",
    "listdirectory",
    "listdir",
    "listfiles",
    "runshellcommand",
    "runcommand",
})
QWEN_CODE_TOOL_HINTS = frozenset({
    "read",
    "write",
    "edit",
    "multiedit",
    "notebookedit",
    "grep",
    "glob",
    "bash",
    "readfile",
    "writefile",
    "editfile",
    "notebookeditcell",
    "runcommand",
    "execcommand",
    "listdirectory",
    "listdir",
    "listfiles",
    "searchfiles",
})
QWEN_CODE_FILE_TOOL_HINTS = frozenset({
    "read",
    "write",
    "edit",
    "multiedit",
    "notebookedit",
    "readfile",
    "writefile",
    "editfile",
})
QWEN_CODE_NAV_TOOL_HINTS = frozenset({
    "grep",
    "glob",
    "bash",
    "runcommand",
    "execcommand",
    "listdirectory",
    "listdir",
    "listfiles",
    "searchfiles",
})
# NOTE(review): the pattern is unanchored, so it also matches inside longer
# words (e.g. "fix" within "prefix"); confirm that this breadth is intended.
QWEN_CODE_TASK_REGEX = re.compile(
    r"(code|coding|program|repo|repository|refactor|debug|fix|implement|patch|file|files|terminal|shell|bash|command|test|build|编程|代码|仓库|文件|脚本|命令|调试|修复|实现|重构|测试)",
    re.IGNORECASE,
)
# Headers inspected for a "qwen ... code" user-agent style hint.
QWEN_CODE_OPENAI_HINT_HEADERS = (
    "user-agent",
    "x-openai-client-user-agent",
    "x-client-user-agent",
)
# Any non-empty value among these headers counts as an SDK fingerprint.
OPENAI_SDK_FINGERPRINT_HEADERS = (
    "x-openai-client-user-agent",
    "x-stainless-lang",
    "x-stainless-package-version",
    "x-stainless-runtime",
)
91+
92+
93+
def header_value(headers: Mapping[str, Any] | Any, header_name: str) -> str:
    """Return a single header's value as a string.

    Args:
        headers: Any object exposing ``get(name, default)`` (a dict, a
            framework headers object, ...). Objects without a callable
            ``get`` yield ``""``.
        header_name: Name of the header to read.

    Returns:
        The header value; ``""`` when it is missing or ``None``. Non-string
        values are converted with ``str()``.
    """
    getter = getattr(headers, "get", None)
    if not callable(getter):
        return ""

    value = getter(header_name, "")
    if (value is None or value == "") and isinstance(headers, Mapping):
        # HTTP field names are case-insensitive, but a plain dict lookup is
        # not: retry ignoring case so {"User-Agent": ...} is still found.
        wanted = header_name.lower()
        for key, candidate in headers.items():
            if (
                isinstance(key, str)
                and key.lower() == wanted
                and candidate is not None
                and candidate != ""
            ):
                value = candidate
                break

    if value is None:
        return ""
    return value if isinstance(value, str) else str(value)
104+
105+
106+
def normalized_tool_name(value: str) -> str:
    """Reduce a tool name to lower-case ASCII letters and digits only.

    Falsy input (``None``, ``""``) yields ``""``; any other value is passed
    through ``str()`` first. Separators such as ``_``, ``-`` and spaces are
    removed, so ``"Read_File"`` and ``"read-file"`` both become ``"readfile"``.
    """
    lowered = str(value or "").strip().lower()
    return re.sub(r"[^a-z0-9]+", "", lowered)
108+
109+
110+
def normalize_tool(tool: dict[str, Any]) -> dict[str, Any]:
    """Flatten a tool declaration into ``name``/``description``/``parameters``.

    Two shapes are handled:

    * nested: ``{"type": "function", "function": {...}}`` -- fields are read
      from the nested ``function`` dict;
    * flat: fields are read from ``tool`` itself, with the schema taken from
      ``input_schema`` or, failing that, ``parameters``.

    Args:
        tool: The raw tool declaration.

    Returns:
        A new dict with exactly the keys ``name``, ``description`` and
        ``parameters``. Missing name/description default to ``""`` and a
        missing or empty schema to ``{}``.
    """
    nested = tool.get("function")
    # Only treat the payload as nested when it really is a dict; a
    # "function": null entry falls back to the flat shape instead of raising.
    if tool.get("type") == "function" and isinstance(nested, dict):
        source = nested
        schema = nested.get("parameters")
    else:
        source = tool
        schema = tool.get("input_schema") or tool.get("parameters")
    return {
        "name": source.get("name", ""),
        "description": source.get("description", ""),
        "parameters": schema or {},
    }
123+
124+
125+
def normalize_tools(tools: list[Any] | None) -> list[dict[str, Any]]:
    """Normalize every dict entry of ``tools`` with ``normalize_tool``.

    Entries that are not dicts are skipped; ``None`` or an empty list yields
    an empty list.
    """
    normalized: list[dict[str, Any]] = []
    for entry in tools or []:
        if isinstance(entry, dict):
            normalized.append(normalize_tool(entry))
    return normalized
127+
128+
129+
def extract_declared_tool_names(req_data: dict[str, Any] | None) -> set[str]:
    """Collect the lower-cased names of the tools declared in a request.

    For each dict entry under ``req_data["tools"]`` the top-level ``name`` is
    used when it is a non-blank string; otherwise the nested
    ``function.name`` is tried.

    Args:
        req_data: The request payload; anything that is not a dict yields an
            empty set.

    Returns:
        The set of stripped, lower-cased tool names (names are not otherwise
        normalized, so separators such as ``_`` are kept).
    """
    if not isinstance(req_data, dict):
        return set()

    declared = req_data.get("tools")
    # A malformed payload (e.g. "tools": 5) must not raise while iterating.
    if not isinstance(declared, (list, tuple)):
        return set()

    names: set[str] = set()
    for entry in declared:
        if not isinstance(entry, dict):
            continue
        name = entry.get("name")
        if not (isinstance(name, str) and name.strip()):
            payload = entry.get("function")
            if isinstance(payload, dict):
                name = payload.get("name")
        if isinstance(name, str) and name.strip():
            names.add(name.strip().lower())
    return names
145+
146+
147+
def has_qwen_code_header_hint(headers: Mapping[str, Any] | Any) -> bool:
    """Tell whether any hint header mentions both "qwen" and "code".

    Each header named in ``QWEN_CODE_OPENAI_HINT_HEADERS`` is read through
    ``header_value`` and checked case-insensitively; the two words may appear
    anywhere in the value and in any order.
    """
    lowered_values = (
        header_value(headers, name).lower()
        for name in QWEN_CODE_OPENAI_HINT_HEADERS
    )
    return any("qwen" in value and "code" in value for value in lowered_values)
153+
154+
155+
def has_openai_sdk_fingerprint(headers: Mapping[str, Any] | Any) -> bool:
    """Tell whether any header in ``OPENAI_SDK_FINGERPRINT_HEADERS`` is non-empty."""
    for name in OPENAI_SDK_FINGERPRINT_HEADERS:
        if header_value(headers, name):
            return True
    return False
157+
158+
159+
def is_qwen_code_openai_request(headers: Mapping[str, Any] | Any, req_data: dict[str, Any] | None) -> bool:
    """Decide whether a request looks like it comes from Qwen Code.

    The request qualifies when any of the following holds:

    * every name in ``QWEN_CODE_OPENAI_TOOL_NAMES`` is declared as a tool;
    * a hint header mentions Qwen Code;
    * at least three of those tool names are declared and an OpenAI SDK
      fingerprint header is present.

    Args:
        headers: Request headers (anything accepted by ``header_value``).
        req_data: The request payload, or ``None``.
    """
    declared = extract_declared_tool_names(req_data)
    matched = len(declared & QWEN_CODE_OPENAI_TOOL_NAMES)
    if matched >= len(QWEN_CODE_OPENAI_TOOL_NAMES):
        return True
    # A header hint is sufficient on its own, so check it once up front.
    if has_qwen_code_header_hint(headers):
        return True
    return matched >= 3 and has_openai_sdk_fingerprint(headers)
167+
168+
169+
def sanitize_openclaw_user_text(text: str) -> str:
    """Strip OpenClaw wrapper content from a user message.

    Args:
        text: The raw user text. Non-string input yields ``""``.

    Returns:
        * ``""`` when the text is blank or contains one of
          ``OPENCLAW_STARTUP_PATTERNS``;
        * for text starting with ``OPENCLAW_UNTRUSTED_METADATA_PREFIX``, the
          section that follows a blank line and opens with a ``[...]`` tag,
          or ``""`` when there is no such section;
        * otherwise the text with surrounding whitespace removed.
    """
    # Payloads may carry "text": null; treat that as empty instead of raising.
    if not isinstance(text, str):
        return ""

    stripped = text.strip()
    if not stripped:
        return stripped
    if any(marker in stripped for marker in OPENCLAW_STARTUP_PATTERNS):
        return ""
    if not stripped.startswith(OPENCLAW_UNTRUSTED_METADATA_PREFIX):
        return stripped

    # Keep only what follows the metadata header: a blank line, then a
    # bracketed tag, then the remainder of the message.
    tail = re.search(r"\n\n(\[[^\n]+\]\s*[\s\S]*)$", stripped)
    return tail.group(1).strip() if tail else ""
182+
183+
184+
def extract_user_text_only(content: Any, client_profile: str = OPENCLAW_OPENAI_PROFILE) -> str:
    """Extract the plain text of a message ``content`` field.

    Args:
        content: Either a string or a list of content parts; only parts that
            are dicts with ``type == "text"`` contribute.
        client_profile: When equal to ``OPENCLAW_OPENAI_PROFILE`` every piece
            of text is passed through ``sanitize_openclaw_user_text``.

    Returns:
        The string itself (sanitized for the OpenClaw profile), or the
        non-empty text parts joined with newlines. Any other content type
        yields ``""``.
    """
    sanitize = client_profile == OPENCLAW_OPENAI_PROFILE
    if isinstance(content, str):
        return sanitize_openclaw_user_text(content) if sanitize else content
    if not isinstance(content, list):
        return ""

    pieces: list[str] = []
    for part in content:
        if not isinstance(part, dict) or part.get("type", "") != "text":
            continue
        piece = part.get("text", "")
        # A non-string "text" (e.g. null) would break the join below.
        if not isinstance(piece, str):
            continue
        if sanitize:
            piece = sanitize_openclaw_user_text(piece)
        if piece:
            pieces.append(piece)
    return "\n".join(pieces)
200+
201+
202+
def extract_system_prompt(req_data: dict[str, Any], *, client_profile: str = OPENCLAW_OPENAI_PROFILE) -> str:
    """Return the system prompt carried by a request.

    The top-level ``system`` field wins when it produces non-empty text: a
    string is used as-is and a list has the ``text`` of its dict parts joined
    with single spaces. Otherwise the content of the first message whose role
    is ``"system"`` is extracted with ``extract_user_text_only``.

    Args:
        req_data: The request payload.
        client_profile: Forwarded to ``extract_user_text_only`` for the
            message-based fallback only; the ``system`` field is not sanitized.

    Returns:
        The system prompt, or ``""`` when none is present.
    """
    system_field = req_data.get("system", "")
    prompt = ""
    if isinstance(system_field, str):
        prompt = system_field
    elif isinstance(system_field, list):
        texts = (
            part.get("text", "")
            for part in system_field
            if isinstance(part, dict)
        )
        # Non-string "text" values would make the join raise; skip them.
        prompt = " ".join(text for text in texts if isinstance(text, str))
    if prompt:
        return prompt

    for message in req_data.get("messages", []) or []:
        # Skip malformed entries rather than failing on .get().
        if isinstance(message, dict) and message.get("role") == "system":
            return extract_user_text_only(message.get("content", ""), client_profile=client_profile)
    return ""
221+
222+
223+
def looks_like_opencode_system_prompt(system_prompt: str) -> bool:
    """Tell whether a system prompt starts with ``OPENCODE_SYSTEM_PREFIX``.

    Leading whitespace and letter case are ignored; non-string input yields
    ``False``.
    """
    if not isinstance(system_prompt, str):
        return False
    normalized = system_prompt.strip().lower()
    return normalized.startswith(OPENCODE_SYSTEM_PREFIX)
227+
228+
229+
def extract_latest_user_text(
    messages: list[dict[str, Any]] | None,
    *,
    client_profile: str = OPENCLAW_OPENAI_PROFILE,
) -> str:
    """Return the text of the most recent user message that has any.

    Messages are scanned from last to first; user messages whose extracted
    text is blank (for example fully sanitized away) are skipped.

    Args:
        messages: The conversation messages, or ``None``.
        client_profile: Forwarded to ``extract_user_text_only``.

    Returns:
        The stripped text, or ``""`` when no user message yields text.
    """
    for message in reversed(messages or []):
        # Skip malformed entries rather than failing on .get().
        if not isinstance(message, dict) or message.get("role") != "user":
            continue
        candidate = extract_user_text_only(
            message.get("content", ""),
            client_profile=client_profile,
        ).strip()
        if candidate:
            return candidate
    return ""
241+
242+
243+
def request_looks_like_coding_task(
    req_data: dict[str, Any],
    *,
    client_profile: str = OPENCLAW_OPENAI_PROFILE,
) -> bool:
    """Heuristically decide whether a request is a coding task.

    The answer is ``True`` when any of the following holds:

    * the declared tools include at least one file tool
      (``QWEN_CODE_FILE_TOOL_HINTS``) and one navigation tool
      (``QWEN_CODE_NAV_TOOL_HINTS``);
    * at least three declared tools are in ``QWEN_CODE_TOOL_HINTS``;
    * the latest user text matches ``QWEN_CODE_TASK_REGEX``.

    Args:
        req_data: The request payload (``tools`` and ``messages`` are read).
        client_profile: Forwarded to ``extract_latest_user_text``.
    """
    declared = {
        normalized_tool_name(tool.get("name", ""))
        for tool in normalize_tools(req_data.get("tools", []))
        if tool.get("name")
    }
    if declared & QWEN_CODE_FILE_TOOL_HINTS and declared & QWEN_CODE_NAV_TOOL_HINTS:
        return True
    if len(declared & QWEN_CODE_TOOL_HINTS) >= 3:
        return True

    # The user text is only needed when the tool list was not conclusive.
    latest_user_text = extract_latest_user_text(
        req_data.get("messages", []),
        client_profile=client_profile,
    )
    return bool(latest_user_text and QWEN_CODE_TASK_REGEX.search(latest_user_text))
263+
264+
265+
def infer_client_profile(
    req_data: dict[str, Any],
    *,
    fallback_profile: str = OPENCLAW_OPENAI_PROFILE,
) -> str:
    """Refine a client profile using the request body.

    Args:
        req_data: The request payload.
        fallback_profile: The profile already determined for the request. It
            is returned unchanged when it is the Claude Code or Qwen Code
            profile, when the system prompt looks like an opencode prompt, or
            when nothing points to Qwen Code.

    Returns:
        ``QWEN_CODE_OPENAI_PROFILE`` when the system prompt contains one of
        ``QWEN_CODE_SYSTEM_HINTS``, when at least three declared tools are in
        ``QWEN_CODE_PROFILE_TOOL_HINTS``, or when two are and the request
        looks like a coding task; otherwise ``fallback_profile``.
    """
    if fallback_profile in {CLAUDE_CODE_OPENAI_PROFILE, QWEN_CODE_OPENAI_PROFILE}:
        return fallback_profile

    system_prompt = extract_system_prompt(req_data, client_profile=fallback_profile)
    # The opencode check comes first, so such prompts keep the fallback
    # profile even if they also mention Qwen Code.
    if looks_like_opencode_system_prompt(system_prompt):
        return fallback_profile
    lowered_prompt = system_prompt.strip().lower()
    if any(hint in lowered_prompt for hint in QWEN_CODE_SYSTEM_HINTS):
        return QWEN_CODE_OPENAI_PROFILE

    declared = {
        normalized_tool_name(tool.get("name", ""))
        for tool in normalize_tools(req_data.get("tools", []))
        if tool.get("name")
    }
    match_count = len(declared & QWEN_CODE_PROFILE_TOOL_HINTS)
    if match_count >= 3:
        return QWEN_CODE_OPENAI_PROFILE
    if match_count >= 2 and request_looks_like_coding_task(req_data, client_profile=fallback_profile):
        return QWEN_CODE_OPENAI_PROFILE
    return fallback_profile
291+
292+
293+
def detect_openai_client_profile(headers: Mapping[str, Any] | Any, req_data: dict[str, Any] | None) -> str:
    """Pick the client profile for an OpenAI-style request.

    Checks are made in order: a non-empty ``x-anthropic-billing-header``
    selects the Claude Code profile, then ``is_qwen_code_openai_request``
    selects the Qwen Code profile, and everything else is treated as
    OpenClaw.

    Args:
        headers: Request headers (anything accepted by ``header_value``).
        req_data: The request payload, or ``None``.
    """
    if header_value(headers, "x-anthropic-billing-header"):
        return CLAUDE_CODE_OPENAI_PROFILE
    if is_qwen_code_openai_request(headers, req_data):
        return QWEN_CODE_OPENAI_PROFILE
    return OPENCLAW_OPENAI_PROFILE
299+
300+
301+
# Public API of this module (profile identifiers first, then helpers in
# alphabetical order).
__all__ = [
    "CLAUDE_CODE_OPENAI_PROFILE",
    "OPENCLAW_OPENAI_PROFILE",
    "QWEN_CODE_OPENAI_PROFILE",
    "detect_openai_client_profile",
    "extract_declared_tool_names",
    "extract_latest_user_text",
    "extract_system_prompt",
    "extract_user_text_only",
    "has_openai_sdk_fingerprint",
    "has_qwen_code_header_hint",
    "header_value",
    "infer_client_profile",
    "is_qwen_code_openai_request",
    "looks_like_opencode_system_prompt",
    "normalize_tool",
    "normalize_tools",
    "normalized_tool_name",
    "request_looks_like_coding_task",
    "sanitize_openclaw_user_text",
]

backend/services/prompt_builder.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66
from backend.adapter.standard_request import CLAUDE_CODE_OPENAI_PROFILE, OPENCLAW_OPENAI_PROFILE
77
from backend.core.request_logging import get_request_context
88
from backend.services import file_content_cache
9+
from backend.services.client_profiles import (
10+
QWEN_CODE_OPENAI_PROFILE,
11+
looks_like_opencode_system_prompt as _looks_like_opencode_system_prompt,
12+
sanitize_openclaw_user_text,
13+
)
914
from backend.services.refusal_cleaner import clean_refusal_messages
1015
from backend.services.schema_compressor import compact_schema
1116
from backend.services.tool_few_shot import pick_few_shot_tools, render_few_shot_turn, tool_summary_for_log
@@ -28,6 +33,10 @@ class PromptBuildResult:
2833
tool_enabled: bool
2934

3035

36+
def _is_heavy_tool_profile(client_profile: str) -> bool:
    """Tell whether ``client_profile`` is the Claude Code or Qwen Code profile."""
    return client_profile in {CLAUDE_CODE_OPENAI_PROFILE, QWEN_CODE_OPENAI_PROFILE}
38+
39+
3140
def _compact_history_tool_input(name: str, input_data: dict, client_profile: str) -> dict:
3241
if client_profile != CLAUDE_CODE_OPENAI_PROFILE or not isinstance(input_data, dict):
3342
return input_data
@@ -840,10 +849,11 @@ def _apply_topic_isolation(messages: list, client_profile: str) -> list:
840849

841850

842851
def messages_to_prompt(req_data: dict, *, client_profile: str = OPENCLAW_OPENAI_PROFILE) -> PromptBuildResult:
852+
resolved_client_profile = client_profile
843853
raw_messages = req_data.get("messages", [])
844854
# 话题隔离:新任务与历史首条 user 实体零重合时,丢弃所有历史,只保留 system + 最新 user。
845855
# 这解决 Claude Code 同 session 多任务时旧对话干扰新任务的问题。
846-
isolated = _apply_topic_isolation(raw_messages, client_profile)
856+
isolated = _apply_topic_isolation(raw_messages, resolved_client_profile)
847857
# Pass: 历史拒绝清洗
848858
cleaned_messages, cleaned_count = clean_refusal_messages(isolated)
849859
if cleaned_count:

0 commit comments

Comments
 (0)