Skip to content

Commit 5d1b6f7

Browse files
author
yang
committed
添加预加载,添加流式传输,添加真实模型,添加提示词约束
1 parent bf006a6 commit 5d1b6f7

36 files changed

Lines changed: 3724 additions & 374 deletions

.env.example

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@ ADMIN_KEY=change-me-now
22
PORT=7860
33
WORKERS=1
44
LOG_LEVEL=INFO
5-
MAX_INFLIGHT=1
5+
MAX_INFLIGHT=2
66
MAX_RETRIES=3
7-
ACCOUNT_MIN_INTERVAL_MS=1200
8-
REQUEST_JITTER_MIN_MS=120
9-
REQUEST_JITTER_MAX_MS=360
7+
ACCOUNT_MIN_INTERVAL_MS=0
8+
REQUEST_JITTER_MIN_MS=0
9+
REQUEST_JITTER_MAX_MS=0
1010
RATE_LIMIT_BASE_COOLDOWN=600
1111
RATE_LIMIT_MAX_COOLDOWN=3600
1212
ACCOUNTS_FILE=/workspace/data/accounts.json

README.md

Lines changed: 521 additions & 0 deletions
Large diffs are not rendered by default.

backend/adapter/cli_proxy.py

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
"""
2+
CLIProxy - 协议转换代理层
3+
统一处理 OpenAI/Claude/Gemini 协议到 StandardRequest 的转换
4+
"""
5+
import logging
6+
from typing import Any
7+
8+
from backend.adapter.standard_request import StandardRequest, CLAUDE_CODE_OPENAI_PROFILE
9+
from backend.core.config import resolve_model
10+
from backend.services.prompt_builder import messages_to_prompt
11+
from backend.toolcall.normalize import build_tool_name_registry
12+
13+
log = logging.getLogger("qwen2api.cli_proxy")
14+
15+
16+
class CLIProxy:
17+
"""
18+
协议转换代理 - 类似 ds2api 的 CLIProxy
19+
负责将不同协议(OpenAI/Claude/Gemini)转换为统一的 StandardRequest
20+
"""
21+
22+
@staticmethod
23+
def from_openai(req_data: dict, *, client_profile: str = CLAUDE_CODE_OPENAI_PROFILE) -> StandardRequest:
24+
"""
25+
OpenAI 协议 -> StandardRequest
26+
27+
Args:
28+
req_data: OpenAI 格式的请求体
29+
client_profile: 客户端配置文件
30+
31+
Returns:
32+
StandardRequest: 统一的标准请求对象
33+
"""
34+
model_name = req_data.get("model", "gpt-4o")
35+
prompt_result = messages_to_prompt(req_data, client_profile=client_profile)
36+
37+
tools = prompt_result.tools
38+
tool_names = [
39+
tool_name
40+
for tool_name in (tool.get("name") for tool in tools)
41+
if isinstance(tool_name, str) and tool_name
42+
]
43+
44+
return StandardRequest(
45+
prompt=prompt_result.prompt,
46+
response_model=model_name,
47+
resolved_model=resolve_model(model_name),
48+
surface="openai",
49+
client_profile=client_profile,
50+
requested_model=model_name,
51+
stream=req_data.get("stream", False),
52+
tools=tools,
53+
tool_names=tool_names,
54+
tool_name_registry=build_tool_name_registry(tool_names),
55+
tool_enabled=prompt_result.tool_enabled,
56+
)
57+
58+
@staticmethod
59+
def from_anthropic(req_data: dict, *, client_profile: str = CLAUDE_CODE_OPENAI_PROFILE) -> StandardRequest:
60+
"""
61+
Anthropic Claude 协议 -> StandardRequest
62+
63+
Args:
64+
req_data: Claude 格式的请求体
65+
client_profile: 客户端配置文件
66+
67+
Returns:
68+
StandardRequest: 统一的标准请求对象
69+
"""
70+
model_name = req_data.get("model", "claude-3-5-sonnet")
71+
prompt_result = messages_to_prompt(req_data, client_profile=client_profile)
72+
73+
tools = prompt_result.tools
74+
tool_names = [
75+
tool_name
76+
for tool_name in (tool.get("name") for tool in tools)
77+
if isinstance(tool_name, str) and tool_name
78+
]
79+
80+
return StandardRequest(
81+
prompt=prompt_result.prompt,
82+
response_model=model_name,
83+
resolved_model=resolve_model(model_name),
84+
surface="anthropic",
85+
client_profile=client_profile,
86+
requested_model=model_name,
87+
stream=req_data.get("stream", False),
88+
tools=tools,
89+
tool_names=tool_names,
90+
tool_name_registry=build_tool_name_registry(tool_names),
91+
tool_enabled=prompt_result.tool_enabled,
92+
)
93+
94+
@staticmethod
95+
def from_gemini(model: str, req_data: dict, *, stream: bool | None = None) -> StandardRequest:
96+
"""
97+
Google Gemini 协议 -> StandardRequest
98+
99+
Args:
100+
model: Gemini 模型名称
101+
req_data: Gemini 格式的请求体
102+
stream: 是否流式输出(None 则从请求体推断)
103+
104+
Returns:
105+
StandardRequest: 统一的标准请求对象
106+
"""
107+
prompt = CLIProxy._extract_gemini_prompt(req_data)
108+
stream_requested = CLIProxy._is_gemini_stream_request(req_data) if stream is None else stream
109+
110+
# Gemini 暂不支持工具调用,后续可扩展
111+
tools = []
112+
tool_names = []
113+
114+
return StandardRequest(
115+
prompt=prompt,
116+
response_model=model,
117+
resolved_model=resolve_model(model),
118+
surface="gemini",
119+
requested_model=model,
120+
content=prompt,
121+
stream=stream_requested,
122+
tools=tools,
123+
tool_names=tool_names,
124+
tool_name_registry={},
125+
tool_enabled=False,
126+
)
127+
128+
@staticmethod
129+
def _extract_gemini_prompt(body: dict) -> str:
130+
"""从 Gemini 请求体中提取 prompt"""
131+
lines: list[str] = []
132+
for message in body.get("contents", []) or []:
133+
if message.get("role") != "user":
134+
continue
135+
for part in message.get("parts", []) or []:
136+
text = part.get("text")
137+
if text:
138+
lines.append(text)
139+
return "\n".join(lines)
140+
141+
@staticmethod
142+
def _is_gemini_stream_request(body: dict[str, Any]) -> bool:
143+
"""判断 Gemini 请求是否为流式"""
144+
if body.get("stream") is True:
145+
return True
146+
generation_config = body.get("generationConfig")
147+
if isinstance(generation_config, dict) and generation_config.get("stream") is True:
148+
return True
149+
return False
150+
151+
@staticmethod
152+
def to_openai_response(execution, standard_request: StandardRequest) -> dict:
153+
"""
154+
StandardRequest 执行结果 -> OpenAI 响应格式
155+
156+
Args:
157+
execution: 执行结果对象
158+
standard_request: 原始标准请求
159+
160+
Returns:
161+
dict: OpenAI 格式的响应
162+
"""
163+
return {
164+
"id": f"chatcmpl-{execution.chat_id[:12]}",
165+
"object": "chat.completion",
166+
"created": int(execution.state.created_at or 0),
167+
"model": standard_request.response_model,
168+
"choices": [
169+
{
170+
"index": 0,
171+
"message": {
172+
"role": "assistant",
173+
"content": execution.state.answer_text,
174+
},
175+
"finish_reason": "stop",
176+
}
177+
],
178+
"usage": {
179+
"prompt_tokens": len(standard_request.prompt),
180+
"completion_tokens": len(execution.state.answer_text),
181+
"total_tokens": len(standard_request.prompt) + len(execution.state.answer_text),
182+
},
183+
}
184+
185+
@staticmethod
186+
def to_anthropic_response(execution, standard_request: StandardRequest, msg_id: str, directive) -> dict:
187+
"""
188+
StandardRequest 执行结果 -> Anthropic Claude 响应格式
189+
190+
Args:
191+
execution: 执行结果对象
192+
standard_request: 原始标准请求
193+
msg_id: 消息 ID
194+
directive: 工具调用指令
195+
196+
Returns:
197+
dict: Claude 格式的响应
198+
"""
199+
content_blocks: list[dict] = []
200+
201+
# 添加思考内容
202+
if execution.state.reasoning_text:
203+
content_blocks.append({"type": "thinking", "thinking": execution.state.reasoning_text})
204+
205+
# 添加工具调用块
206+
content_blocks.extend(directive.tool_blocks)
207+
208+
return {
209+
"id": msg_id,
210+
"type": "message",
211+
"role": "assistant",
212+
"model": standard_request.response_model,
213+
"content": content_blocks,
214+
"stop_reason": directive.stop_reason,
215+
"stop_sequence": None,
216+
"usage": {
217+
"input_tokens": len(standard_request.prompt),
218+
"output_tokens": len(execution.state.answer_text),
219+
},
220+
}
221+
222+
@staticmethod
223+
def to_gemini_response(execution, standard_request: StandardRequest) -> dict:
224+
"""
225+
StandardRequest 执行结果 -> Google Gemini 响应格式
226+
227+
Args:
228+
execution: 执行结果对象
229+
standard_request: 原始标准请求
230+
231+
Returns:
232+
dict: Gemini 格式的响应
233+
"""
234+
return {
235+
"candidates": [
236+
{
237+
"content": {
238+
"parts": [{"text": execution.state.answer_text}],
239+
"role": "model",
240+
},
241+
"finishReason": "STOP",
242+
"index": 0,
243+
}
244+
],
245+
"usageMetadata": {
246+
"promptTokenCount": len(standard_request.prompt),
247+
"candidatesTokenCount": len(execution.state.answer_text),
248+
"totalTokenCount": len(standard_request.prompt) + len(execution.state.answer_text),
249+
},
250+
}
251+
252+
@staticmethod
253+
def log_conversion(surface: str, model: str, prompt_len: int, tool_count: int):
254+
"""记录协议转换日志"""
255+
log.info(
256+
f"[CLIProxy] {surface.upper()} -> StandardRequest: "
257+
f"model={model}, prompt_len={prompt_len}, tools={tool_count}"
258+
)

backend/api/admin.py

Lines changed: 75 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,51 @@ class User(BaseModel):
3333
async def get_system_status(request: Request):
3434
pool = request.app.state.account_pool
3535

36+
# 账号层细粒度 inflight / 状态
37+
per_account = []
38+
for acc in getattr(pool, "accounts", []):
39+
per_account.append({
40+
"email": acc.email,
41+
"status": acc.get_status_code(),
42+
"inflight": getattr(acc, "inflight", 0),
43+
"max_inflight": getattr(pool, "max_inflight_per_account", 0),
44+
"consecutive_failures": getattr(acc, "consecutive_failures", 0),
45+
"rate_limit_strikes": getattr(acc, "rate_limit_strikes", 0),
46+
"last_request_finished": getattr(acc, "last_request_finished", 0),
47+
})
48+
49+
# chat_id 预热池指标(若已启用)
50+
chat_id_pool_stats = None
51+
cp = getattr(request.app.state, "chat_id_pool", None)
52+
if cp is not None:
53+
try:
54+
per_account_pool: dict[str, int] = {}
55+
for acc in getattr(pool, "accounts", []):
56+
per_account_pool[acc.email] = await cp.size(acc.email)
57+
chat_id_pool_stats = {
58+
"total_cached": await cp.total_size(),
59+
"target_per_account": cp._target,
60+
"ttl_seconds": cp._ttl,
61+
"per_account": per_account_pool,
62+
}
63+
except Exception:
64+
chat_id_pool_stats = {"error": "snapshot failed"}
65+
66+
# 向运行时拿全局任务计数 / asyncio 状态
67+
import asyncio
68+
try:
69+
tasks = asyncio.all_tasks()
70+
running_tasks = sum(1 for t in tasks if not t.done())
71+
except Exception:
72+
running_tasks = -1
73+
3674
return {
3775
"accounts": pool.status(),
76+
"per_account": per_account,
77+
"chat_id_pool": chat_id_pool_stats,
78+
"runtime": {
79+
"asyncio_running_tasks": running_tasks,
80+
},
3881
"request_runtime": {
3982
"mode": "direct_http",
4083
"browser_required_for_requests": False,
@@ -226,24 +269,50 @@ async def delete_account(email: str, request: Request):
226269
return {"ok": True}
227270

228271
@router.get("/settings", dependencies=[Depends(verify_admin)])
229-
async def get_settings():
272+
async def get_settings(request: Request):
230273
from backend.core.config import MODEL_MAP
231-
# 从 settings.py 所在的同级导入 VERSION,避免循环导入或未定义报错
232274
from backend.core.config import settings as backend_settings
233275

234-
# 强制将 dict 转换,确保能被 JSON 序列化
235276
safe_map = {k: v for k, v in MODEL_MAP.items()}
277+
pool = getattr(request.app.state, "chat_id_pool", None)
278+
acc_pool = getattr(request.app.state, "account_pool", None)
236279
return {
237280
"version": "2.0.0",
238281
"max_inflight_per_account": backend_settings.MAX_INFLIGHT_PER_ACCOUNT,
239-
"model_aliases": safe_map
282+
"global_max_inflight": getattr(acc_pool, "global_max_inflight", 0),
283+
"max_queue_size": getattr(acc_pool, "max_queue_size", 0),
284+
"chat_id_pool_target": pool.target if pool else 0,
285+
"chat_id_pool_ttl_seconds": pool.ttl if pool else 0,
286+
"model_aliases": safe_map,
240287
}
241288

242289
@router.put("/settings", dependencies=[Depends(verify_admin)])
243-
async def update_settings(data: dict):
290+
async def update_settings(data: dict, request: Request):
244291
from backend.core.config import MODEL_MAP
245292
if "max_inflight_per_account" in data:
246-
settings.MAX_INFLIGHT_PER_ACCOUNT = data["max_inflight_per_account"]
293+
try:
294+
val = int(data["max_inflight_per_account"])
295+
settings.MAX_INFLIGHT_PER_ACCOUNT = val
296+
pool = getattr(request.app.state, "account_pool", None)
297+
if pool is not None and hasattr(pool, "set_max_inflight"):
298+
pool.set_max_inflight(val)
299+
except (TypeError, ValueError):
300+
pass
301+
if "global_max_inflight" in data:
302+
try:
303+
val = int(data["global_max_inflight"])
304+
pool = getattr(request.app.state, "account_pool", None)
305+
if pool is not None and val > 0:
306+
pool.global_max_inflight = val
307+
except (TypeError, ValueError):
308+
pass
309+
if "chat_id_pool_target" in data or "chat_id_pool_ttl_seconds" in data:
310+
cp = getattr(request.app.state, "chat_id_pool", None)
311+
if cp is not None:
312+
cp.update_config(
313+
target=data.get("chat_id_pool_target"),
314+
ttl_seconds=data.get("chat_id_pool_ttl_seconds"),
315+
)
247316
if "model_aliases" in data:
248317
MODEL_MAP.clear()
249318
MODEL_MAP.update(data["model_aliases"])

0 commit comments

Comments
 (0)