Skip to content

Commit 533868a

Browse files
committed
添加视频生成。修复图片生成,修复内容
1 parent ec7866e commit 533868a

17 files changed

Lines changed: 1290 additions & 192 deletions

backend/api/images.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,14 @@ def _extract_upstream_failure(text: str) -> str | None:
136136

137137

138138
def _resolve_image_model(requested: str | None) -> str:
139+
from backend.core.config import resolve_model
140+
from backend.services.model_modes import parse_model_mode
141+
139142
if not requested:
140143
return DEFAULT_IMAGE_MODEL
141-
return IMAGE_MODEL_MAP.get(requested, DEFAULT_IMAGE_MODEL)
144+
aliased = IMAGE_MODEL_MAP.get(str(requested).strip(), str(requested).strip())
145+
mode = parse_model_mode(aliased, default_model=DEFAULT_IMAGE_MODEL)
146+
return resolve_model(mode.base_model or DEFAULT_IMAGE_MODEL)
142147

143148

144149
def _normalize_image_size(value: str | None) -> tuple[str, str, int, int]:
@@ -265,4 +270,4 @@ async def create_image(request: Request):
265270
if acc is not None:
266271
client.account_pool.release(acc)
267272
if chat_id:
268-
await client.delete_chat_reliable(acc.token, chat_id, source="image_cleanup")
273+
client.delete_chat_background(acc.token, chat_id, source="image_cleanup")

backend/api/models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ async def get_model(model_id: str):
4545
if mode.mode == "deep_research":
4646
capabilities["deep_research"] = True
4747
capabilities["search"] = True
48+
if mode.mode == "search":
49+
capabilities["search"] = True
4850
if mode.mode == "image":
4951
capabilities["image_gen"] = True
5052
if mode.mode == "video":

backend/api/videos.py

Lines changed: 359 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,359 @@
1+
"""
2+
视频生成接口 — 兼容 OpenAI 风格的 /v1/videos/generations。
3+
"""
4+
import asyncio
5+
import json
6+
import logging
7+
import re
8+
import time
9+
from typing import Any
10+
11+
from fastapi import APIRouter, HTTPException, Request
12+
from fastapi.responses import JSONResponse
13+
14+
from backend.api.images import _extract_upstream_failure, _normalize_image_size
15+
from backend.services.qwen_client import QwenClient
16+
17+
log = logging.getLogger("qwen2api.videos")
18+
router = APIRouter()
19+
20+
DEFAULT_VIDEO_MODEL = "qwen3.6-plus"
21+
22+
VIDEO_MODEL_MAP = {
23+
"qwen-video": "qwen3.6-plus",
24+
"qwen-video-plus": "qwen3.6-plus",
25+
"qwen-video-turbo": "qwen3.6-plus",
26+
"qwen3.6-plus-video": "qwen3.6-plus",
27+
}
28+
29+
VIDEO_URL_KEYS = {
30+
"url",
31+
"video",
32+
"src",
33+
"videoUrl",
34+
"video_url",
35+
"videoURL",
36+
"preview_url",
37+
"previewUrl",
38+
"download_url",
39+
"downloadUrl",
40+
"origin_url",
41+
"originUrl",
42+
"oss_url",
43+
"ossUrl",
44+
"signed_url",
45+
"signedUrl",
46+
}
47+
48+
TASK_ID_KEYS = {
49+
"task_id",
50+
"taskId",
51+
"wanx_task_id",
52+
"wanxTaskId",
53+
}
54+
55+
VIDEO_RUNNING_STATUSES = {"running", "pending", "queued", "processing", "created"}
56+
VIDEO_SUCCESS_STATUSES = {"success", "succeeded", "finished", "completed"}
57+
58+
59+
def _looks_like_video_url(value: str) -> bool:
60+
if not isinstance(value, str) or not value.startswith(("http://", "https://")):
61+
return False
62+
lowered = value.lower()
63+
if re.search(r"\.(?:mp4|webm|mov|m3u8)(?:[?#][^\s\"'<>]*)?$", lowered):
64+
return True
65+
video_hosts = ("cdn.qwenlm.ai", "wanx.alicdn.com", "alicdn.com")
66+
return any(host in lowered for host in video_hosts) and any(marker in lowered for marker in ("video", "mp4", "t2v"))
67+
68+
69+
def _collect_video_urls_from_obj(value: Any, urls: list[str]) -> None:
70+
if isinstance(value, dict):
71+
for key, item in value.items():
72+
if isinstance(item, str) and (key in VIDEO_URL_KEYS or _looks_like_video_url(item)):
73+
if _looks_like_video_url(item):
74+
urls.append(item)
75+
else:
76+
_collect_video_urls_from_obj(item, urls)
77+
return
78+
if isinstance(value, list):
79+
for item in value:
80+
_collect_video_urls_from_obj(item, urls)
81+
82+
83+
def _collect_task_ids_from_obj(value: Any, task_ids: list[str]) -> None:
84+
if isinstance(value, dict):
85+
for key, item in value.items():
86+
if key in TASK_ID_KEYS and isinstance(item, str) and item:
87+
task_ids.append(item)
88+
continue
89+
_collect_task_ids_from_obj(item, task_ids)
90+
return
91+
if isinstance(value, list):
92+
for item in value:
93+
_collect_task_ids_from_obj(item, task_ids)
94+
95+
96+
def _extract_video_urls(text: str) -> list[str]:
97+
urls: list[str] = []
98+
99+
for u in re.findall(r'!\[.*?\]\((https?://[^\s\)]+)\)', text):
100+
if _looks_like_video_url(u):
101+
urls.append(u.rstrip(").,;"))
102+
103+
for u in re.findall(r'"(?:url|video|src|videoUrl|video_url)"\s*:\s*"(https?://[^"]+)"', text):
104+
if _looks_like_video_url(u):
105+
urls.append(u)
106+
107+
video_pattern = r'https?://[^\s"<>]+\.(?:mp4|webm|mov|m3u8)(?:[^\s"<>]*)'
108+
for u in re.findall(video_pattern, text, re.IGNORECASE):
109+
urls.append(u.rstrip(".,;)\"'>"))
110+
111+
for match in re.finditer(r"[\{\[]", text or ""):
112+
try:
113+
obj, _ = json.JSONDecoder().raw_decode(text[match.start():])
114+
except Exception:
115+
continue
116+
_collect_video_urls_from_obj(obj, urls)
117+
118+
seen: set[str] = set()
119+
result: list[str] = []
120+
for u in urls:
121+
if u not in seen:
122+
seen.add(u)
123+
result.append(u)
124+
return result
125+
126+
127+
def _extract_task_ids(text: str) -> list[str]:
128+
task_ids: list[str] = []
129+
for match in re.finditer(r"[\{\[]", text or ""):
130+
try:
131+
obj, _ = json.JSONDecoder().raw_decode(text[match.start():])
132+
except Exception:
133+
continue
134+
_collect_task_ids_from_obj(obj, task_ids)
135+
136+
seen: set[str] = set()
137+
result: list[str] = []
138+
for task_id in task_ids:
139+
if task_id not in seen:
140+
seen.add(task_id)
141+
result.append(task_id)
142+
return result
143+
144+
145+
def _resolve_video_model(requested: str | None) -> str:
146+
from backend.core.config import resolve_model
147+
from backend.services.model_modes import parse_model_mode
148+
149+
if not requested:
150+
return DEFAULT_VIDEO_MODEL
151+
aliased = VIDEO_MODEL_MAP.get(str(requested).strip(), str(requested).strip())
152+
mode = parse_model_mode(aliased, default_model=DEFAULT_VIDEO_MODEL)
153+
return resolve_model(mode.base_model or DEFAULT_VIDEO_MODEL)
154+
155+
156+
def _get_token(request: Request) -> str:
157+
auth = request.headers.get("Authorization", "")
158+
if auth.startswith("Bearer "):
159+
return auth[7:].strip()
160+
return request.headers.get("x-api-key", "").strip()
161+
162+
163+
def _coerce_duration(value: Any) -> int:
164+
try:
165+
duration = int(value)
166+
except (TypeError, ValueError):
167+
duration = 5
168+
return min(max(duration, 1), 10)
169+
170+
171+
def _build_video_prompt(prompt: str, *, size: str, ratio: str, duration: int) -> str:
172+
return (
173+
f"{prompt}\n\n"
174+
f"视频要求:生成 {duration} 秒视频,宽高比 {ratio},参考画面尺寸 {size}。"
175+
)
176+
177+
178+
async def _poll_video_task(client: QwenClient, token: str, task_id: str, *, timeout_seconds: int = 420) -> str:
179+
started = time.monotonic()
180+
interval = 10.0
181+
snapshots: list[str] = []
182+
last_status = ""
183+
184+
while time.monotonic() - started < timeout_seconds:
185+
res = await client.get_vision_task_status(token, task_id, timeout=30.0)
186+
body_text = str(res.get("body") or "")
187+
snapshots.append(body_text)
188+
189+
if int(res.get("status") or 0) != 200:
190+
log.warning("[T2V] 任务状态查询 HTTP %s task_id=%s body=%r", res.get("status"), task_id, body_text[:300])
191+
await asyncio.sleep(interval)
192+
continue
193+
194+
try:
195+
obj = json.loads(body_text)
196+
except Exception:
197+
obj = {}
198+
data = obj.get("data") if isinstance(obj, dict) and isinstance(obj.get("data"), dict) else {}
199+
status = str(
200+
(obj.get("task_status") if isinstance(obj, dict) else None)
201+
or (obj.get("status") if isinstance(obj, dict) else None)
202+
or data.get("task_status")
203+
or data.get("status")
204+
or ""
205+
).lower()
206+
last_status = status or last_status
207+
208+
if status in VIDEO_SUCCESS_STATUSES:
209+
log.info("[T2V] 视频任务完成 task_id=%s elapsed=%.1fs", task_id, time.monotonic() - started)
210+
return "\n".join(snapshots)
211+
if status and status not in VIDEO_RUNNING_STATUSES:
212+
raise RuntimeError(f"Video task failed status={status} body={body_text[:500]}")
213+
if not status:
214+
log.info("[T2V] 任务状态未识别 task_id=%s body=%r", task_id, body_text[:300])
215+
216+
await asyncio.sleep(interval)
217+
218+
raise RuntimeError(f"Video task timed out task_id={task_id} last_status={last_status or '-'}")
219+
220+
221+
async def _collect_chat_detail_text(client: QwenClient, token: str, chat_id: str) -> str:
222+
res = await client.get_chat_detail(token, chat_id, timeout=30.0)
223+
if int(res.get("status") or 0) != 200:
224+
return ""
225+
return str(res.get("body") or "")
226+
227+
228+
async def _create_video_with_account(
229+
client: QwenClient,
230+
token: str,
231+
*,
232+
model: str,
233+
prompt_text: str,
234+
video_options: dict,
235+
) -> tuple[str, list[str], str]:
236+
chat_id = await client.create_chat(token, model, chat_type="t2v", use_prewarmed=False)
237+
payload = client._build_payload(
238+
chat_id,
239+
model,
240+
prompt_text,
241+
has_custom_tools=False,
242+
chat_type="t2v",
243+
image_options=video_options,
244+
)
245+
payload["stream"] = False
246+
247+
res = await client.post_chat_completion_once(token, chat_id, payload, timeout=90.0)
248+
body_text = str(res.get("body") or "")
249+
if int(res.get("status") or 0) != 200:
250+
raise RuntimeError(f"video completion HTTP {res.get('status')}: {body_text[:500]}")
251+
252+
answer_text = body_text
253+
upstream_failure = _extract_upstream_failure(answer_text)
254+
if upstream_failure:
255+
raise RuntimeError(upstream_failure)
256+
257+
video_urls = _extract_video_urls(answer_text)
258+
task_ids = _extract_task_ids(answer_text)
259+
log.info("[T2V] 非流式响应 chat_id=%s task_ids=%s video_urls=%s body_tail=%r", chat_id, task_ids, len(video_urls), body_text[-500:])
260+
261+
if not video_urls and task_ids:
262+
answer_text += "\n" + await _poll_video_task(client, token, task_ids[0])
263+
video_urls = _extract_video_urls(answer_text)
264+
265+
if not video_urls:
266+
detail_text = await _collect_chat_detail_text(client, token, chat_id)
267+
if detail_text:
268+
answer_text += "\n" + detail_text
269+
video_urls = _extract_video_urls(answer_text)
270+
271+
return chat_id, video_urls, answer_text
272+
273+
274+
@router.post("/v1/videos/generations")
275+
@router.post("/videos/generations")
276+
async def create_video(request: Request):
277+
from backend.core.config import API_KEYS, settings
278+
279+
client: QwenClient = request.app.state.qwen_client
280+
281+
token = _get_token(request)
282+
if API_KEYS:
283+
if token != settings.ADMIN_KEY and token not in API_KEYS:
284+
raise HTTPException(status_code=401, detail="Invalid API Key")
285+
286+
try:
287+
body = await request.json()
288+
except Exception:
289+
raise HTTPException(400, "Invalid JSON body")
290+
291+
prompt: str = str(body.get("prompt", "")).strip()
292+
if not prompt:
293+
raise HTTPException(400, "prompt is required")
294+
295+
n = min(max(int(body.get("n", 1)), 1), 2)
296+
model = _resolve_video_model(body.get("model"))
297+
duration = _coerce_duration(body.get("duration"))
298+
size, ratio, width, height = _normalize_image_size(
299+
body.get("size") or body.get("ratio") or body.get("aspect_ratio")
300+
)
301+
video_options = {"size": size, "ratio": ratio, "width": width, "height": height, "duration": duration}
302+
303+
log.info("[T2V] model=%s n=%s size=%s ratio=%s duration=%ss prompt=%r", model, n, size, ratio, duration, prompt[:80])
304+
305+
prompt_text = _build_video_prompt(prompt, size=size, ratio=ratio, duration=duration)
306+
exclude: set[str] = set()
307+
last_error: str | None = None
308+
309+
for attempt in range(max(1, int(settings.MAX_RETRIES))):
310+
acc = None
311+
chat_id = None
312+
try:
313+
acc = await client.account_pool.acquire_wait(timeout=60, exclude=exclude)
314+
if acc is None:
315+
raise RuntimeError("No available accounts in pool (all busy or rate limited)")
316+
317+
log.info("[T2V] 使用账号=%s 第%s/%s次", acc.email, attempt + 1, settings.MAX_RETRIES)
318+
chat_id, video_urls, answer_text = await _create_video_with_account(
319+
client,
320+
acc.token,
321+
model=model,
322+
prompt_text=prompt_text,
323+
video_options=video_options,
324+
)
325+
326+
log.info("[T2V] 提取到 %s 个视频 URL chat_id=%s answer_tail=%r", len(video_urls), chat_id, answer_text[-500:])
327+
if not video_urls:
328+
raise RuntimeError(f"Video generation produced no video URL (chat_id={chat_id})")
329+
330+
data = [
331+
{
332+
"url": url,
333+
"revised_prompt": prompt,
334+
"size": size,
335+
"ratio": ratio,
336+
"width": width,
337+
"height": height,
338+
"duration": duration,
339+
}
340+
for url in video_urls[:n]
341+
]
342+
return JSONResponse({"created": int(time.time()), "data": data})
343+
344+
except Exception as e:
345+
last_error = str(e)
346+
if acc is not None:
347+
exclude.add(acc.email)
348+
log.warning("[T2V] 尝试失败 第%s/%s次 账号=%s 错误=%s", attempt + 1, settings.MAX_RETRIES, getattr(acc, "email", "-"), last_error)
349+
finally:
350+
if acc is not None:
351+
client.account_pool.release(acc)
352+
if chat_id:
353+
client.delete_chat_background(acc.token, chat_id, source="video_cleanup")
354+
355+
detail = f"All {settings.MAX_RETRIES} attempts failed. Last error: {last_error or 'unknown'}"
356+
log.error("[T2V] 生成失败: %s", detail)
357+
if "Qwen upstream error" in detail:
358+
raise HTTPException(status_code=502, detail=detail)
359+
raise HTTPException(status_code=500, detail=detail)

0 commit comments

Comments
 (0)