Skip to content

Commit 8ebae28

Browse files
refactor(backend): extract video generator helpers
1 parent e8e2555 commit 8ebae28

3 files changed

Lines changed: 270 additions & 340 deletions

File tree

packages/backend/app/node/video/config/generator.py

Lines changed: 9 additions & 340 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,14 @@
44
2. Scene Designer: Generate complete config (without timing)
55
"""
66

7+
import functools
78
import json
89
import requests
910
import time
10-
import functools
11-
from typing import Dict, List, Any, Tuple
1211
from concurrent.futures import ThreadPoolExecutor, as_completed
13-
import pandas as pd
12+
from typing import Dict, List, Any, Tuple
1413

15-
# Force all print statements to flush immediately for real-time logging
16-
print = functools.partial(print, flush=True)
14+
import pandas as pd
1715

1816
from app.core.config import settings
1917

@@ -32,81 +30,16 @@
3230
format_scene_planner_prompt,
3331
format_scene_animation_generator_prompt
3432
)
33+
from .response_parser import parse_llm_json_response
34+
from .retry import calculate_retry_wait_time, should_retry_on_error
35+
36+
# Force all print statements to flush immediately for real-time logging
37+
print = functools.partial(print, flush=True)
3538

3639

3740
# MAX_TOKENS: from env/config LLM_MAX_TOKENS
3841
MAX_TOKENS = settings.LLM_MAX_TOKENS
3942

40-
# ============================================================================
41-
# Retry Helper Functions
42-
# ============================================================================
43-
44-
def should_retry_on_error(error_msg: str, attempt: int, elapsed_time: float, max_general_retries: int = 10) -> tuple[bool, str]:
    """Decide from the error text whether a failed call should be retried.

    The checks run in priority order: permanent errors first (balance,
    authentication, malformed request, context length), then rate limiting
    (bounded by elapsed time), then a bounded retry count for everything else.

    Args:
        error_msg: The error message (anything convertible with ``str``).
        attempt: The current attempt number.
        elapsed_time: Seconds elapsed since the first attempt.
        max_general_retries: Retry budget for errors that are neither
            permanent nor rate-limit related.

    Returns:
        ``(should_retry, reason)`` where ``reason`` is a human-readable
        explanation of the decision.
    """
    import re  # local import: this block does not rely on a module-level `re`

    error_lower = str(error_msg).lower()

    def mentions_any(*keywords: str) -> bool:
        return any(keyword in error_lower for keyword in keywords)

    def mentions_code(*codes: str) -> bool:
        # Match a status code only when it is not part of a longer digit run,
        # so "14030 ms" is not mistaken for HTTP 403 nor "4290" for HTTP 429.
        return any(
            re.search(rf"(?<!\d){code}(?!\d)", error_lower) is not None
            for code in codes
        )

    # Permanent errors: retrying cannot help.
    if mentions_any('余额不足', 'insufficient', 'quota exceeded', 'no credit'):
        return False, "余额不足(永久性错误)"

    if mentions_code('401', '403') or mentions_any('unauthorized', 'forbidden'):
        return False, "认证失败(永久性错误)"

    if mentions_code('400') and 'format' in error_lower:
        return False, "请求格式错误(永久性错误)"

    # Context length exceeded: the prompt is too long, so a retry would only
    # burn tokens.
    if mentions_any('context length', 'maximum context'):
        if mentions_any('exceeded', 'too long', '128000'):
            return False, "Context length 超出限制(永久性错误,重试会浪费 token)"

    # Rate limiting: keep retrying regardless of attempt count, for up to
    # 30 minutes of total elapsed time.
    if mentions_code('429') or mentions_any('rate limit', 'throttling', 'too many requests'):
        max_time = 30 * 60  # 30 minutes, in seconds
        if elapsed_time < max_time:
            return True, f"Rate Limit(允许重试至{max_time/60:.0f}分钟)"
        return False, f"Rate Limit 超过最大时间限制({max_time/60:.0f}分钟)"

    # Any other error is treated as transient, with a bounded retry count.
    if attempt < max_general_retries:
        return True, f"临时性错误(最多{max_general_retries}次)"

    return False, f"已达到最大重试次数({max_general_retries}次)"
86-
87-
88-
def calculate_retry_wait_time(error_msg: str, attempt: int) -> int:
    """Return how many seconds to wait before the next retry.

    Rate-limit errors use exponential backoff capped at 60 seconds
    (2, 4, 8, 16, 32, 60, 60, ... for attempts 1, 2, 3, ...); every other
    error waits a fixed 2 seconds.

    Args:
        error_msg: The error message (anything convertible with ``str``).
        attempt: The current attempt number. Values below 0 are treated as 0
            so the result is always a whole number of seconds.

    Returns:
        The wait time in seconds.
    """
    import re  # local import: this block does not rely on a module-level `re`

    max_wait = 60
    error_lower = str(error_msg).lower()

    # Match "429" only when it is not part of a longer digit run, so a message
    # such as "took 14290 ms" is not mistaken for a rate-limit response.
    is_rate_limited = (
        re.search(r"(?<!\d)429(?!\d)", error_lower) is not None
        or any(
            keyword in error_lower
            for keyword in ('rate limit', 'throttling', 'too many requests')
        )
    )
    if not is_rate_limited:
        return 2

    # 2**6 already exceeds the cap, so skip the exponentiation for large
    # attempts; clamping at 0 keeps 2**attempt an int for negative input.
    if attempt >= 6:
        return max_wait
    return min(2 ** max(attempt, 0), max_wait)
108-
109-
11043
# ============================================================================
11144
# Custom Exceptions for Fatal Errors
11245
# ============================================================================
@@ -426,272 +359,8 @@ def call_with_json_mode(
426359
Returns:
427360
Tuple[Dict, Dict]: (parsed_json, usage) where usage contains token information
428361
"""
429-
import re
430-
431362
response, usage = self.call(prompt, temperature, max_tokens, verbose=verbose)
432-
433-
def _format_response_for_debug(raw: str, head: int = 500, tail: int = 300) -> str:
    """Build a diagnostic string describing an LLM response that failed to parse.

    The model name is read from ``self.model`` in the enclosing method's scope.

    Args:
        raw: The raw response text, or ``None``.
        head: Maximum number of leading characters to include.
        tail: Maximum number of trailing characters to include.

    Returns:
        A summary with the raw and stripped lengths, followed by the head and
        tail of the stripped response. The tail never repeats text already
        present in the head.
    """
    if raw is None:
        return f"model={self.model} raw=None"

    raw_len = len(raw)
    stripped = raw.strip()
    stripped_len = len(stripped)
    if stripped_len == 0:
        return f"model={self.model} raw_len={raw_len} stripped_len=0 (empty/whitespace)"

    head = max(head, 0)
    tail = max(tail, 0)
    head_txt = stripped[:head]

    if stripped_len <= head:
        # The head already contains everything; do not print it a second time.
        tail_txt = "(entire response shown in response_head)"
    else:
        # An explicit start index avoids the stripped[-0:] pitfall (which
        # yields the whole string) and keeps the tail from overlapping the head.
        tail_txt = stripped[max(head, stripped_len - tail):]

    return (
        f"model={self.model} raw_len={raw_len} stripped_len={stripped_len}\n"
        f"--- response_head ---\n{head_txt}\n"
        f"--- response_tail ---\n{tail_txt}\n"
    )
449-
450-
def _clean_json_control_chars(json_str: str) -> str:
    """Repair raw control characters that appear inside JSON string literals.

    LLMs sometimes emit JSON whose string values contain unescaped control
    characters (for example a real newline instead of ``\\n``), which makes
    ``json.loads`` fail with "Invalid control character".

    Only characters inside string literals are rewritten. Newlines, carriage
    returns and tabs between tokens are legal JSON whitespace and are kept
    as-is; escaping them would turn pretty-printed JSON into invalid text.

    Args:
        json_str: The raw JSON text.

    Returns:
        The text with in-string newline, carriage return, tab, backspace and
        form feed replaced by their JSON escapes, and every other ASCII
        control character (U+0000-U+001F) removed.
    """
    escapes = {'\n': '\\n', '\r': '\\r', '\t': '\\t', '\b': '\\b', '\f': '\\f'}
    pieces = []
    in_string = False
    after_backslash = False

    for ch in json_str:
        if ch < ' ':  # ASCII control character
            if in_string:
                # Escape the ones JSON has a short escape for; drop the rest.
                pieces.append(escapes.get(ch, ''))
            elif ch in '\n\r\t':
                # Structural whitespace between tokens stays untouched.
                pieces.append(ch)
            after_backslash = False
            continue

        pieces.append(ch)
        if in_string:
            if after_backslash:
                after_backslash = False
            elif ch == '\\':
                after_backslash = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True

    return ''.join(pieces)
477-
478-
# Try to extract JSON
479-
json_str = None
480-
try:
481-
parsed_json = json.loads(response)
482-
483-
# 自动修复:如果返回的是单元素数组,提取第一个元素
484-
if isinstance(parsed_json, list) and len(parsed_json) == 1:
485-
if verbose:
486-
print(f" ⚠️ LLM returned array instead of object, auto-extracting first element")
487-
parsed_json = parsed_json[0]
488-
489-
return parsed_json, usage
490-
except json.JSONDecodeError:
491-
# Try to extract content from ```json ... ```
492-
if "```json" in response:
493-
start = response.find("```json") + 7
494-
end = response.find("```", start)
495-
json_str = response[start:end].strip()
496-
elif "```" in response:
497-
start = response.find("```") + 3
498-
end = response.find("```", start)
499-
json_str = response[start:end].strip()
500-
else:
501-
# Try to find JSON object in response
502-
# Look for { ... } pattern (non-greedy to avoid matching too much)
503-
# Use balanced braces matching
504-
json_str = None
505-
brace_count = 0
506-
start_idx = -1
507-
for i, char in enumerate(response):
508-
if char == '{':
509-
if start_idx == -1:
510-
start_idx = i
511-
brace_count += 1
512-
elif char == '}':
513-
brace_count -= 1
514-
if brace_count == 0 and start_idx != -1:
515-
json_str = response[start_idx:i+1]
516-
break
517-
518-
# Fallback: use regex if balanced matching failed
519-
if not json_str:
520-
match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', response, re.DOTALL)
521-
if match:
522-
json_str = match.group(0)
523-
else:
524-
raise ValueError(
525-
"Cannot parse JSON response (no JSON object found).\n"
526-
+ _format_response_for_debug(response)
527-
)
528-
529-
# Clean and parse JSON string
530-
if json_str:
531-
try:
532-
# 🔧 Step 1: Remove trailing commas before closing braces/brackets
533-
json_str_cleaned = re.sub(r',\s*}', '}', json_str)
534-
json_str_cleaned = re.sub(r',\s*]', ']', json_str_cleaned)
535-
536-
# 🔧 Step 2: Remove comments (// and /* */)
537-
json_str_cleaned = re.sub(r'//.*?$', '', json_str_cleaned, flags=re.MULTILINE)
538-
json_str_cleaned = re.sub(r'/\*.*?\*/', '', json_str_cleaned, flags=re.DOTALL)
539-
540-
# 🔧 Step 3: Try to parse directly first (don't touch control chars unless necessary)
541-
try:
542-
parsed_json = json.loads(json_str_cleaned)
543-
return parsed_json, usage
544-
except json.JSONDecodeError as e:
545-
# Only if we get "Invalid control character" error, then clean them
546-
if "Invalid control character" in str(e) or "control character" in str(e):
547-
json_str_cleaned = _clean_json_control_chars(json_str_cleaned)
548-
parsed_json = json.loads(json_str_cleaned)
549-
return parsed_json, usage
550-
else:
551-
# Re-raise for other JSON errors
552-
raise
553-
except json.JSONDecodeError as e:
554-
# Try to fix common JSON errors
555-
try:
556-
json_str_fixed = json_str
557-
558-
# 🔧 Fix: number followed by quote (missing comma) - most common issue
559-
# Pattern: number" -> number,"
560-
# Handle both with and without whitespace
561-
try:
562-
json_str_fixed = re.sub(r'(\d+)\s*"', r'\1, "', json_str_fixed)
563-
except re.error:
564-
pass # 正则失败,跳过这个修复
565-
566-
# Fix: number followed by newline and quote
567-
try:
568-
json_str_fixed = re.sub(r'(\d+)\s*\n\s*"', r'\1,\n"', json_str_fixed)
569-
except re.error:
570-
pass
571-
572-
# Fix: quote followed by number (missing comma)
573-
try:
574-
json_str_fixed = re.sub(r'"\s*(\d+)', r'", \1', json_str_fixed)
575-
except re.error:
576-
pass
577-
578-
# Fix: boolean/null followed by quote (missing comma)
579-
try:
580-
json_str_fixed = re.sub(r'(true|false|null)\s*"', r'\1, "', json_str_fixed)
581-
except re.error:
582-
pass
583-
584-
# Fix: closing brace/bracket followed by quote (missing comma)
585-
try:
586-
json_str_fixed = re.sub(r'([}\])\s*"', r'\1, "', json_str_fixed)
587-
except re.error:
588-
pass
589-
590-
# Fix: number followed by quote and comma (like "2332",)
591-
try:
592-
json_str_fixed = re.sub(r'(\d+)\s*",', r'\1,', json_str_fixed)
593-
except re.error:
594-
pass
595-
596-
# Fix: number followed by quote and newline (array/object item)
597-
try:
598-
json_str_fixed = re.sub(r'(\d+)\s*"\s*\n', r'\1,\n', json_str_fixed)
599-
except re.error:
600-
pass
601-
602-
# Remove trailing commas again after fixes
603-
try:
604-
json_str_fixed = re.sub(r',\s*}', '}', json_str_fixed)
605-
json_str_fixed = re.sub(r',\s*]', ']', json_str_fixed)
606-
except re.error:
607-
pass
608-
609-
parsed_json = json.loads(json_str_fixed)
610-
return parsed_json, usage
611-
except (json.JSONDecodeError, ValueError) as e2:
612-
# Try to fix unterminated string errors
613-
if "Unterminated string" in str(e2) or "Unterminated string" in str(e):
614-
try:
615-
error_pos = e2.pos if hasattr(e2, 'pos') else (e.pos if hasattr(e, 'pos') else 0)
616-
617-
# Find the last unclosed quote before error position
618-
# Look backwards from error_pos to find the opening quote
619-
quote_pos = -1
620-
621-
# Scan backwards to find the opening quote
622-
for i in range(error_pos - 1, max(0, error_pos - 200), -1):
623-
char = json_str_fixed[i]
624-
# Check if this quote is escaped
625-
if char == '"':
626-
# Count backslashes before this quote
627-
backslash_count = 0
628-
j = i - 1
629-
while j >= 0 and json_str_fixed[j] == '\\':
630-
backslash_count += 1
631-
j -= 1
632-
# If even number of backslashes, quote is not escaped
633-
if backslash_count % 2 == 0:
634-
quote_pos = i
635-
break
636-
637-
if quote_pos >= 0:
638-
# Found opening quote, now look forward for closing quote or object end
639-
# Look for next unescaped quote or end of object/array
640-
end_pos = len(json_str_fixed)
641-
found_closing = False
642-
643-
# Look forward from error_pos
644-
for i in range(error_pos, min(len(json_str_fixed), error_pos + 500)):
645-
char = json_str_fixed[i]
646-
if char == '"':
647-
# Check if escaped
648-
backslash_count = 0
649-
j = i - 1
650-
while j >= 0 and json_str_fixed[j] == '\\':
651-
backslash_count += 1
652-
j -= 1
653-
if backslash_count % 2 == 0:
654-
# Found closing quote
655-
end_pos = i + 1
656-
found_closing = True
657-
break
658-
elif char in ['}', ']', ',', '\n']:
659-
# If we hit object/array end or comma/newline, try to close the string
660-
# Check if we're in a reasonable position (after a colon or in a value)
661-
if i > quote_pos + 1:
662-
# Try inserting closing quote before this character
663-
end_pos = i
664-
break
665-
666-
if not found_closing and end_pos < len(json_str_fixed):
667-
# Insert closing quote
668-
json_str_fixed = json_str_fixed[:end_pos] + '"' + json_str_fixed[end_pos:]
669-
if verbose:
670-
print(f" 🔧 Fixed unterminated string: inserted closing quote at position {end_pos}")
671-
672-
# Try parsing again
673-
parsed_json = json.loads(json_str_fixed)
674-
return parsed_json, usage
675-
elif found_closing:
676-
# String was actually closed, might be a different issue
677-
pass
678-
except Exception:
679-
# If fix attempt fails, fall through to error message
680-
pass
681-
682-
# If fixes don't work, provide better error message
683-
error_pos = e2.pos if hasattr(e2, 'pos') else (e.pos if hasattr(e, 'pos') else 0)
684-
context_start = max(0, error_pos - 50)
685-
context_end = min(len(json_str), error_pos + 50)
686-
raise ValueError(
687-
f"JSON parse error at position {error_pos}: {e2.msg if hasattr(e2, 'msg') else str(e2)}\n"
688-
f"Context: {json_str[context_start:context_end]}"
689-
)
690-
691-
raise ValueError(
692-
"Cannot parse JSON response.\n"
693-
+ _format_response_for_debug(response)
694-
)
363+
return parse_llm_json_response(response, usage, model=self.model, verbose=verbose)
695364

696365

697366
class SimpleConfigGenerator:

0 commit comments

Comments
 (0)