Skip to content

Commit 43dfd21

Browse files
committed
refactor(asr): centralize env init and runtime lifecycle
- unify main process env setup/logging - add ASR runtime manager for preload/ready/reload - log python worker effective config on startup
1 parent ee4f73f commit 43dfd21

20 files changed

Lines changed: 1662 additions & 1185 deletions

desktop/backend/asr/asr_funasr_worker.py

Lines changed: 47 additions & 425 deletions
Large diffs are not rendered by default.

desktop/backend/asr/lib/cache.py

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
import os
2+
import sys
3+
from typing import Optional
4+
5+
6+
def resolve_local_model_path(
    model_id: str,
    require_offline_mode: bool = True,
    offline_mode: bool = False,
) -> Optional[str]:
    """
    Resolve the local directory of an already-downloaded model.

    Search roots, in priority order: the MODELSCOPE_CACHE and ASR_CACHE_DIR
    environment variables, then the default ~/.cache/modelscope. When a root
    ends in a "hub" component, its parent directory is searched as well.

    Args:
        model_id: Model identifier, e.g. "damo/<name>" or "iic/<name>". The
            damo/ and iic/ namespaces are tried as aliases of each other.
        require_offline_mode: When True, a result is only returned if
            offline_mode is also True (kept for compatibility with the old logic).
        offline_mode: Whether the caller is running in offline mode.

    Returns:
        The first candidate directory that exists and contains at least one
        .onnx/.bin/.json/.yaml file, or None when nothing matches.
    """
    if require_offline_mode and not offline_mode:
        return None

    roots = [
        os.environ.get("MODELSCOPE_CACHE"),
        os.environ.get("ASR_CACHE_DIR"),
        os.path.join(os.path.expanduser("~"), ".cache", "modelscope"),
    ]
    separators = os.sep + (os.altsep or "")

    cache_bases = []
    for root in roots:
        if not root:
            continue
        cache_bases.append(root)
        # basename("…/hub/") is "", so drop trailing separators before testing
        # for a "hub" suffix; otherwise the parent fallback is silently skipped.
        trimmed = root.rstrip(separators)
        if os.path.basename(trimmed).lower() == "hub":
            cache_bases.append(os.path.dirname(trimmed))

    # De-duplicate while keeping priority order; a relative "hub" root has an
    # empty parent, which is not a usable search base.
    cache_bases = [base for base in dict.fromkeys(cache_bases) if base]

    # ModelScope may store the same model under damo/ or iic/ depending on the
    # version, so try the sibling namespace to avoid missing an existing cache.
    alt_ids = [model_id]
    namespace, slash, rest = model_id.partition("/")
    if slash:
        alias = {"damo": "iic", "iic": "damo"}.get(namespace)
        if alias:
            alt_ids.append(f"{alias}/{rest}")

    # Sub-directory layouts probed under every base, in priority order.
    layouts = (
        ("hub", "models"),
        (),
        ("models",),
        ("hub",),
        ("modelscope", "hub", "models"),
        ("modelscope", "models"),
        ("modelscope",),
    )
    model_file_suffixes = ('.onnx', '.bin', '.json', '.yaml')

    for cache_dir in cache_bases:
        for mid in alt_ids:
            for layout in layouts:
                candidate = os.path.join(cache_dir, *layout, mid)
                if not os.path.isdir(candidate):
                    continue
                try:
                    entries = os.listdir(candidate)
                    if any(entry.endswith(model_file_suffixes) for entry in entries):
                        sys.stderr.write(f"[FunASR Worker] Found local model: {candidate}\n")
                        sys.stderr.flush()
                        return candidate
                except OSError:
                    # Unreadable directory (or broken stderr pipe): try the next candidate.
                    continue

    return None
65+
66+
67+
def ensure_vad_compatibility(model_dir: str):
    """
    Patch a funasr_onnx VAD model directory so differing loader versions accept it.

    Fixes applied, each on a best-effort basis (failures are logged to stderr,
    never raised):
    - config.yaml -> vad.yaml (copied when vad.yaml is missing)
    - am.mvn -> vad.mvn (copied when vad.mvn is missing)
    - when vad_post_conf is missing, it is copied from model_conf and written
      into both config.yaml and vad.yaml

    Args:
        model_dir: Path of the model directory. Empty or non-existent paths
            are ignored.
    """
    if not model_dir or not os.path.exists(model_dir):
        return

    import copy
    import shutil

    vad_yaml = os.path.join(model_dir, "vad.yaml")
    config_yaml = os.path.join(model_dir, "config.yaml")
    vad_mvn = os.path.join(model_dir, "vad.mvn")
    am_mvn = os.path.join(model_dir, "am.mvn")

    # The plain file copies need no YAML support, so they run before PyYAML is
    # imported and still succeed when it is not installed.
    for src, dst in ((config_yaml, vad_yaml), (am_mvn, vad_mvn)):
        if os.path.exists(dst) or not os.path.exists(src):
            continue
        src_name = os.path.basename(src)
        dst_name = os.path.basename(dst)
        try:
            sys.stderr.write(f"[FunASR Worker] Compatibility fix: copying {src_name} to {dst_name}...\n")
            shutil.copy2(src, dst)
        except Exception as e:
            # Deliberately broad: a failed compatibility copy must not abort startup.
            sys.stderr.write(f"[FunASR Worker] Warning: failed to copy {dst_name}: {e}\n")

    try:
        import yaml
    except ImportError:
        sys.stderr.write("[FunASR Worker] Warning: yaml library not available for VAD config fix\n")
        return

    config = None
    if os.path.exists(config_yaml):
        try:
            with open(config_yaml, "r", encoding="utf-8") as f:
                loaded = yaml.safe_load(f)
            # Only a mapping at the YAML root can carry model_conf / vad_post_conf.
            config = loaded if isinstance(loaded, dict) else None

            if config and "model_conf" in config and "vad_post_conf" not in config:
                # deepcopy keeps yaml.dump from emitting anchors/aliases for
                # nested values shared between the two sections.
                config["vad_post_conf"] = copy.deepcopy(config["model_conf"])
                with open(config_yaml, "w", encoding="utf-8") as f:
                    yaml.dump(config, f, default_flow_style=False, allow_unicode=True)
                sys.stderr.write("[FunASR Worker] Compatibility fix: added vad_post_conf to config.yaml\n")
                sys.stderr.flush()
        except Exception as e:
            sys.stderr.write(f"[FunASR Worker] Warning: failed to fix vad_post_conf in config.yaml: {e}\n")

    # Some funasr_onnx versions read vad.yaml instead, so it needs vad_post_conf too.
    if os.path.exists(vad_yaml):
        try:
            with open(vad_yaml, "r", encoding="utf-8") as f:
                vad_conf = yaml.safe_load(f) or {}
            if isinstance(vad_conf, dict) and "vad_post_conf" not in vad_conf:
                source_conf = None
                if config and "vad_post_conf" in config:
                    source_conf = config["vad_post_conf"]
                elif config and "model_conf" in config:
                    source_conf = config["model_conf"]
                if source_conf:
                    vad_conf["vad_post_conf"] = copy.deepcopy(source_conf)
                    with open(vad_yaml, "w", encoding="utf-8") as f:
                        yaml.dump(vad_conf, f, default_flow_style=False, allow_unicode=True)
                    sys.stderr.write("[FunASR Worker] Compatibility fix: added vad_post_conf to vad.yaml\n")
                    sys.stderr.flush()
        except Exception as e:
            sys.stderr.write(f"[FunASR Worker] Warning: failed to fix vad_post_conf in vad.yaml: {e}\n")
134+
135+
136+
def ensure_asr_compatibility(model_dir: str):
    """
    Patch config.yaml of an ASR / punctuation model for loader compatibility.

    Fixes applied (config.yaml is rewritten only if something changed):
    - token_list: added from tokens.json when config.yaml lacks it and
      tokens.json holds a JSON list
    - punc_list: when only model_conf.punc_list exists, it is copied to the
      top level

    All failures are reported on stderr and never raised.

    Args:
        model_dir: Path of the model directory. Empty or non-existent paths,
            and directories without config.yaml, are ignored.
    """
    if not model_dir or not os.path.exists(model_dir):
        return

    config_yaml = os.path.join(model_dir, "config.yaml")
    tokens_json = os.path.join(model_dir, "tokens.json")

    if not os.path.exists(config_yaml):
        return

    try:
        import copy
        import json as json_module
        import yaml
    except ImportError:
        sys.stderr.write("[FunASR Worker] Warning: yaml/json library not available for config fix\n")
        return

    try:
        with open(config_yaml, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)

        # Nothing to patch for an empty file or a non-mapping YAML root.
        if not config or not isinstance(config, dict):
            return

        modified = False

        if "token_list" not in config and os.path.exists(tokens_json):
            with open(tokens_json, "r", encoding="utf-8") as f:
                tokens = json_module.load(f)
            if isinstance(tokens, list):
                config["token_list"] = tokens
                modified = True
                sys.stderr.write(f"[FunASR Worker] Compatibility fix: added token_list ({len(tokens)} tokens)\n")

        model_conf = config.get("model_conf")
        if "punc_list" not in config and isinstance(model_conf, dict) and "punc_list" in model_conf:
            # deepcopy works for any value type, so an unexpected punc_list
            # shape cannot abort the pending token_list fix above.
            config["punc_list"] = copy.deepcopy(model_conf["punc_list"])
            modified = True
            sys.stderr.write("[FunASR Worker] Compatibility fix: added top-level punc_list\n")

        if modified:
            with open(config_yaml, "w", encoding="utf-8") as f:
                yaml.dump(config, f, default_flow_style=False, allow_unicode=True)
            sys.stderr.write(f"[FunASR Worker] Config file updated: {config_yaml}\n")
            sys.stderr.flush()

    except Exception as e:
        # Deliberately broad: a failed compatibility fix must not abort startup.
        sys.stderr.write(f"[FunASR Worker] Warning: failed to fix ASR config: {e}\n")
188+
189+
190+
def ensure_punc_yaml(model_dir: str):
    """
    Provide punc.yaml as a fallback copy of config.yaml.

    Some funasr_onnx punctuation-model versions read punc.yaml; when it is
    missing and config.yaml exists, config.yaml is copied to punc.yaml.
    Copy failures are reported on stderr and never raised.
    """
    if not (model_dir and os.path.exists(model_dir)):
        return

    source = os.path.join(model_dir, "config.yaml")
    target = os.path.join(model_dir, "punc.yaml")

    # Nothing to do when the target is already there or there is nothing to copy.
    if os.path.exists(target) or not os.path.exists(source):
        return

    try:
        import shutil
        shutil.copy2(source, target)
        sys.stderr.write("[FunASR Worker] Compatibility fix: copying config.yaml to punc.yaml...\n")
    except Exception as exc:
        sys.stderr.write(f"[FunASR Worker] Warning: failed to copy punc.yaml: {exc}\n")
205+
206+
207+
def try_fix_local_model_dir(model_id: str, model_type: str, offline_mode: bool = False):
    """
    Look up the local model directory and run the compatibility fixes once.

    The lookup is performed even when the process is not in offline mode.
    "vad" models get the VAD fix; every other type gets the ASR config fix,
    and "punc" models additionally get the punc.yaml fallback.
    """
    local_dir = resolve_local_model_path(
        model_id,
        require_offline_mode=False,
        offline_mode=offline_mode,
    )
    if not local_dir:
        return

    if model_type == "vad":
        ensure_vad_compatibility(local_dir)
        return

    # Both "punc" and all remaining model types share the ASR config fix.
    ensure_asr_compatibility(local_dir)
    if model_type == "punc":
        ensure_punc_yaml(local_dir)

desktop/backend/asr/lib/config.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import os
2+
import sys
3+
from dataclasses import dataclass
4+
from typing import Optional
5+
6+
from .device import env_device_config
7+
8+
9+
@dataclass
class ASRRuntimeConfig:
    """Settings of one ASR worker process, as assembled by load_runtime_config()."""

    # Read from FUNASR_WORKER_ID (default "default").
    worker_id: str
    # True when MODELSCOPE_OFFLINE is "1", "true" or "yes".
    offline_mode: bool
    # Read from ASR_SAMPLE_RATE (default 16000).
    sample_rate: int
    # Read from ASR_CHUNK_MS (default 200).
    chunk_ms: int
    # Read from ASR_SILENCE_CHUNKS (default 3).
    silence_threshold_chunks: int
    # Read from ASR_SILENCE_BUFFER_KEEP (default 2).
    silence_buffer_keep: int
    # Read from MIN_SENTENCE_CHARS (default 2).
    min_sentence_chars: int
    # Device name and index, both supplied by env_device_config().
    asr_device: str
    asr_device_id: int
    # MODELSCOPE_CACHE, falling back to MODELSCOPE_CACHE_HOME; None when unset.
    modelscope_cache: Optional[str]
    # ASR_CACHE_DIR; None when unset.
    asr_cache_dir: Optional[str]
    # Read from ASR_MODEL (default "funasr-paraformer").
    asr_model: str
23+
24+
25+
26+
def _get_int(name: str, default: int) -> int:
    """
    Read environment variable `name` as an integer.

    Returns `default` when the variable is unset or its value is not a valid
    integer literal (including the empty string).
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError:
        # int() on a str can only fail with ValueError; fall back to the default.
        return default
31+
32+
33+
def _get_bool(name: str) -> bool:
    """Return True when environment variable `name` is "1", "true" or "yes" (case-insensitive)."""
    raw_value = os.environ.get(name, "")
    truthy_values = ("1", "true", "yes")
    return raw_value.lower() in truthy_values
35+
36+
37+
def load_runtime_config() -> ASRRuntimeConfig:
    """
    Assemble an ASRRuntimeConfig from the current process environment.

    The device name and index come from env_device_config(); every other field
    is read from an environment variable, with the defaults shown below.
    """
    env = os.environ
    device_name, device_index = env_device_config()

    # MODELSCOPE_CACHE wins; MODELSCOPE_CACHE_HOME is only consulted when it is unset or empty.
    cache_root = env.get("MODELSCOPE_CACHE") or env.get("MODELSCOPE_CACHE_HOME")

    return ASRRuntimeConfig(
        worker_id=env.get("FUNASR_WORKER_ID", "default"),
        offline_mode=_get_bool("MODELSCOPE_OFFLINE"),
        sample_rate=_get_int("ASR_SAMPLE_RATE", 16000),
        chunk_ms=_get_int("ASR_CHUNK_MS", 200),
        silence_threshold_chunks=_get_int("ASR_SILENCE_CHUNKS", 3),
        silence_buffer_keep=_get_int("ASR_SILENCE_BUFFER_KEEP", 2),
        min_sentence_chars=_get_int("MIN_SENTENCE_CHARS", 2),
        asr_device=device_name,
        asr_device_id=device_index,
        modelscope_cache=cache_root,
        asr_cache_dir=env.get("ASR_CACHE_DIR"),
        asr_model=env.get("ASR_MODEL", "funasr-paraformer"),
    )
53+
54+
55+
def log_runtime_config(config: "ASRRuntimeConfig", stream=None) -> None:
    """
    Write a one-line summary of the effective worker configuration.

    Args:
        config: The runtime configuration to report.
        stream: Writable text stream. Defaults to the *current* sys.stderr,
            looked up at call time so a later redirection of sys.stderr is
            honoured (a `stream=sys.stderr` default would freeze whichever
            object was installed when this module was imported).

    Logging is best-effort: any failure while formatting or writing is ignored.
    """
    try:
        target = sys.stderr if stream is None else stream
        snapshot = {
            "worker_id": config.worker_id,
            "offline_mode": config.offline_mode,
            "sample_rate": config.sample_rate,
            "chunk_ms": config.chunk_ms,
            "silence_threshold_chunks": config.silence_threshold_chunks,
            "silence_buffer_keep": config.silence_buffer_keep,
            "min_sentence_chars": config.min_sentence_chars,
            "asr_device": config.asr_device,
            "asr_device_id": config.asr_device_id,
            # Unset cache locations are printed as empty strings rather than None.
            "modelscope_cache": config.modelscope_cache or "",
            "asr_cache_dir": config.asr_cache_dir or "",
            "asr_model": config.asr_model,
        }
        target.write("[FunASR Worker] Effective config: " + str(snapshot) + "\n")
        target.flush()
    except Exception:
        # Deliberately swallowed: a diagnostics line must never break worker startup.
        pass

0 commit comments

Comments
 (0)