Skip to content

Commit 76834ea

Browse files
committed
add picoclaw app
1 parent 7a12fe7 commit 76834ea

16 files changed

Lines changed: 2250 additions & 0 deletions

File tree

projects/app_picoclaw/app.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
id: picoclaw
2+
name: PicoClaw
3+
name[zh]: PicoClaw
4+
version: 1.0.0
5+
icon: img/icon.png
6+
author: Sipeed Ltd
7+
desc: PicoClaw
8+
desc[zh]: PicoClaw
9+
files:
10+
app.yaml: app.yaml
11+
asr: asr
12+
img: img
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import importlib
2+
import logging
3+
4+
from .config import load_asr_config
5+
6+
# Module-level logger named after this package.
logger = logging.getLogger(__name__)
7+
8+
# (model-name prefix, relative backend module) pairs. _resolve_backend()
# compares a configured model name against these prefixes with
# str.startswith(). The more specific "qwen3-asr-flash-realtime" entry is
# listed before the shorter "qwen3-asr-flash" prefix it shares, so a
# first-match scan cannot shadow it.
_BACKEND_REGISTRY: list[tuple[str, str]] = [
9+
("qwen3-asr-flash-realtime", ".qwen_realtime"),
10+
("qwen3-asr-flash", ".qwen"),
11+
("whisper", ".whisper"),
12+
("scribe_v1", ".elevenlabs"),
13+
]
14+
15+
16+
class ASRNotConfiguredError(Exception):
    """Signal that the loaded ASR configuration does not name any model."""

    pass
18+
19+
20+
def _resolve_backend(model: str) -> str:
    """Return the relative module path of the backend that serves *model*.

    Every registered prefix that *model* starts with is a candidate and the
    longest candidate wins. This keeps the lookup independent of the order of
    ``_BACKEND_REGISTRY``: a specific prefix such as
    ``qwen3-asr-flash-realtime`` can never be shadowed by the shorter
    ``qwen3-asr-flash`` prefix it shares.

    Args:
        model: Configured model name.

    Returns:
        The module path stored next to the best matching prefix.

    Raises:
        ValueError: If no registered prefix matches *model*.
    """
    candidates = [entry for entry in _BACKEND_REGISTRY if model.startswith(entry[0])]
    if candidates:
        # max() returns the first of several equally long prefixes, so
        # registry order still breaks ties.
        _, module_path = max(candidates, key=lambda entry: len(entry[0]))
        return module_path
    raise ValueError(
        f"No ASR backend registered for model '{model}'. "
        f"Known prefixes: {[p for p, _ in _BACKEND_REGISTRY]}"
    )
29+
30+
31+
def get_asr_backend(use_cache: bool = True):
    """Return the ``asr_session`` callable of the configured ASR backend.

    The configuration is read through ``load_asr_config`` (restricted to the
    registered model prefixes), the model name is mapped to a backend module
    with ``_resolve_backend``, and that module is imported relative to this
    package.

    Args:
        use_cache: Forwarded to ``load_asr_config``.

    Returns:
        The ``asr_session`` attribute of the imported backend module.

    Raises:
        ASRNotConfiguredError: If the configuration yields no model name.
        ValueError: Propagated from ``_resolve_backend`` for an unregistered
            model.
    """
    known_prefixes = [prefix for prefix, _module in _BACKEND_REGISTRY]
    config = load_asr_config(use_cache=use_cache, prefixes=known_prefixes)
    # Only the model name is needed here; the backend looks up the key itself.
    model, _api_key = config
    if not model:
        raise ASRNotConfiguredError("No ASR model configured.")

    backend_path = _resolve_backend(model)
    logger.info("ASR routing: model=%s → %s", model, backend_path)

    backend = importlib.import_module(backend_path, package=__name__)
    return backend.asr_session
44+
45+
46+
# Resolve the backend once at import time so callers can simply import
# ``asr_session``. It is None when no usable backend could be selected;
# get_asr_backend() can be called again later to retry.
try:
    asr_session = get_asr_backend()
except ASRNotConfiguredError:
    asr_session = None
except ValueError as exc:
    # A configured but unregistered model name must not make the whole
    # package unimportable; report it and fall back like the unconfigured case.
    logger.warning("ASR backend unavailable: %s", exc)
    asr_session = None

__all__ = ["asr_session", "get_asr_backend", "ASRNotConfiguredError"]
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import logging
2+
import os
3+
from pathlib import Path
4+
5+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
6+
7+
# Location of the security file read by _load_from_yml(); the
# PICOCLAW_SECURITY_YML environment variable overrides the default path.
SECURITY_YML_PATH = Path(
8+
os.environ.get("PICOCLAW_SECURITY_YML", "/root/.picoclaw/.security.yml")
9+
)
10+
11+
# Last complete (model, api_key) pair resolved by load_asr_config();
# None until a complete pair has been found.
_cached_config: tuple[str, str] | None = None
12+
13+
14+
def load_asr_config(prefixes: list[str] | None = None, use_cache: bool = True) -> tuple[str, str]:
15+
global _cached_config
16+
if use_cache and _cached_config is not None:
17+
return _cached_config
18+
env_model = os.environ.get("ASR_MODEL", "").strip()
19+
env_key = os.environ.get("DASHSCOPE_API_KEY", "").strip()
20+
21+
if env_model and env_key:
22+
_cached_config = (env_model, env_key)
23+
return _cached_config
24+
25+
# Try .security.yml
26+
yml_result = _load_from_yml(prefixes)
27+
if yml_result is not None:
28+
yml_model, yml_key = yml_result
29+
model = env_model or yml_model
30+
key = env_key or yml_key
31+
if model and key:
32+
_cached_config = (model, key)
33+
return _cached_config
34+
35+
# Fallback
36+
model = env_model or ""
37+
key = env_key or ""
38+
result = (model, key)
39+
if model and key:
40+
_cached_config = result
41+
return result
42+
43+
44+
def _load_from_yml(prefixes: list[str] | None = None) -> tuple[str, str] | None:
    """Read ``SECURITY_YML_PATH`` and extract a ``(model, api_key)`` pair.

    Best effort: a missing file yields ``None``, and any error raised while
    reading or parsing is logged at debug level and also yields ``None``.
    """
    try:
        if SECURITY_YML_PATH.exists():
            content = SECURITY_YML_PATH.read_text(encoding="utf-8")
            return _parse_yml(content, prefixes)
    except Exception as exc:
        logger.debug("Failed to read %s: %s", SECURITY_YML_PATH, exc)
    return None
53+
54+
55+
def _parse_yml(text: str, prefixes: list[str] | None = None) -> tuple[str, str] | None:
56+
"""Extract first model block (matching prefixes) with an api_key.
57+
58+
Expected structure (indent = 2 spaces per level):
59+
<model-name>:0:
60+
api_keys:
61+
- <key>
62+
"""
63+
lines = text.splitlines()
64+
found_model = ""
65+
in_model = False
66+
in_api_keys = False
67+
68+
for raw in lines:
69+
line = raw.rstrip()
70+
stripped = line.strip()
71+
if not stripped or stripped.startswith("#"):
72+
continue
73+
74+
indent = len(line) - len(line.lstrip(" "))
75+
76+
if not in_model:
77+
if indent == 2 and stripped.endswith(":0:"):
78+
candidate = stripped[:-3]
79+
if prefixes is None or any(candidate.startswith(p) for p in prefixes):
80+
found_model = candidate
81+
in_model = True
82+
continue
83+
84+
# Moved to another top-level model block
85+
if indent <= 2 and stripped.endswith(":0:"):
86+
# Check if this new block also matches
87+
candidate = stripped[:-3]
88+
in_model = False
89+
in_api_keys = False
90+
if prefixes is None or any(candidate.startswith(p) for p in prefixes):
91+
found_model = candidate
92+
in_model = True
93+
continue
94+
if indent == 0:
95+
break
96+
97+
if indent == 4 and stripped == "api_keys:":
98+
in_api_keys = True
99+
continue
100+
101+
if in_api_keys:
102+
if indent <= 4:
103+
in_api_keys = False
104+
continue
105+
if stripped.startswith("- "):
106+
key = stripped[2:].strip().strip('"').strip("'")
107+
if key:
108+
return found_model, key
109+
110+
return None
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import asyncio
2+
import io
3+
import logging
4+
import wave
5+
6+
import numpy as np
7+
import requests
8+
9+
from .config import load_asr_config
10+
11+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
12+
13+
# ElevenLabs speech-to-text endpoint that asr_session() POSTs the WAV upload to.
API_URL = "https://api.elevenlabs.io/v1/speech-to-text"
14+
15+
16+
def _pcm_to_wav_bytes(pcm_int16: np.ndarray, sample_rate: int = 16000) -> bytes:
    """Wrap int16 PCM samples in an in-memory mono 16-bit WAV container.

    Args:
        pcm_int16: Samples whose raw bytes are written as the audio frames.
        sample_rate: Frame rate recorded in the WAV header.

    Returns:
        The complete WAV file as bytes.
    """
    channels = 1
    bytes_per_sample = 2  # 16-bit

    stream = io.BytesIO()
    with wave.open(stream, "wb") as writer:
        writer.setframerate(sample_rate)
        writer.setsampwidth(bytes_per_sample)
        writer.setnchannels(channels)
        frames = pcm_int16.tobytes()
        writer.writeframes(frames)
    return stream.getvalue()
25+
26+
27+
async def asr_session(pcm_data: np.ndarray) -> str:
    """Transcribe one utterance with the ElevenLabs speech-to-text API.

    The samples are scaled to int16, wrapped in a WAV container and uploaded
    as a multipart form. The blocking ``requests`` call runs in the default
    executor so the event loop stays responsive.

    Args:
        pcm_data: Audio samples; assumed to be normalized floats in
            [-1.0, 1.0] (they are multiplied by 32768 and clipped to int16).

    Returns:
        The stripped transcript, or ``""`` when the audio is shorter than
        3200 samples, no API key is configured, the request fails, the server
        answers with a non-200 status, or the response cannot be parsed.
    """
    if len(pcm_data) < 3200:
        logger.info("Audio too short (%d samples), skipping recognition", len(pcm_data))
        return ""

    # Convert normalized float PCM to int16 PCM.
    pcm_int16 = (pcm_data * 32768).clip(-32768, 32767).astype(np.int16)

    model, api_key = load_asr_config()
    if not api_key:
        logger.error("API key not found")
        return ""

    logger.debug("ASR model: %s (ElevenLabs)", model)

    wav_bytes = _pcm_to_wav_bytes(pcm_int16)

    headers = {"Xi-Api-Key": api_key}
    files = {"file": ("audio.wav", wav_bytes, "audio/wav")}
    data = {"model_id": model}

    # Inside a coroutine the running loop is the one to schedule on.
    loop = asyncio.get_running_loop()
    try:
        resp = await loop.run_in_executor(
            None,
            lambda: requests.post(API_URL, headers=headers, files=files, data=data, timeout=120),
        )
    except requests.RequestException as exc:
        # Timeouts and connection errors follow the same "empty transcript"
        # contract as every other failure path.
        logger.error("ElevenLabs request failed: %s", exc)
        return ""

    if resp.status_code != 200:
        logger.error("ElevenLabs API error %d: %s", resp.status_code, resp.text)
        return ""

    try:
        # ValueError covers a body that is not valid JSON.
        transcript = resp.json()["text"]
    except (KeyError, TypeError, ValueError) as exc:
        logger.error("Failed to parse ElevenLabs response: %s — %s", exc, resp.text)
        return ""

    if not isinstance(transcript, str):
        logger.error(
            "Failed to parse ElevenLabs response: %s — %s", "transcript is not text", resp.text
        )
        return ""

    logger.info("Recognized: %s", transcript)
    return transcript.strip()

projects/app_picoclaw/asr/qwen.py

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import base64
2+
import io
3+
import logging
4+
import wave
5+
6+
import numpy as np
7+
import requests
8+
9+
from .config import load_asr_config
10+
11+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
12+
13+
# DashScope chat-completions endpoint that asr_session() POSTs the audio payload to.
API_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions"
14+
15+
16+
def _pcm_to_base64_wav(pcm_int16: np.ndarray, sample_rate: int = 16000) -> str:
    """Encode int16 PCM samples as a ``data:audio/wav;base64,...`` URI.

    Args:
        pcm_int16: Samples whose raw bytes are written as the audio frames.
        sample_rate: Frame rate recorded in the WAV header.

    Returns:
        A data URI holding a mono 16-bit WAV file, base64 encoded.
    """
    channels = 1
    bytes_per_sample = 2  # 16-bit

    stream = io.BytesIO()
    with wave.open(stream, "wb") as writer:
        writer.setframerate(sample_rate)
        writer.setsampwidth(bytes_per_sample)
        writer.setnchannels(channels)
        writer.writeframes(pcm_int16.tobytes())

    encoded = base64.b64encode(stream.getvalue()).decode("utf-8")
    return f"data:audio/wav;base64,{encoded}"
27+
28+
29+
async def asr_session(pcm_data: np.ndarray) -> str:
    """Transcribe one utterance with the DashScope chat-completions API.

    The samples are scaled to int16, wrapped in a WAV container, embedded in
    the request as a base64 data URI and sent as a non-streaming completion.
    The blocking ``requests`` call runs in the default executor so the event
    loop stays responsive.

    Args:
        pcm_data: Audio samples; assumed to be normalized floats in
            [-1.0, 1.0] (they are multiplied by 32768 and clipped to int16).

    Returns:
        The stripped transcript, or ``""`` when the audio is shorter than
        3200 samples, no API key is configured, the request fails, the server
        answers with a non-200 status, or the response cannot be parsed.
    """
    # This module has no top-level asyncio import, so it is imported here.
    import asyncio

    if len(pcm_data) < 3200:
        logger.info("Audio too short (%d samples), skipping recognition", len(pcm_data))
        return ""

    # Convert normalized float PCM to int16 PCM.
    pcm_int16 = (pcm_data * 32768).clip(-32768, 32767).astype(np.int16)

    model, api_key = load_asr_config()
    if not api_key:
        logger.error("API key not found (DASHSCOPE_API_KEY / .security.yml)")
        return ""

    logger.debug("ASR model: %s (non-realtime)", model)

    data_uri = _pcm_to_base64_wav(pcm_int16)

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "input_audio", "input_audio": {"data": data_uri, "format": "wav"}}
                ],
            }
        ],
        "stream": False,
        "asr_options": {"enable_itn": False},
    }

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # Inside a coroutine the running loop is the one to schedule on.
    loop = asyncio.get_running_loop()
    try:
        resp = await loop.run_in_executor(
            None,
            lambda: requests.post(API_URL, json=payload, headers=headers, timeout=120),
        )
    except requests.RequestException as exc:
        # Timeouts and connection errors follow the same "empty transcript"
        # contract as every other failure path.
        logger.error("ASR request failed: %s", exc)
        return ""

    if resp.status_code != 200:
        logger.error("ASR API error %d: %s", resp.status_code, resp.text)
        return ""

    try:
        # ValueError covers a body that is not valid JSON.
        transcript = resp.json()["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError, ValueError) as exc:
        logger.error("Failed to parse ASR response: %s — %s", exc, resp.text)
        return ""

    if not isinstance(transcript, str):
        # A missing or structured ``content`` has no ``strip``; treat it as unparsable.
        logger.error("Failed to parse ASR response: %s — %s", "content is not text", resp.text)
        return ""

    logger.info("Recognized: %s", transcript)
    return transcript.strip()

0 commit comments

Comments
 (0)