Skip to content

Commit 7f74337

Browse files
Fix voice-assistant agent routing and TTS escape sequence handling
- Resolve the agent route via bindings (matching the Telegram/Discord pattern) so the voice-agent binding in openclaw.json is respected
- Handle literal escape sequences (\n, \r, \t) before stripping backslashes in the TTS sanitizer

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 1024409 commit 7f74337

2 files changed

Lines changed: 92 additions & 71 deletions

File tree

extensions/voice-assistant/src/channel.ts

Lines changed: 87 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ import type {
1818
VoiceAssistantConfig,
1919
VoiceSpeakMessage,
2020
} from "./types.js";
21-
import { getVoiceAssistantRuntime } from "./runtime.js";
2221
import { createVoiceGateway, getVoiceGateway, stopVoiceGateway } from "./gateway.js";
22+
import { getVoiceAssistantRuntime } from "./runtime.js";
2323
import { sanitizeForTTS } from "./tts-sanitize.js";
2424

2525
// =============================================================================
@@ -131,79 +131,95 @@ export const voiceAssistantPlugin: ChannelPlugin<ResolvedVoiceAssistantAccount>
131131
const gateway = createVoiceGateway(wsPort);
132132

133133
// Handle transcriptions from voice clients
134-
gateway.on("transcription", async (event: {
135-
clientId: string;
136-
accountId: string;
137-
text: string;
138-
sessionId?: string;
139-
timestamp: number;
140-
isFinal: boolean;
141-
}) => {
142-
if (!event.isFinal) {
143-
// Skip partial transcriptions
144-
return;
145-
}
134+
gateway.on(
135+
"transcription",
136+
async (event: {
137+
clientId: string;
138+
accountId: string;
139+
text: string;
140+
sessionId?: string;
141+
timestamp: number;
142+
isFinal: boolean;
143+
}) => {
144+
if (!event.isFinal) {
145+
// Skip partial transcriptions
146+
return;
147+
}
146148

147-
log?.info?.(`Transcription received: "${event.text}"`);
148-
149-
try {
150-
const sessionKey = `voice:${event.accountId}:${event.sessionId ?? "main"}`;
151-
const from = `voice:${event.clientId}`;
152-
const agentRuntime = getVoiceAssistantRuntime();
153-
154-
// Load latest config for dispatch
155-
const currentCfg = await agentRuntime.config.loadConfig();
156-
157-
// Build MsgContext matching the pattern used by Telegram/Discord
158-
const msgCtx = {
159-
Body: event.text,
160-
BodyForAgent: event.text,
161-
RawBody: event.text,
162-
CommandBody: event.text,
163-
From: from,
164-
To: CHANNEL_ID,
165-
SessionKey: sessionKey,
166-
AccountId: event.accountId,
167-
ChatType: "direct",
168-
Provider: CHANNEL_ID,
169-
Surface: CHANNEL_ID,
170-
OriginatingChannel: CHANNEL_ID,
171-
OriginatingTo: from,
172-
SenderName: account.name || "Voice User",
173-
SenderId: event.clientId,
174-
Timestamp: event.timestamp,
175-
WasMentioned: true, // Direct voice input is always "mentioned"
176-
};
177-
178-
// Dispatch using the same API as Telegram/Discord
179-
await agentRuntime.channel.reply.dispatchReplyWithBufferedBlockDispatcher({
180-
ctx: msgCtx,
181-
cfg: currentCfg,
182-
dispatcherOptions: {
183-
deliver: async (payload) => {
184-
if (payload.text) {
185-
const spokenText = sanitizeForTTS(payload.text);
186-
if (!spokenText) return;
187-
log?.info?.(`Sending TTS response: "${spokenText.slice(0, 80)}..."`);
188-
const speakMsg: VoiceSpeakMessage = {
189-
type: "speak",
190-
text: spokenText,
191-
sourceChannel: CHANNEL_ID,
192-
priority: 1,
193-
interrupt: false,
194-
};
195-
gateway.broadcast(speakMsg);
196-
}
149+
log?.info?.(`Transcription received: "${event.text}"`);
150+
151+
try {
152+
const from = `voice:${event.clientId}`;
153+
const agentRuntime = getVoiceAssistantRuntime();
154+
155+
// Load latest config for dispatch
156+
const currentCfg = await agentRuntime.config.loadConfig();
157+
158+
// Resolve agent route via bindings (same pattern as Telegram/Discord)
159+
// This ensures voice-agent binding in openclaw.json is respected,
160+
// routing to the correct agent workspace + SOUL.md
161+
const route = agentRuntime.channel.routing.resolveAgentRoute({
162+
cfg: currentCfg,
163+
channel: CHANNEL_ID,
164+
accountId: event.accountId,
165+
peer: {
166+
kind: "direct" as "direct",
167+
id: from,
197168
},
198-
onError: (err) => {
199-
log?.error?.(`Voice reply delivery error: ${err}`);
169+
});
170+
const sessionKey = route.sessionKey;
171+
172+
// Build MsgContext matching the pattern used by Telegram/Discord
173+
const msgCtx = {
174+
Body: event.text,
175+
BodyForAgent: event.text,
176+
RawBody: event.text,
177+
CommandBody: event.text,
178+
From: from,
179+
To: CHANNEL_ID,
180+
SessionKey: sessionKey,
181+
AccountId: route.accountId,
182+
ChatType: "direct",
183+
Provider: CHANNEL_ID,
184+
Surface: CHANNEL_ID,
185+
OriginatingChannel: CHANNEL_ID,
186+
OriginatingTo: from,
187+
SenderName: account.name || "Voice User",
188+
SenderId: event.clientId,
189+
Timestamp: event.timestamp,
190+
WasMentioned: true, // Direct voice input is always "mentioned"
191+
};
192+
193+
// Dispatch using the same API as Telegram/Discord
194+
await agentRuntime.channel.reply.dispatchReplyWithBufferedBlockDispatcher({
195+
ctx: msgCtx,
196+
cfg: currentCfg,
197+
dispatcherOptions: {
198+
deliver: async (payload) => {
199+
if (payload.text) {
200+
const spokenText = sanitizeForTTS(payload.text);
201+
if (!spokenText) return;
202+
log?.info?.(`Sending TTS response: "${spokenText.slice(0, 80)}..."`);
203+
const speakMsg: VoiceSpeakMessage = {
204+
type: "speak",
205+
text: spokenText,
206+
sourceChannel: CHANNEL_ID,
207+
priority: 1,
208+
interrupt: false,
209+
};
210+
gateway.broadcast(speakMsg);
211+
}
212+
},
213+
onError: (err) => {
214+
log?.error?.(`Voice reply delivery error: ${err}`);
215+
},
200216
},
201-
},
202-
});
203-
} catch (error) {
204-
log?.error?.(`Failed to dispatch transcription: ${error}`);
205-
}
206-
});
217+
});
218+
} catch (error) {
219+
log?.error?.(`Failed to dispatch transcription: ${error}`);
220+
}
221+
},
222+
);
207223

208224
// Handle abort signal
209225
abortSignal.addEventListener("abort", () => {

extensions/voice-assistant/src/tts-sanitize.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,11 @@ export function sanitizeForTTS(text: string): string {
7676

7777
// --- Clean up special characters ---
7878

79+
// Literal escape sequences, handled before backslashes are stripped: \n → newline, \r → removed, \t → space
80+
result = result.replace(/\\n/g, "\n");
81+
result = result.replace(/\\r/g, "");
82+
result = result.replace(/\\t/g, " ");
83+
7984
// HTML-like tags (angle brackets)
8085
result = result.replace(/<[^>]*>/g, "");
8186

0 commit comments

Comments
 (0)