Skip to content

Commit 54dd2ee

Browse files
committed
fix(voice): return agent reply items for remote run_input
handleRunInput routed run_input through the registered text-input callback, which is fire-and-forget (interrupt + generateReply) and never surfaces the resulting chat items, so the remote driver always received empty responses even though the agent replied. Drive the reply through session.run() directly and capture result.events, mirroring the Python SessionHost. run_input now also reports an explicit error when the text is empty ('empty run_input text') or when the run completes without producing any items ('agent produced no response items'), instead of returning an empty response silently. Remove the now-unused SessionHost.registerTextInput; participant text input is handled by RoomIO itself.
1 parent b37c37b commit 54dd2ee

2 files changed

Lines changed: 25 additions & 35 deletions

File tree

agents/src/voice/agent_session.ts

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,7 @@ import {
8181
import { AgentInput, AgentOutput } from './io.js';
8282
import { RecorderIO } from './recorder_io/index.js';
8383
import { RoomSessionTransport, SessionHost } from './remote_session.js';
84-
import {
85-
DEFAULT_TEXT_INPUT_CALLBACK,
86-
RoomIO,
87-
type RoomInputOptions,
88-
type RoomOutputOptions,
89-
} from './room_io/index.js';
84+
import { RoomIO, type RoomInputOptions, type RoomOutputOptions } from './room_io/index.js';
9085
import type { UnknownUserData } from './run_context.js';
9186
import type { SpeechHandle } from './speech_handle.js';
9287
import { RunResult } from './testing/run_result.js';
@@ -537,11 +532,6 @@ export class AgentSession<
537532
const transport = new RoomSessionTransport(room, this._roomIO);
538533
this.sessionHost = new SessionHost(transport);
539534
this.sessionHost.registerSession(this);
540-
if (inputOptions?.textEnabled !== false) {
541-
this.sessionHost.registerTextInput(
542-
inputOptions?.textInputCallback ?? DEFAULT_TEXT_INPUT_CALLBACK,
543-
);
544-
}
545535
}
546536

547537
const ctx = getJobContext(false);

agents/src/voice/remote_session.ts

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,6 @@ export class SessionHost {
626626
private eventsRegistered = false;
627627
private recvTask: Task<void> | undefined;
628628
private readonly tasks = new Set<Task<void>>();
629-
private textInputCb: TextInputCallback | undefined;
630629

631630
constructor(
632631
transport: SessionTransport,
@@ -654,10 +653,6 @@ export class SessionHost {
654653
}
655654
}
656655

657-
registerTextInput(textInputCb: TextInputCallback): void {
658-
this.textInputCb = textInputCb;
659-
}
660-
661656
async start(): Promise<void> {
662657
if (this.started) return;
663658
this.started = true;
@@ -991,26 +986,31 @@ export class SessionHost {
991986
let items: pb.ChatContext_ChatItem[] = [];
992987
let error: string | undefined;
993988

994-
if (text) {
995-
if (this.textInputCb) {
996-
const cbResult = this.textInputCb(this.session!, { text });
997-
if (cbResult instanceof Promise) {
998-
await cbResult;
999-
}
1000-
} else {
1001-
try {
1002-
await this.session!.interrupt({ force: true }).await;
1003-
} catch {
1004-
// ignore
1005-
}
989+
if (!text) {
990+
error = 'empty run_input text';
991+
} else {
992+
// Drive the reply through session.run() directly and capture its events,
993+
// ignoring any registered text-input callback. The room's default text
994+
// input callback is fire-and-forget (interrupt + generateReply) and never
995+
// surfaces the resulting chat items, so routing run_input through it would
996+
// always return empty responses to the remote driver. This mirrors the
997+
// Python SessionHost behavior.
998+
try {
999+
await this.session!.interrupt({ force: true }).await;
1000+
} catch {
1001+
// ignore
1002+
}
10061003

1007-
const result = this.session!.run({ userInput: text });
1008-
try {
1009-
await result.wait();
1010-
} catch (e) {
1011-
error = e instanceof Error ? e.message : String(e);
1012-
}
1013-
items = chatItemsToProto(result.events.map((ev) => ev.item));
1004+
const result = this.session!.run({ userInput: text });
1005+
try {
1006+
await result.wait();
1007+
} catch (e) {
1008+
error = e instanceof Error ? e.message : String(e);
1009+
}
1010+
items = chatItemsToProto(result.events.map((ev) => ev.item));
1011+
1012+
if (items.length === 0 && !error) {
1013+
error = 'agent produced no response items';
10141014
}
10151015
}
10161016

0 commit comments

Comments
 (0)