Skip to content

Commit 4cdc2ac

Browse files
committed
feat: reviewed change for microphone and audio input
1 parent 82190bc commit 4cdc2ac

19 files changed

+1782
-46
lines changed

node_modules/.package-lock.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@rapidaai/react",
3-
"version": "1.1.60",
3+
"version": "1.1.61",
44
"description": "An easy to use react client for building generative ai application using Rapida platform.",
55
"repository": {
66
"type": "git",

src/agents/voice-agent.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,21 @@ export class VoiceAgent extends Agent {
146146
this.disconnect();
147147
}
148148
},
149+
onConversationEvent: (event) => {
150+
this.agentCallbacks.forEach((cb) => {
151+
cb.onConversationEvent?.(event);
152+
});
153+
},
154+
onMetric: (metric) => {
155+
this.agentCallbacks.forEach((cb) => {
156+
cb.onMetric?.(metric);
157+
});
158+
},
159+
onConversationError: (error) => {
160+
this.agentCallbacks.forEach((cb) => {
161+
cb.onConversationError?.(error);
162+
});
163+
},
149164
onInitialization: (config) => {
150165
// console.log(`${LOG_PREFIX} callback -> onInitialization`, config);
151166
if (config?.assistantconversationid) {

src/audio/audio-media-manager.ts

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
*/
2424

2525
import { AgentConfig } from "@/rapida/types/agent-config";
26-
import { isChrome, isEdge, isWindows, isFirefox, isSinkIdSupported } from "@/rapida/utils";
26+
import { isChrome, isEdge, isWindows, isSinkIdSupported } from "@/rapida/utils";
2727

2828
/** Sample rate for Opus */
2929
const OPUS_SAMPLE_RATE = 48000;
@@ -69,17 +69,30 @@ export class AudioMediaManager {
6969

7070
/** Capture the local microphone stream */
7171
async setupLocalMedia(): Promise<void> {
72+
// Clean up any existing mic resources before re-capturing.
73+
// Without this, a second call (e.g. hot reconnect) would overwrite
74+
// localStream while the old tracks keep running and the mic indicator stays on.
75+
if (this.localStream) {
76+
this.localStream.getTracks().forEach(t => t.stop());
77+
this.localStream = null;
78+
}
79+
if (this._inputAnalyser) {
80+
this._inputAnalyser.disconnect();
81+
this._inputAnalyser = null;
82+
}
83+
7284
const constraints = this.getAudioConstraints();
7385

7486
try {
7587
this.localStream = await navigator.mediaDevices.getUserMedia({ audio: constraints, video: false });
7688
} catch (error: any) {
77-
// OverconstrainedError can occur on any platform when an 'exact' deviceId
78-
// constraint cannot be satisfied (e.g. the device was unplugged).
79-
// Retry with simplified constraints so the call degrades gracefully
80-
// to the default device rather than failing completely.
81-
if (error?.name === "OverconstrainedError") {
82-
console.warn("[AudioMediaManager] Retrying with simplified audio constraints after OverconstrainedError");
89+
// OverconstrainedError — 'exact' deviceId constraint cannot be satisfied
90+
// (Firefox throws this when the device is gone)
91+
// NotFoundError — requested device does not exist
92+
// (Chrome throws this for the same situation)
93+
// Both mean: fall back to the default device rather than failing completely.
94+
if (error?.name === "OverconstrainedError" || error?.name === "NotFoundError") {
95+
console.warn("[AudioMediaManager] Retrying with simplified audio constraints:", error.name);
8396
this.localStream = await navigator.mediaDevices.getUserMedia({
8497
audio: this.getSimplifiedAudioConstraints(),
8598
video: false,
@@ -117,6 +130,11 @@ export class AudioMediaManager {
117130
* Windows browsers (especially Edge) may need special handling.
118131
*/
119132
private async setupAudioContext(): Promise<void> {
133+
// Re-use an existing context that is not closed (running or suspended) rather than silently abandoning it.
134+
// A closed context falls through to the setup below; callers that need a fresh context
135+
// while the current one is still open must null this.audioContext themselves (disconnectAudio does this).
136+
if (this.audioContext && this.audioContext.state !== "closed") return;
137+
120138
const AudioContextClass = getAudioContextClass();
121139
if (!AudioContextClass) {
122140
console.warn("[AudioMediaManager] AudioContext not available in this browser");
@@ -458,16 +476,20 @@ export class AudioMediaManager {
458476
async setInputDevice(deviceId: string): Promise<void> {
459477
this.agentConfig.inputOptions.device = deviceId;
460478
this.localStream?.getTracks().forEach(t => t.stop());
479+
// Null immediately so that if getUserMedia throws, getLocalAudioTrack()
480+
// returns undefined rather than a stream full of stopped (silent) tracks.
481+
this.localStream = null;
461482

462483
try {
463484
this.localStream = await navigator.mediaDevices.getUserMedia({
464485
audio: this.getAudioConstraints(),
465486
video: false,
466487
});
467488
} catch (error: any) {
468-
// On Windows, retry with simplified constraints if device selection fails
469-
if (isWindows() && (error?.name === "OverconstrainedError" || error?.name === "NotFoundError")) {
470-
console.warn("[AudioMediaManager] Device selection failed, retrying with simplified constraints");
489+
// Same cross-platform fallback as setupLocalMedia:
490+
// Chrome → NotFoundError, Firefox → OverconstrainedError, for missing 'exact' device.
491+
if (error?.name === "OverconstrainedError" || error?.name === "NotFoundError") {
492+
console.warn("[AudioMediaManager] Device selection failed, retrying with simplified constraints:", error.name);
471493
this.localStream = await navigator.mediaDevices.getUserMedia({
472494
audio: this.getSimplifiedAudioConstraints(),
473495
video: false,
@@ -478,7 +500,7 @@ export class AudioMediaManager {
478500
}
479501

480502
// Reconnect analyser (disconnect old source first to avoid leaking audio nodes)
481-
if (this.audioContext && this._inputAnalyser) {
503+
if (this.audioContext && this._inputAnalyser && this.localStream) {
482504
this._inputAnalyser.disconnect();
483505
const source = this.audioContext.createMediaStreamSource(this.localStream);
484506
this._inputAnalyser = this.audioContext.createAnalyser();

src/audio/compatibility.ts

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,23 +22,19 @@
2222
* Author: Prashant <prashant@rapida.ai>
2323
*
2424
*/
25-
export function isIosDevice() {
25+
export function isIosDevice(): boolean {
26+
if (typeof navigator === "undefined") return false;
2627
return (
27-
[
28-
"iPad Simulator",
29-
"iPhone Simulator",
30-
"iPod Simulator",
31-
"iPad",
32-
"iPhone",
33-
"iPod",
34-
].includes(navigator.platform) ||
35-
// iPad on iOS 13 detection
28+
/iPad|iPhone|iPod/.test(navigator.userAgent) ||
29+
// iPad on iOS 13+ reports as "MacIntel" with touch support
30+
(navigator.platform === "MacIntel" && navigator.maxTouchPoints > 1) ||
3631
(navigator.userAgent.includes("Mac") && "ontouchend" in document)
3732
);
3833
}
3934

40-
export function isAndroidDevice() {
41-
return /android/i.test(navigator.userAgent);
35+
export function isAndroidDevice(): boolean {
36+
if (typeof navigator === "undefined") return false;
37+
return /Android/i.test(navigator.userAgent);
4238
}
4339

4440
export function isWindowsDevice() {

src/audio/message-protocol-handler.ts

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ import type { AudioMediaManager } from "./audio-media-manager";
4848
* This is pure dispatch logic — it holds no state of its own.
4949
*/
5050
export class MessageProtocolHandler {
51+
// Serialization chain ensures signaling messages (CONFIG → OFFER → ICE)
52+
// are processed in order even though gRPC callbacks fire without await.
53+
private processingChain: Promise<void> = Promise.resolve();
54+
5155
constructor(
5256
private callbacks: AgentCallback,
5357
private signaling: GrpcSignalingManager,
@@ -60,6 +64,13 @@ export class MessageProtocolHandler {
6064
// ---------------------------------------------------------------------------
6165

6266
async handleMessage(response: WebTalkResponse): Promise<void> {
67+
this.processingChain = this.processingChain
68+
.then(() => this._handleMessage(response))
69+
.catch(err => console.error("[Protocol] Message processing error", err));
70+
return this.processingChain;
71+
}
72+
73+
private async _handleMessage(response: WebTalkResponse): Promise<void> {
6374
// Server signaling (SDP, ICE, config, ready, clear, error)
6475
if (response.hasSignaling()) {
6576
const sig = response.getSignaling();
@@ -109,6 +120,24 @@ export class MessageProtocolHandler {
109120
const directive = response.getDirective();
110121
if (directive) this.callbacks.onDirective?.(directive.toObject());
111122
}
123+
124+
// Pipeline conversation event (STT, TTS, LLM, session, etc.)
125+
if (response.hasEvent()) {
126+
const event = response.getEvent();
127+
if (event) this.callbacks.onConversationEvent?.(event.toObject());
128+
}
129+
130+
// Conversation error (server-side error with conversation ID and details); only its message is forwarded, wrapped in an Error, via onError
131+
if (response.hasError()) {
132+
const error = response.getError();
133+
if (error) this.callbacks.onError?.(new Error(error.getMessage()));
134+
}
135+
136+
// Metric (server-side performance/latency data)
137+
if (response.hasMetric()) {
138+
const metric = response.getMetric();
139+
if (metric) this.callbacks.onMetric?.(metric.toObject());
140+
}
112141
}
113142

114143
// ---------------------------------------------------------------------------
@@ -122,9 +151,16 @@ export class MessageProtocolHandler {
122151
const messageCase = signaling.getMessageCase();
123152

124153
switch (messageCase) {
125-
case ServerSignaling.MessageCase.CONFIG:
126-
this.peer.setup();
154+
case ServerSignaling.MessageCase.CONFIG: {
155+
const config = signaling.getConfig();
156+
const iceServers = config?.getIceserversList()?.map(srv => ({
157+
urls: srv.getUrlsList(),
158+
username: srv.getUsername() || undefined,
159+
credential: srv.getCredential() || undefined,
160+
})) as RTCIceServer[] | undefined;
161+
this.peer.setup(iceServers);
127162
break;
163+
}
128164

129165
case ServerSignaling.MessageCase.SDP: {
130166
try {

src/audio/webrtc-peer-manager.ts

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ import { AgentCallback } from "@/rapida/types/agent-callback";
2828
// WebRTC Constants
2929
// ============================================================================
3030

31-
const ICE_SERVERS: RTCIceServer[] = [
31+
const DEFAULT_ICE_SERVERS: RTCIceServer[] = [
3232
{ urls: "stun:stun.l.google.com:19302" },
3333
{ urls: "stun:stun1.l.google.com:19302" },
3434
];
@@ -59,17 +59,22 @@ export class WebRTCPeerManager {
5959
// Lifecycle
6060
// ---------------------------------------------------------------------------
6161

62-
/** Create a new RTCPeerConnection */
63-
setup(): void {
64-
// Close existing connection before creating a new one to prevent leaks
62+
/** Create a new RTCPeerConnection, optionally using server-provided ICE servers */
63+
setup(iceServers?: RTCIceServer[]): void {
64+
// Close existing connection before creating a new one to prevent leaks.
65+
// Detach handlers BEFORE closing so the "closed" state change does not
66+
// spuriously fire onDisconnected when the peer is simply being replaced.
6567
if (this.peerConnection) {
68+
this.peerConnection.onconnectionstatechange = null;
69+
this.peerConnection.ontrack = null;
70+
this.peerConnection.onicecandidate = null;
6671
try { this.peerConnection.close(); } catch { }
6772
this.peerConnection = null;
6873
this._isConnected = false;
6974
}
7075

7176
this.peerConnection = new RTCPeerConnection({
72-
iceServers: ICE_SERVERS,
77+
iceServers: iceServers?.length ? iceServers : DEFAULT_ICE_SERVERS,
7378
iceTransportPolicy: ICE_TRANSPORT_POLICY,
7479
bundlePolicy: BUNDLE_POLICY,
7580
});
@@ -81,10 +86,14 @@ export class WebRTCPeerManager {
8186
this.onRemoteTrack(stream);
8287
};
8388

84-
// ICE candidates
89+
// ICE candidates — Safari may lack toJSON() on RTCIceCandidate
8590
this.peerConnection.onicecandidate = (event) => {
8691
if (event.candidate) {
87-
this.onICECandidate(event.candidate.toJSON());
92+
const c = event.candidate;
93+
const json = typeof c.toJSON === "function"
94+
? c.toJSON()
95+
: { candidate: c.candidate, sdpMid: c.sdpMid, sdpMLineIndex: c.sdpMLineIndex };
96+
this.onICECandidate(json);
8897
}
8998
};
9099

@@ -150,7 +159,18 @@ export class WebRTCPeerManager {
150159

151160
const track = localStream?.getAudioTracks()[0];
152161
const transceivers = this.peerConnection!.getTransceivers();
153-
const audioTransceiver = transceivers.find(t => t.receiver.track?.kind === "audio");
162+
163+
// Safari (pre-14.1) leaves receiver.track null after setRemoteDescription
164+
// until the first remote packet arrives, so the receiver-track check alone
165+
// is unreliable. Use a cascade of fallbacks:
166+
// 1. receiver.track.kind — Chrome, Firefox, Safari 14.1+
167+
// 2. sender.track.kind — if the sender already has a track attached
168+
// 3. transceivers[0] — voice-only sessions always have exactly one
169+
// audio m-line, so the first transceiver is it
170+
const audioTransceiver =
171+
transceivers.find(t => t.receiver.track?.kind === "audio") ??
172+
transceivers.find(t => t.sender.track?.kind === "audio") ??
173+
transceivers[0];
154174

155175
if (!audioTransceiver) {
156176
console.warn("No audio transceiver found in offer");
@@ -160,13 +180,21 @@ export class WebRTCPeerManager {
160180
}
161181

162182
if (track) {
163-
// Audio mode — bidirectional
183+
// Audio mode — bidirectional.
184+
// Set direction first so the answer SDP reflects sendrecv intent.
164185
audioTransceiver.direction = "sendrecv";
186+
187+
// replaceTrack must succeed — if it throws, revert direction so the
188+
// answer SDP does not falsely advertise a send-capable track, and
189+
// re-throw so the caller can surface the failure rather than silently
190+
// completing negotiation with no mic audio reaching the server.
165191
try {
166192
await audioTransceiver.sender.replaceTrack(track);
167193
} catch (error) {
168-
console.error("Failed to replace audio track", error);
194+
audioTransceiver.direction = "recvonly";
195+
throw error;
169196
}
197+
170198
this.setCodecPreferences(audioTransceiver);
171199
} else {
172200
// Text-only mode — receive only (no microphone)

src/clients/protos/artifacts

0 commit comments

Comments
 (0)