1+ import { PUBLIC_SERVICE_URL } from "$env/static/public" ;
12import { AudioRecordingWorklet } from "$lib/helpers/pcmProcessor" ;
23
3- // @ts -ignore
4- const SpeechRecognition = window . SpeechRecognition || window . webkitSpeechRecognition ;
5-
64// @ts -ignore
75const AudioContext = window . AudioContext || window . webkitAudioContext ;
86
7+ const sampleRate = 24000 ;
8+
9+ /** @type {AudioContext } */
10+ let audioCtx = new AudioContext ( ) ;
11+
12+ /** @type {any[] } */
13+ let audioQueue = [ ] ;
14+
15+ /** @type {boolean } */
16+ let isPlaying = false ;
17+
918export const realtimeChat = {
1019
1120 /** @type {WebSocket | null } */
1221 socket : null ,
1322
14- /** @type {MediaRecorder | null } */
15- mediaRecorder : null ,
16-
1723 /** @type {MediaStream | null } */
1824 mediaStream : null ,
1925
20- /** @type {SpeechRecognition | null } */
21- recognition : null ,
26+ /** @type {AudioWorkletNode | null } */
27+ workletNode : null ,
28+
29+ /** @type {MediaStreamAudioSourceNode | null } */
30+ micSource : null ,
2231
2332 /**
2433 * @param {string } agentId
2534 * @param {string } conversationId
2635 */
2736 start ( agentId , conversationId ) {
28- this . socket = new WebSocket ( `ws://localhost:5100/chat/stream/${ agentId } /${ conversationId } ` ) ;
37+ reset ( ) ;
38+ const wsUrl = buildWebsocketUrl ( ) ;
39+ this . socket = new WebSocket ( `${ wsUrl } /chat/stream/${ agentId } /${ conversationId } ` ) ;
2940
3041 this . socket . onopen = async ( ) => {
3142 console . log ( "WebSocket connected" ) ;
@@ -35,17 +46,17 @@ export const realtimeChat = {
3546 } ) ) ;
3647
3748 this . mediaStream = await navigator . mediaDevices . getUserMedia ( { audio : true } ) ;
38- const audioCtx = new AudioContext ( { sampleRate : 16000 } ) ;
49+ audioCtx = new AudioContext ( { sampleRate : sampleRate } ) ;
3950
4051 const workletName = "audio-recorder-worklet" ;
41- const src = createWorketFromSrc ( workletName , AudioRecordingWorklet ) ;
52+ const src = createWorkletFromSrc ( workletName , AudioRecordingWorklet ) ;
4253 await audioCtx . audioWorklet . addModule ( src ) ;
4354
44- const workletNode = new AudioWorkletNode ( audioCtx , workletName ) ;
45- const micSource = audioCtx . createMediaStreamSource ( this . mediaStream ) ;
46- micSource . connect ( workletNode ) ;
55+ this . workletNode = new AudioWorkletNode ( audioCtx , workletName ) ;
56+ this . micSource = audioCtx . createMediaStreamSource ( this . mediaStream ) ;
57+ this . micSource . connect ( this . workletNode ) ;
4758
48- workletNode . port . onmessage = event => {
59+ this . workletNode . port . onmessage = event => {
4960 const arrayBuffer = event . data . data . int16arrayBuffer ;
5061 if ( arrayBuffer && this . socket ?. readyState === WebSocket . OPEN ) {
5162 const arrayBufferString = arrayBufferToBase64 ( arrayBuffer ) ;
@@ -55,92 +66,41 @@ export const realtimeChat = {
5566 } ) ) ;
5667 }
5768 } ;
69+ } ;
5870
59- // this.recognition = new SpeechRecognition();
60- // this.recognition.continuous = true;
61- // this.recognition.interimResults = false;
62- // this.recognition.lang = "en-US";
63-
64- // this.recognition.onresult = (/** @type { any } */ event) => {
65- // const lastResult = event.results[event.results.length - 1];
66- // const transcript = lastResult[0].transcript.trim();
67-
68- // console.log("Recognized:", transcript);
69-
70- // const message = {
71- // event: "media",
72- // payload: transcript
73- // };
74-
75- // if (this.socket?.readyState === WebSocket.OPEN) {
76- // this.socket.send(JSON.stringify(message));
77- // }
78- // };
79-
80- // this.recognition.onend = () => {
81- // console.log('Speech recognition closed.');
82- // };
83- // this.recognition.start();
84-
85- // navigator.mediaDevices.getUserMedia({ audio: true })
86- // .then(stream => {
87- // this.mediaStream = stream;
88- // this.mediaRecorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
89- // /** @type {any[] } */
90- // let audioChunks = [];
91- // this.mediaRecorder.ondataavailable = (/** @type {any } */ event) => {
92- // if (event.data.size > 0) {
93- // // audioChunks.push(event.data);
94- // }
95- // };
96-
97- // this.mediaRecorder.onstop = async () => {
98- // console.log('mediaRecorder stopped');
99- // // const blob = new Blob(audioChunks, { type: 'audio/webm' });
100- // // const arrayBuffer = await blob.arrayBuffer();
101-
102- // // // Decode audio and downsample to PCM16
103- // // const audioCtx = new AudioContext({ sampleRate: 16000 });
104- // // const audioBuffer = await audioCtx.decodeAudioData(arrayBuffer);
105-
106- // // const channelData = audioBuffer.getChannelData(0); // mono
107- // // const pcm16 = new Int16Array(channelData.length);
108-
109- // // for (let i = 0; i < channelData.length; i++) {
110- // // pcm16[i] = Math.max(-1, Math.min(1, channelData[i])) * 32767;
111- // // }
112-
113- // // const pcmBytes = new Uint8Array(pcm16.buffer);
114- // // const base64 = btoa(String.fromCharCode(...pcmBytes));
115- // // console.log(base64);
116- // };
117-
118- // this.mediaRecorder.start();
119- // })
120- // .catch((err) => {
121- // console.error("Failed to access microphone", err);
122- // });
71+ this . socket . onmessage = ( /** @type {MessageEvent } */ e ) => {
72+ try {
73+ const json = JSON . parse ( e . data ) ;
74+ if ( json . event === 'media' && ! ! json . media . payload ) {
75+ const data = json . media . payload ;
76+ enqueueAudioChunk ( data ) ;
77+ }
78+ } catch {
79+ // console.error('Error when parsing message');
80+ }
12381 } ;
12482
12583 this . socket . onclose = ( ) => {
12684 console . log ( "Websocket closed" ) ;
127- }
85+ } ;
12886
129- this . socket . onerror = ( /** @type {any } */ e ) => console . error ( 'WebSocket error' , e ) ;
87+ this . socket . onerror = ( /** @type {Event } */ e ) => {
88+ console . error ( 'WebSocket error' , e ) ;
89+ } ;
13090 } ,
13191
13292 stop ( ) {
133- if ( this . mediaRecorder ) {
134- this . mediaRecorder . stop ( ) ;
135- }
136-
93+ reset ( ) ;
94+
13795 if ( this . mediaStream ) {
13896 this . mediaStream . getTracks ( ) . forEach ( t => t . stop ( ) ) ;
13997 this . mediaStream = null ;
14098 }
14199
142- if ( this . recognition ) {
143- this . recognition . stop ( ) ;
100+ if ( this . workletNode ) {
101+ this . micSource ?. disconnect ( this . workletNode ) ;
102+ this . workletNode . port . close ( ) ;
103+ this . workletNode . disconnect ( ) ;
144104 }
145105
146106 if ( this . socket ?. readyState === WebSocket . OPEN ) {
@@ -152,24 +112,66 @@ export const realtimeChat = {
152112 }
153113} ;
154114
115+
116+ function buildWebsocketUrl ( ) {
117+ let url = '' ;
118+ const host = PUBLIC_SERVICE_URL . split ( '://' ) ;
119+
120+ if ( PUBLIC_SERVICE_URL . startsWith ( 'https' ) ) {
121+ url = `wss:${ host [ 1 ] } ` ;
122+ } else if ( PUBLIC_SERVICE_URL . startsWith ( 'http' ) ) {
123+ url = `ws:${ host [ 1 ] } ` ;
124+ }
125+
126+ return url ;
127+ }
128+
129+ function reset ( ) {
130+ isPlaying = false ;
131+ audioQueue = [ ] ;
132+ }
133+
155134/**
156- * @param {ArrayBuffer } buffer
135+ * @param {string } base64Audio
157136 */
158- function arrayBufferToBase64 ( buffer ) {
159- var binary = "" ;
160- var bytes = new Uint8Array ( buffer ) ;
161- var len = bytes . byteLength ;
162- for ( var i = 0 ; i < len ; i ++ ) {
163- binary += String . fromCharCode ( bytes [ i ] ) ;
137+ function enqueueAudioChunk ( base64Audio ) {
138+ const arrayBuffer = base64ToArrayBuffer ( base64Audio ) ;
139+ const float32Data = convert16BitPCMToFloat32 ( arrayBuffer ) ;
140+
141+ const audioBuffer = audioCtx . createBuffer ( 1 , float32Data . length , sampleRate ) ;
142+ audioBuffer . getChannelData ( 0 ) . set ( float32Data ) ;
143+ audioQueue . push ( audioBuffer ) ;
144+
145+ if ( ! isPlaying ) {
146+ playNext ( ) ;
164147 }
165- return window . btoa ( binary ) ;
166148}
167149
150+ function playNext ( ) {
151+ if ( audioQueue . length === 0 ) {
152+ isPlaying = false ;
153+ return ;
154+ }
155+
156+ isPlaying = true ;
157+ const buffer = audioQueue . shift ( ) ;
158+
159+ const source = audioCtx . createBufferSource ( ) ;
160+ source . buffer = buffer ;
161+ source . connect ( audioCtx . destination ) ;
162+
163+ source . onended = ( ) => {
164+ playNext ( ) ;
165+ } ;
166+ source . start ( ) ;
167+ }
168+
169+
168170/**
169171 * @param {string } workletName
170172 * @param {string } workletSrc
171173 */
172- function createWorketFromSrc ( workletName , workletSrc ) {
174+ function createWorkletFromSrc ( workletName , workletSrc ) {
173175 const script = new Blob (
174176 [ `registerProcessor("${ workletName } ", ${ workletSrc } )` ] ,
175177 {
@@ -178,4 +180,51 @@ function createWorketFromSrc(workletName, workletSrc) {
178180 ) ;
179181
180182 return URL . createObjectURL ( script ) ;
183+ } ;
184+
185+
186+ /**
187+ * @param {ArrayBuffer } buffer
188+ */
189+ function arrayBufferToBase64 ( buffer ) {
190+ var binary = "" ;
191+ var bytes = new Uint8Array ( buffer ) ;
192+ var len = bytes . byteLength ;
193+ for ( var i = 0 ; i < len ; i ++ ) {
194+ binary += String . fromCharCode ( bytes [ i ] ) ;
195+ }
196+ return btoa ( binary ) ;
197+ } ;
198+
199+ /**
200+ * @param {string } base64
201+ */
202+ function base64ToArrayBuffer ( base64 ) {
203+ const binaryStr = atob ( base64 ) ;
204+ const len = binaryStr . length ;
205+ const bytes = new Uint8Array ( len ) ;
206+
207+ for ( let i = 0 ; i < len ; i ++ ) {
208+ bytes [ i ] = binaryStr . charCodeAt ( i ) ;
209+ }
210+ return bytes . buffer ;
211+ } ;
212+
213+ /**
214+ * @param {ArrayBuffer } buffer
215+ */
216+ function convert16BitPCMToFloat32 ( buffer ) {
217+ const chunk = new Uint8Array ( buffer ) ;
218+ const output = new Float32Array ( chunk . length / 2 ) ;
219+ const dataView = new DataView ( chunk . buffer ) ;
220+
221+ for ( let i = 0 ; i < chunk . length / 2 ; i ++ ) {
222+ try {
223+ const int16 = dataView . getInt16 ( i * 2 , true ) ;
224+ output [ i ] = int16 / 32767 ;
225+ } catch ( e ) {
226+ console . error ( e ) ;
227+ }
228+ }
229+ return output ;
181230} ;
0 commit comments