Skip to content

Commit 9966804

Browse files
author
Jicheng Lu
committed
init
1 parent 0f665ab commit 9966804

5 files changed

Lines changed: 251 additions & 12 deletions

File tree

src/lib/common/audio-player/AudioPlayer.svelte

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@
242242
if (loop === "none") {
243243
if (order === "list") {
244244
if ($playList.playingIndex < audios.length - 1) {
245-
const promise = buildNextSongPromise(nextIdx);
245+
const promise = buildNextAudioPromise(nextIdx);
246246
promise.then(() => play());
247247
} else {
248248
$playList.playingIndex = ($playList.playingIndex + 1) % audios.length;
@@ -257,21 +257,21 @@
257257
} else {
258258
targetIdx = randomIdx;
259259
}
260-
const promise = buildNextSongPromise(targetIdx);
260+
const promise = buildNextAudioPromise(targetIdx);
261261
promise.then(() => play());
262262
}
263263
} else if (loop === "one") {
264264
player.currentTime = 0;
265265
} else if (loop === "all") {
266-
const promise = buildNextSongPromise(nextIdx);
266+
const promise = buildNextAudioPromise(nextIdx);
267267
promise.then(() => play());
268268
}
269269
};
270270
271271
/**
272272
* @param {number} idx
273273
*/
274-
function buildNextSongPromise(idx) {
274+
function buildNextAudioPromise(idx) {
275275
return new Promise((/** @type {any} */ resolve) => {
276276
$playList.playingIndex = idx;
277277
player.currentTime = 0;
@@ -287,8 +287,8 @@
287287
/**
288288
* @param {number} idx
289289
*/
290-
function switchSong(idx) {
291-
const promise = buildNextSongPromise(idx);
290+
function switchAudio(idx) {
291+
const promise = buildNextAudioPromise(idx);
292292
if (autoPlayNextOnClick) {
293293
promise.then(() => {
294294
play();
@@ -490,7 +490,7 @@
490490
{#each $audioList as song, idx}
491491
<!-- svelte-ignore a11y-click-events-have-key-events -->
492492
<!-- svelte-ignore a11y-no-noninteractive-element-interactions -->
493-
<li on:click={() => switchSong(idx) }>
493+
<li on:click={() => switchAudio(idx) }>
494494
{#if idx === $playList.playingIndex}
495495
<span class="aplayer-list-cur" />
496496
{/if}

src/lib/helpers/pcmProcessor.js

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
2+
/**
 * Source text of an AudioWorklet processor that converts microphone samples
 * to 16-bit PCM and posts them to the main thread in fixed-size chunks.
 *
 * The string is spliced in as the second argument of a `registerProcessor(...)`
 * call, so it must remain a single class expression.
 */
export const AudioRecordingWorklet = `
class AudioProcessingWorklet extends AudioWorkletProcessor {

  // Samples accumulate here and are flushed every 2048 samples,
  // which at 16khz is about 8 times a second.
  buffer = new Int16Array(2048);

  // Index in the buffer where the next sample is written.
  bufferWriteIndex = 0;

  constructor() {
    super();
    this.hasAudio = false;
  }

  /**
   * @param inputs Float32Array[][] [input#][channel#][sample#] so the first
   *   channel of the first input is inputs[0][0]
   * @returns {boolean} always true
   */
  process(inputs) {
    // Only the first channel of the first input is recorded; an input with
    // no channels is skipped.
    const channel0 = inputs[0]?.[0];
    if (channel0) {
      this.processChunk(channel0);
    }
    return true;
  }

  /**
   * Post the samples written so far as a "chunk" message and restart the
   * buffer from index 0. slice() copies, so the posted ArrayBuffer is not
   * affected by later writes.
   */
  sendAndClearBuffer() {
    this.port.postMessage({
      event: "chunk",
      data: {
        int16arrayBuffer: this.buffer.slice(0, this.bufferWriteIndex).buffer,
      },
    });
    this.bufferWriteIndex = 0;
  }

  /**
   * Convert float samples to int16 and append them to the buffer, flushing
   * each time the buffer fills up.
   * @param float32Array Float32Array of samples, nominally in [-1, 1]
   */
  processChunk(float32Array) {
    const sampleCount = float32Array.length;

    for (let i = 0; i < sampleCount; i++) {
      // Scale -1..1 to the int16 range and clamp. Int16Array stores values
      // modulo 2^16, so an unclamped 1.0 * 32768 would wrap to -32768.
      const scaled = float32Array[i] * 32768;
      const int16Value = Math.max(-32768, Math.min(32767, scaled));
      this.buffer[this.bufferWriteIndex++] = int16Value;
      if (this.bufferWriteIndex >= this.buffer.length) {
        this.sendAndClearBuffer();
      }
    }
  }
}
`;

src/lib/services/llm-realtime-service.js

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import { endpoints } from '$lib/services/api-endpoints.js';
22
import { replaceUrl } from '$lib/helpers/http';
33
import axios from 'axios';
4-
import { json } from '@sveltejs/kit';
54

65
export const llmRealtime = {
76
/** @type {RTCPeerConnection} */
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
import { AudioRecordingWorklet } from "$lib/helpers/pcmProcessor";
2+
3+
// Browser constructors are looked up on globalThis rather than window so that
// merely importing this module does not throw where window is undefined;
// in such environments both constants are simply undefined.

// @ts-ignore
const SpeechRecognition = globalThis.SpeechRecognition || globalThis.webkitSpeechRecognition;

// @ts-ignore
const AudioContext = globalThis.AudioContext || globalThis.webkitAudioContext;
8+
9+
/**
 * Streams microphone audio to the chat backend over a WebSocket.
 *
 * start() opens the socket and, once it is connected, captures the microphone,
 * runs it through the PCM worklet and forwards every chunk as a base64
 * "media" event. stop() releases everything start() acquired.
 */
export const realtimeChat = {

  /** @type {WebSocket | null} */
  socket: null,

  /** @type {MediaRecorder | null} */
  mediaRecorder: null,

  /** @type {MediaStream | null} */
  mediaStream: null,

  /** @type {SpeechRecognition | null} */
  recognition: null,

  /** @type {AudioContext | null} */
  audioContext: null,

  /**
   * Open the chat stream and start sending microphone audio.
   *
   * @param {string} agentId
   * @param {string} conversationId
   * @param {string} [baseUrl] WebSocket origin of the chat backend
   */
  start(agentId, conversationId, baseUrl = "ws://localhost:5100") {
    // Release a previous session first so its microphone and audio context
    // are not orphaned when the fields below are overwritten.
    if (this.socket) {
      this.stop();
    }

    const socket = new WebSocket(
      `${baseUrl}/chat/stream/${encodeURIComponent(agentId)}/${encodeURIComponent(conversationId)}`
    );
    this.socket = socket;

    // True once stop() has run (or another session replaced this one).
    const isStale = () => this.socket !== socket;

    socket.onopen = async () => {
      console.log("WebSocket connected");

      if (isStale()) {
        socket.close();
        return;
      }

      socket.send(JSON.stringify({
        event: "start"
      }));

      /** @type {string | null} */
      let workletUrl = null;
      try {
        const mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
        if (isStale()) {
          // stop() ran while the permission prompt was pending; it could not
          // see this stream, so release it here.
          mediaStream.getTracks().forEach(t => t.stop());
          return;
        }
        this.mediaStream = mediaStream;

        const audioCtx = new AudioContext({ sampleRate: 16000 });
        this.audioContext = audioCtx;

        const workletName = "audio-recorder-worklet";
        workletUrl = createWorketFromSrc(workletName, AudioRecordingWorklet);
        await audioCtx.audioWorklet.addModule(workletUrl);
        if (isStale()) {
          // stop() already closed the stream and the context stored above.
          return;
        }

        const workletNode = new AudioWorkletNode(audioCtx, workletName);
        // Attach the handler before connecting so no chunk is missed.
        workletNode.port.onmessage = event => {
          const arrayBuffer = event.data?.data?.int16arrayBuffer;
          if (arrayBuffer && socket.readyState === WebSocket.OPEN) {
            socket.send(JSON.stringify({
              event: 'media',
              payload: arrayBufferToBase64(arrayBuffer)
            }));
          }
        };

        const micSource = audioCtx.createMediaStreamSource(mediaStream);
        micSource.connect(workletNode);
      } catch (err) {
        // Covers a denied microphone, a missing AudioContext and worklet
        // load failures; without audio the session is useless, so end it.
        console.error("Failed to start microphone streaming", err);
        if (!isStale()) {
          this.stop();
        }
      } finally {
        // The blob URL is only needed while addModule() loads it.
        if (workletUrl) {
          URL.revokeObjectURL(workletUrl);
        }
      }
    };

    socket.onclose = () => {
      console.log("Websocket closed");
      // If the connection closed without stop() being called, still release
      // the microphone and the audio context.
      if (!isStale()) {
        this.stop();
      }
    };

    socket.onerror = (/** @type {any} */ e) => console.error('WebSocket error', e);
  },

  /**
   * Release the microphone, audio context, recorder, recognizer and socket.
   * Safe to call repeatedly or when nothing was started.
   */
  stop() {
    if (this.mediaRecorder) {
      // MediaRecorder.stop() throws when the recorder is already inactive.
      if (this.mediaRecorder.state !== "inactive") {
        this.mediaRecorder.stop();
      }
      this.mediaRecorder = null;
    }

    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach(t => t.stop());
      this.mediaStream = null;
    }

    if (this.audioContext) {
      const audioCtx = this.audioContext;
      this.audioContext = null;
      if (audioCtx.state !== "closed") {
        audioCtx.close().catch((/** @type {any} */ err) => console.error("Failed to close audio context", err));
      }
    }

    if (this.recognition) {
      this.recognition.stop();
      this.recognition = null;
    }

    // Clear the field before closing so the onclose handler sees a stale
    // session and does not re-enter stop().
    const socket = this.socket;
    this.socket = null;
    if (socket) {
      if (socket.readyState === WebSocket.OPEN) {
        socket.send(JSON.stringify({
          event: 'disconnect'
        }));
        socket.close();
      } else if (socket.readyState === WebSocket.CONNECTING) {
        socket.close();
      }
    }
  }
};
154+
155+
/**
 * Encode the raw bytes of a buffer as a base64 string.
 *
 * @param {ArrayBuffer} buffer
 * @returns {string} base64 text of the buffer's bytes ("" for an empty buffer)
 */
function arrayBufferToBase64(buffer) {
  const bytes = new Uint8Array(buffer);

  // String.fromCharCode receives the bytes as call arguments, so convert in
  // bounded slices to stay clear of engine argument-count limits.
  const CHUNK_SIZE = 0x2000;
  const parts = [];
  for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
    parts.push(String.fromCharCode(...bytes.subarray(offset, offset + CHUNK_SIZE)));
  }

  // The global btoa is used instead of window.btoa so the helper does not
  // depend on a window object being present.
  return btoa(parts.join(""));
}
167+
168+
/**
 * Build a blob URL for a script that registers an AudioWorklet processor.
 *
 * The generated script is `registerProcessor(<name>, <workletSrc>)`, so
 * workletSrc must be the source text of a class expression. The caller owns
 * the returned URL and should revoke it once the module has been loaded.
 *
 * @param {string} workletName name the processor is registered under
 * @param {string} workletSrc source text of the processor class
 * @returns {string} object URL of the generated script
 */
function createWorketFromSrc(workletName, workletSrc) {
  // JSON.stringify yields a correctly escaped double-quoted literal, so names
  // containing quotes or backslashes cannot break the generated script.
  const quotedName = JSON.stringify(workletName);
  const script = new Blob(
    [`registerProcessor(${quotedName}, ${workletSrc})`],
    {
      type: "application/javascript",
    },
  );

  return URL.createObjectURL(script);
}

src/routes/chat/[agentId]/[conversationId]/chat-box.svelte

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
import PersistLog from './persist-log/persist-log.svelte';
7272
import InstantLog from './instant-log/instant-log.svelte';
7373
import LocalStorageManager from '$lib/helpers/utils/storage-manager';
74+
import { realtimeChat } from '$lib/services/realtime-chat-service';
7475
7576
7677
const options = {
@@ -673,13 +674,15 @@
673674
if (disableSpeech) return;
674675
675676
if (!isListening) {
676-
llmRealtime.start(params.agentId, (/** @type {any} */ message) => {
677-
console.log(message);
678-
});
677+
// llmRealtime.start(params.agentId, (/** @type {any} */ message) => {
678+
// console.log(message);
679+
// });
680+
realtimeChat.start(params.agentId, params.conversationId);
679681
isListening = true;
680682
microphoneIcon = "microphone";
681683
} else {
682-
llmRealtime.stop();
684+
// llmRealtime.stop();
685+
realtimeChat.stop();
683686
isListening = false;
684687
microphoneIcon = "microphone-off";
685688
}

0 commit comments

Comments
 (0)