Skip to content

Commit 5fd4a7e

Browse files
jackshen310 and shenxiaojie.316 authored
feat: refactor and fix aec bug on HarmonyOS (#271)
* feat: refactor and fix aec bug on HarmonyOS
* chore: Publish feat/refactor
* fix(api): Fix sentence bug

---------

Co-authored-by: shenxiaojie.316 <shenxiaojie.316@bytedance.com>
1 parent 2f9d9c3 commit 5fd4a7e

14 files changed

Lines changed: 348 additions & 185 deletions

File tree

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"changes": [
3+
{
4+
"packageName": "@coze/api",
5+
"comment": "refactor and fix aec bug on HarmonyOS",
6+
"type": "minor"
7+
}
8+
],
9+
"packageName": "@coze/api",
10+
"email": "shenxiaojie.316@bytedance.com"
11+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"changes": [
3+
{
4+
"packageName": "@coze/api",
5+
"comment": "Publish feat/refactor",
6+
"type": "patch"
7+
}
8+
],
9+
"packageName": "@coze/api",
10+
"email": "shenxiaojie.316@bytedance.com"
11+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"changes": [
3+
{
4+
"packageName": "@coze/api",
5+
"comment": "Fix sentence bug",
6+
"type": "patch"
7+
}
8+
],
9+
"packageName": "@coze/api",
10+
"email": "shenxiaojie.316@bytedance.com"
11+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"changes": [
3+
{
4+
"packageName": "@coze/api",
5+
"comment": "Fix sentence bug",
6+
"type": "patch"
7+
}
8+
],
9+
"packageName": "@coze/api",
10+
"email": "shenxiaojie.316@bytedance.com"
11+
}

examples/coze-js-web/src/pages/chat-x/use-ws-api.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ const useWsAPI = (
216216

217217
transcriptionClientRef.current = transcriptionClient;
218218

219-
transcriptionClient.on('data', data => {
219+
transcriptionClient.on(WebsocketsEventType.ALL, data => {
220220
console.log('[transcriptions] ws data', data);
221221

222222
if (
@@ -237,8 +237,7 @@ const useWsAPI = (
237237
}, []);
238238

239239
const stopTranscriptions = useCallback(async () => {
240-
const finalAudio = await transcriptionClientRef.current?.stop();
241-
console.log('[transcriptions] finalAudio:', finalAudio);
240+
await transcriptionClientRef.current?.stop();
242241
}, []);
243242

244243
const interruptAudio = useCallback(() => {

examples/realtime-websocket/src/pages/chat/sentence-message.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ const SentenceMessage = forwardRef(
124124
case ClientEventType.AUDIO_SENTENCE_PLAYBACK_START: {
125125
// 处理句子开始事件
126126
const { content } = (event as AudioSentencePlaybackStartEvent).data;
127+
console.log('handleAudioSentencePlaybackStart', event);
127128
if (isFirstSentenceRef.current) {
128129
// 首个句子,创建新消息
129130
setMessageList(prev => [

packages/coze-js/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@coze/api",
3-
"version": "1.3.0",
3+
"version": "1.3.0-beta.1",
44
"description": "Official Coze Node.js SDK for seamless AI integration into your applications | 扣子官方 Node.js SDK,助您轻松集成 AI 能力到应用中",
55
"keywords": [
66
"coze",
@@ -116,4 +116,4 @@
116116
"agora-rtc-sdk-ng": "$agora-rtc-sdk-ng"
117117
}
118118
}
119-
}
119+
}

packages/coze-js/src/ws-tools/chat/base.ts

Lines changed: 23 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,16 @@
11
import { v4 as uuid } from 'uuid';
22

33
import { type WavStreamPlayer } from '../wavtools';
4+
import SentenceSynchronizer from './sentence-synchronizer';
45
import {
56
type WsChatClientOptions,
67
WsChatEventNames,
78
type WsChatCallbackHandler,
89
type WsChatEventData,
9-
type SentenceItem,
10-
ClientEventType,
1110
} from '../types';
1211
import {
1312
APIError,
1413
type AudioCodec,
15-
type ConversationAudioSentenceStartEvent,
1614
COZE_CN_BASE_WS_URL,
1715
CozeAPI,
1816
type CreateChatWsReq,
@@ -30,19 +28,11 @@ abstract class BaseWsChatClient {
3028
protected trackId = 'default';
3129
protected api: CozeAPI;
3230
protected audioDeltaList: string[] = [];
33-
/** 句子列表队列 */
34-
protected sentenceList: SentenceItem[] = [];
35-
/** 首个音频delta的时间戳(用于计算实际经过的时间)*/
36-
protected firstAudioDeltaTime: number | null = null;
37-
// 当前播放的句子索引
38-
protected currentSentenceIndex = -1;
39-
// 句子切换定时器
40-
protected sentenceSwitchTimer: NodeJS.Timeout | null = null;
41-
// 音频完成定时器
42-
protected audioCompletedTimer: NodeJS.Timeout | null = null;
4331
public config: WsChatClientOptions;
4432
protected outputAudioCodec: AudioCodec = 'pcm';
4533
protected outputAudioSampleRate = 24000;
34+
// 音字同步器实例
35+
protected sentenceSynchronizer: SentenceSynchronizer;
4636

4737
constructor(config: WsChatClientOptions) {
4838
this.api = new CozeAPI({
@@ -52,6 +42,11 @@ abstract class BaseWsChatClient {
5242
});
5343

5444
this.config = config;
45+
46+
// 初始化音字同步器,传入事件发射器
47+
this.sentenceSynchronizer = new SentenceSynchronizer({
48+
eventEmitter: (eventName, eventData) => this.emit(eventName, eventData),
49+
});
5550
}
5651

5752
protected async init() {
@@ -116,20 +111,19 @@ abstract class BaseWsChatClient {
116111
break;
117112

118113
case WebsocketsEventType.CONVERSATION_AUDIO_SENTENCE_START:
119-
this.handleSentenceStart(data);
114+
this.sentenceSynchronizer.handleSentenceStart(data);
120115
break;
121116

122117
case WebsocketsEventType.INPUT_AUDIO_BUFFER_SPEECH_STARTED:
118+
// 打断当前播放
123119
this.clear();
124120
break;
125121

126122
case WebsocketsEventType.CONVERSATION_AUDIO_COMPLETED:
127-
this.handleAudioCompleted();
123+
this.sentenceSynchronizer.handleAudioCompleted();
128124
break;
129125

130126
case WebsocketsEventType.CONVERSATION_CHAT_CANCELED:
131-
// this.isInterrupted = false;
132-
this.emitSentenceEnd();
133127
this.clear();
134128
break;
135129
default:
@@ -170,7 +164,6 @@ abstract class BaseWsChatClient {
170164
}
171165

172166
sendTextMessage(text: string) {
173-
this.clear();
174167
this.sendMessage({
175168
id: uuid(),
176169
event_type: WebsocketsEventType.CONVERSATION_MESSAGE_CREATE,
@@ -220,14 +213,13 @@ abstract class BaseWsChatClient {
220213
}
221214

222215
async clear() {
223-
this.audioDeltaList.length = 0;
224-
225-
// 重置音字同步状态
226-
this.resetSentenceSyncState();
227-
216+
this.audioDeltaList = [];
228217
// 打断当前播放
229-
await this.wavStreamPlayer?.interrupt();
230218
this.trackId = `my-track-id-${uuid()}`;
219+
await this.wavStreamPlayer?.interrupt();
220+
221+
// 重置音字同步状态
222+
this.sentenceSynchronizer.resetSentenceSyncState();
231223
}
232224

233225
protected emit(eventName: string, event: WsChatEventData) {
@@ -249,19 +241,13 @@ abstract class BaseWsChatClient {
249241
view[i] = decodedContent.charCodeAt(i);
250242
}
251243

252-
// 记录首个音频delta的时间
253-
if (this.firstAudioDeltaTime === null) {
254-
this.firstAudioDeltaTime = performance.now();
255-
}
244+
// 设置首个音频 Delta 时间
245+
this.sentenceSynchronizer.setFirstAudioDeltaTime();
256246

257-
if (this.sentenceList.length > 0) {
258-
// 计算音频时长
259-
// 例如:PCM 16bit 采样率为24000的计算公式: (字节数 / 2) / 24000 * 1000 毫秒
260-
const audioDurationMs =
261-
(decodedContent.length / 2 / this.outputAudioSampleRate) * 1000;
262-
this.sentenceList[this.sentenceList.length - 1].audioDuration +=
263-
audioDurationMs; // 更新当前句子的音频时长
264-
}
247+
// 更新最后一个句子的音频时长
248+
this.sentenceSynchronizer.updateLatestSentenceAudioDuration(
249+
decodedContent.length,
250+
);
265251

266252
try {
267253
await this.wavStreamPlayer?.add16BitPCM(arrayBuffer, this.trackId);
@@ -275,132 +261,14 @@ abstract class BaseWsChatClient {
275261
}
276262
};
277263

278-
private handleAudioCompleted() {
279-
// 标记最后一个句子
280-
this.audioCompletedTimer = setInterval(() => {
281-
// 确保音频delta列表为空
282-
if (this.audioDeltaList.length === 0) {
283-
if (this.sentenceList.length > 0) {
284-
this.sentenceList[this.sentenceList.length - 1].isLastSentence = true;
285-
}
286-
this.audioCompletedTimer && clearInterval(this.audioCompletedTimer);
287-
}
288-
}, 50);
289-
}
290-
291-
/**
292-
* 处理句子开始事件
293-
* @param event 句子开始事件
294-
*/
295-
private handleSentenceStart(
296-
event: ConversationAudioSentenceStartEvent,
297-
): void {
298-
// 将句子加入队列,存储文本和初始音频累计时长
299-
const sentenceItem = {
300-
id: event.id,
301-
content: event.data.text,
302-
audioDuration: 0, // 初始时该句子的音频累计时长为0
303-
isLastSentence: false,
304-
};
305-
this.sentenceList.push(sentenceItem);
306-
307-
// 如果是首个句子,立即触发客户端句子开始事件
308-
if (this.sentenceList.length === 1 && this.currentSentenceIndex === -1) {
309-
this.currentSentenceIndex = 0;
310-
this.emitSentenceStart(sentenceItem);
311-
this.scheduleSentenceSwitch();
312-
}
313-
}
314-
315-
private scheduleSentenceSwitch(): void {
316-
if (this.sentenceSwitchTimer) {
317-
clearTimeout(this.sentenceSwitchTimer);
318-
}
319-
320-
const { isLastSentence, audioDuration } =
321-
this.sentenceList[this.currentSentenceIndex];
322-
323-
// 是否还有下一个句子
324-
const hasNextSentence =
325-
this.currentSentenceIndex + 1 < this.sentenceList.length;
326-
327-
let delay = 0;
328-
if (this.currentSentenceIndex === 0) {
329-
// 处理第一个句子 delay = 句子已累计时长 - 已播放时长
330-
delay =
331-
audioDuration -
332-
(performance.now() - (this.firstAudioDeltaTime || performance.now()));
333-
if (delay <= 0) {
334-
// postpone until we have a meaningful duration
335-
this.sentenceSwitchTimer = setTimeout(
336-
() => this.scheduleSentenceSwitch(),
337-
50,
338-
);
339-
return;
340-
}
341-
} else {
342-
// 处理后续句子 delay = 句子累计时长
343-
delay = audioDuration;
344-
}
345-
346-
this.sentenceSwitchTimer = setTimeout(() => {
347-
if (hasNextSentence) {
348-
this.currentSentenceIndex++;
349-
const nextSentence = this.sentenceList[this.currentSentenceIndex];
350-
this.emitSentenceStart(nextSentence);
351-
}
352-
if (isLastSentence) {
353-
this.emitSentenceEnd();
354-
} else {
355-
this.scheduleSentenceSwitch();
356-
}
357-
}, delay);
358-
}
359-
360-
/**
361-
* 发送客户端句子开始事件
362-
* @param sentenceItem 句子开始事件
363-
*/
364-
private emitSentenceStart(sentenceItem: SentenceItem): void {
365-
this.emit(WsChatEventNames.AUDIO_SENTENCE_PLAYBACK_START, {
366-
event_type: ClientEventType.AUDIO_SENTENCE_PLAYBACK_START,
367-
data: {
368-
content: sentenceItem.content,
369-
id: sentenceItem.id,
370-
},
371-
});
372-
}
373-
374-
/**
375-
* 发送客户端句子结束事件
376-
*/
377-
private emitSentenceEnd(): void {
378-
this.emit(WsChatEventNames.AUDIO_SENTENCE_PLAYBACK_ENDED, {
379-
event_type: ClientEventType.AUDIO_SENTENCE_PLAYBACK_ENDED,
380-
});
381-
}
382-
383-
private resetSentenceSyncState() {
384-
this.currentSentenceIndex = -1;
385-
this.sentenceList = [];
386-
this.firstAudioDeltaTime = null;
387-
if (this.sentenceSwitchTimer) {
388-
clearTimeout(this.sentenceSwitchTimer);
389-
}
390-
if (this.audioCompletedTimer) {
391-
clearInterval(this.audioCompletedTimer);
392-
}
393-
this.sentenceSwitchTimer = null;
394-
this.audioCompletedTimer = null;
395-
}
396-
397264
// eslint-disable-next-line @typescript-eslint/no-explicit-any
398265
protected log(...args: any[]) {
399266
if (this.config.debug) {
400267
console.log('[WsChatClient]', ...args);
401268
}
402269
return true;
403270
}
271+
404272
// eslint-disable-next-line @typescript-eslint/no-explicit-any
405273
protected warn(...args: any[]) {
406274
if (this.config.debug) {

0 commit comments

Comments
 (0)