11import { v4 as uuid } from 'uuid' ;
22
33import { type WavStreamPlayer } from '../wavtools' ;
4+ import SentenceSynchronizer from './sentence-synchronizer' ;
45import {
56 type WsChatClientOptions ,
67 WsChatEventNames ,
78 type WsChatCallbackHandler ,
89 type WsChatEventData ,
9- type SentenceItem ,
10- ClientEventType ,
1110} from '../types' ;
1211import {
1312 APIError ,
1413 type AudioCodec ,
15- type ConversationAudioSentenceStartEvent ,
1614 COZE_CN_BASE_WS_URL ,
1715 CozeAPI ,
1816 type CreateChatWsReq ,
@@ -30,19 +28,11 @@ abstract class BaseWsChatClient {
3028 protected trackId = 'default' ;
3129 protected api : CozeAPI ;
3230 protected audioDeltaList : string [ ] = [ ] ;
33- /** Queue of sentence items */
34- protected sentenceList : SentenceItem [ ] = [ ] ;
35- /** Timestamp of the first audio delta (used to compute the actual elapsed time) */
36- protected firstAudioDeltaTime : number | null = null ;
37- // Index of the sentence currently being played
38- protected currentSentenceIndex = - 1 ;
39- // Timer used to switch between sentences
40- protected sentenceSwitchTimer : NodeJS . Timeout | null = null ;
41- // Timer used to detect audio completion
42- protected audioCompletedTimer : NodeJS . Timeout | null = null ;
4331 public config : WsChatClientOptions ;
4432 protected outputAudioCodec : AudioCodec = 'pcm' ;
4533 protected outputAudioSampleRate = 24000 ;
34+ // Audio-text synchronizer instance
35+ protected sentenceSynchronizer : SentenceSynchronizer ;
4636
4737 constructor ( config : WsChatClientOptions ) {
4838 this . api = new CozeAPI ( {
@@ -52,6 +42,11 @@ abstract class BaseWsChatClient {
5242 } ) ;
5343
5444 this . config = config ;
45+
46+ // Initialize the audio-text synchronizer, passing in an event emitter that forwards to this.emit
47+ this . sentenceSynchronizer = new SentenceSynchronizer ( {
48+ eventEmitter : ( eventName , eventData ) => this . emit ( eventName , eventData ) ,
49+ } ) ;
5550 }
5651
5752 protected async init ( ) {
@@ -116,20 +111,19 @@ abstract class BaseWsChatClient {
116111 break ;
117112
118113 case WebsocketsEventType . CONVERSATION_AUDIO_SENTENCE_START :
119- this . handleSentenceStart ( data ) ;
114+ this . sentenceSynchronizer . handleSentenceStart ( data ) ;
120115 break ;
121116
122117 case WebsocketsEventType . INPUT_AUDIO_BUFFER_SPEECH_STARTED :
118+ // Interrupt the current playback
123119 this . clear ( ) ;
124120 break ;
125121
126122 case WebsocketsEventType . CONVERSATION_AUDIO_COMPLETED :
127- this . handleAudioCompleted ( ) ;
123+ this . sentenceSynchronizer . handleAudioCompleted ( ) ;
128124 break ;
129125
130126 case WebsocketsEventType . CONVERSATION_CHAT_CANCELED :
131- // this.isInterrupted = false;
132- this . emitSentenceEnd ( ) ;
133127 this . clear ( ) ;
134128 break ;
135129 default :
@@ -170,7 +164,6 @@ abstract class BaseWsChatClient {
170164 }
171165
172166 sendTextMessage ( text : string ) {
173- this . clear ( ) ;
174167 this . sendMessage ( {
175168 id : uuid ( ) ,
176169 event_type : WebsocketsEventType . CONVERSATION_MESSAGE_CREATE ,
@@ -220,14 +213,13 @@ abstract class BaseWsChatClient {
220213 }
221214
222215 async clear ( ) {
223- this . audioDeltaList . length = 0 ;
224-
225- // Reset the audio-text synchronization state
226- this . resetSentenceSyncState ( ) ;
227-
216+ this . audioDeltaList = [ ] ;
228217 // Interrupt the current playback
229- await this . wavStreamPlayer ?. interrupt ( ) ;
230218 this . trackId = `my-track-id-${ uuid ( ) } ` ; // switch to a freshly generated track id
219+ await this . wavStreamPlayer ?. interrupt ( ) ;
220+
221+ // Reset the audio-text synchronization state
222+ this . sentenceSynchronizer . resetSentenceSyncState ( ) ;
231223 }
232224
233225 protected emit ( eventName : string , event : WsChatEventData ) {
@@ -249,19 +241,13 @@ abstract class BaseWsChatClient {
249241 view [ i ] = decodedContent . charCodeAt ( i ) ;
250242 }
251243
252- // Record the time of the first audio delta
253- if ( this . firstAudioDeltaTime === null ) {
254- this . firstAudioDeltaTime = performance . now ( ) ;
255- }
244+ // Set the time of the first audio delta
245+ this . sentenceSynchronizer . setFirstAudioDeltaTime ( ) ;
256246
257- if ( this . sentenceList . length > 0 ) {
258- // Compute the audio duration
259- // e.g. for 16-bit PCM at a 24000 Hz sample rate: (byte count / 2) / 24000 * 1000 milliseconds
260- const audioDurationMs =
261- ( decodedContent . length / 2 / this . outputAudioSampleRate ) * 1000 ;
262- this . sentenceList [ this . sentenceList . length - 1 ] . audioDuration +=
263- audioDurationMs ; // 更新当前句子的音频时长
264- }
247+ // Update the audio duration of the latest sentence
248+ this . sentenceSynchronizer . updateLatestSentenceAudioDuration (
249+ decodedContent . length ,
250+ ) ;
265251
266252 try {
267253 await this . wavStreamPlayer ?. add16BitPCM ( arrayBuffer , this . trackId ) ;
@@ -275,132 +261,14 @@ abstract class BaseWsChatClient {
275261 }
276262 } ;
277263
278- private handleAudioCompleted ( ) {
279- // Mark the last sentence
280- this . audioCompletedTimer = setInterval ( ( ) => {
281- // Make sure the audio delta list is empty
282- if ( this . audioDeltaList . length === 0 ) {
283- if ( this . sentenceList . length > 0 ) {
284- this . sentenceList [ this . sentenceList . length - 1 ] . isLastSentence = true ;
285- }
286- this . audioCompletedTimer && clearInterval ( this . audioCompletedTimer ) ; // stop polling once the delta list is empty
287- }
288- } , 50 ) ; // re-check every 50 ms
289- }
290-
291- /**
292- * Handle the sentence-start event
293- * @param event the sentence-start event
294- */
295- private handleSentenceStart (
296- event : ConversationAudioSentenceStartEvent ,
297- ) : void {
298- // Enqueue the sentence, storing its text and its initial accumulated audio duration
299- const sentenceItem = {
300- id : event . id ,
301- content : event . data . text ,
302- audioDuration : 0 , // the sentence's accumulated audio duration starts at 0
303- isLastSentence : false ,
304- } ;
305- this . sentenceList . push ( sentenceItem ) ;
306-
307- // If this is the first sentence, emit the client-side sentence-start event immediately
308- if ( this . sentenceList . length === 1 && this . currentSentenceIndex === - 1 ) {
309- this . currentSentenceIndex = 0 ;
310- this . emitSentenceStart ( sentenceItem ) ;
311- this . scheduleSentenceSwitch ( ) ;
312- }
313- }
314-
315- private scheduleSentenceSwitch ( ) : void {
316- if ( this . sentenceSwitchTimer ) {
317- clearTimeout ( this . sentenceSwitchTimer ) ;
318- }
319-
320- const { isLastSentence, audioDuration } =
321- this . sentenceList [ this . currentSentenceIndex ] ;
322-
323- // Whether there is a next sentence
324- const hasNextSentence =
325- this . currentSentenceIndex + 1 < this . sentenceList . length ;
326-
327- let delay = 0 ;
328- if ( this . currentSentenceIndex === 0 ) {
329- // First sentence: delay = accumulated sentence duration - time elapsed since the first audio delta
330- delay =
331- audioDuration -
332- ( performance . now ( ) - ( this . firstAudioDeltaTime || performance . now ( ) ) ) ;
333- if ( delay <= 0 ) {
334- // postpone until we have a meaningful duration
335- this . sentenceSwitchTimer = setTimeout (
336- ( ) => this . scheduleSentenceSwitch ( ) ,
337- 50 ,
338- ) ;
339- return ;
340- }
341- } else {
342- // Subsequent sentences: delay = accumulated sentence duration
343- delay = audioDuration ;
344- }
345-
346- this . sentenceSwitchTimer = setTimeout ( ( ) => { // NOTE(review): hasNextSentence and isLastSentence are values captured when the timer was scheduled, not when it fires
347- if ( hasNextSentence ) {
348- this . currentSentenceIndex ++ ;
349- const nextSentence = this . sentenceList [ this . currentSentenceIndex ] ;
350- this . emitSentenceStart ( nextSentence ) ;
351- }
352- if ( isLastSentence ) {
353- this . emitSentenceEnd ( ) ;
354- } else {
355- this . scheduleSentenceSwitch ( ) ;
356- }
357- } , delay ) ;
358- }
359-
360- /**
361- * Emit the client-side sentence-start event
362- * @param sentenceItem the sentence whose content and id are sent in the event data
363- */
364- private emitSentenceStart ( sentenceItem : SentenceItem ) : void {
365- this . emit ( WsChatEventNames . AUDIO_SENTENCE_PLAYBACK_START , {
366- event_type : ClientEventType . AUDIO_SENTENCE_PLAYBACK_START ,
367- data : {
368- content : sentenceItem . content ,
369- id : sentenceItem . id ,
370- } ,
371- } ) ;
372- }
373-
374- /**
375- * Emit the client-side sentence-end event
376- */
377- private emitSentenceEnd ( ) : void {
378- this . emit ( WsChatEventNames . AUDIO_SENTENCE_PLAYBACK_ENDED , {
379- event_type : ClientEventType . AUDIO_SENTENCE_PLAYBACK_ENDED ,
380- } ) ;
381- }
382-
383- private resetSentenceSyncState ( ) { // restore the sentence-sync fields to their initial values and cancel both timers
384- this . currentSentenceIndex = - 1 ;
385- this . sentenceList = [ ] ;
386- this . firstAudioDeltaTime = null ;
387- if ( this . sentenceSwitchTimer ) {
388- clearTimeout ( this . sentenceSwitchTimer ) ; // cancel a pending sentence switch
389- }
390- if ( this . audioCompletedTimer ) {
391- clearInterval ( this . audioCompletedTimer ) ; // stop the audio-completed polling interval
392- }
393- this . sentenceSwitchTimer = null ;
394- this . audioCompletedTimer = null ;
395- }
396-
398265 // eslint-disable-next-line @typescript-eslint/no-explicit-any
399266 protected log ( ...args : any [ ] ) {
400267 if ( this . config . debug ) { // only print when config.debug is truthy
401268 console . log ( '[WsChatClient]' , ...args ) ;
402269 }
403270 return true ; // always returns true, whether or not anything was printed
404271 }
271+
404272 // eslint-disable-next-line @typescript-eslint/no-explicit-any
405273 protected warn ( ...args : any [ ] ) {
406274 if ( this . config . debug ) {
0 commit comments