Skip to content

Commit 6c26367

Browse files
feat(speech/transcription): 语音播报提供释放播放器功能并支持语音识别自定义配置 (#324)
* feat(speech/transcription): 语音播报提供释放播放器功能并支持语音识别自定义配置 * fix: Clear timeout and close WebSocket before destroying player --------- Co-authored-by: duwenhan2byte <duwenhan@bytedance.com>
1 parent ad4c087 commit 6c26367

6 files changed

Lines changed: 95 additions & 12 deletions

File tree

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"changes": [
3+
{
4+
"packageName": "@coze/api",
5+
"comment": "音播报提供释放播放器功能并支持语音识别自定义配置",
6+
"type": "minor"
7+
}
8+
],
9+
"packageName": "@coze/api",
10+
"email": "447258925@qq.com"
11+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"changes": [
3+
{
4+
"packageName": "@coze/api",
5+
"comment": "Clear timeout and close WebSocket before destroying player",
6+
"type": "patch"
7+
}
8+
],
9+
"packageName": "@coze/api",
10+
"email": "447258925@qq.com"
11+
}

packages/coze-js/src/resources/websockets/types.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ export interface CommonErrorEvent extends BaseEventWithDetail {
193193
};
194194
}
195195

196-
interface AudioConfig {
196+
export interface AudioConfig {
197197
/** Input audio format, supports pcm/wav/ogg */
198198
format?: 'pcm' | 'wav' | 'ogg';
199199
/** Input audio codec, supports pcm/opus/g711a/g711u */

packages/coze-js/src/ws-tools/speech/index.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,30 @@ class WsSpeechClient {
186186
this.closeWs();
187187
}
188188

189+
/**
190+
* Clears any pending playback timeout, calls closeWs(), awaits wavStreamPlayer.destroy(), and then resets the playback-tracking fields so callers can dispose the instance.
191+
* In a mobile browser environment, if the WsSpeechClient is instantiated multiple times,
192+
* also call destroyPlayer on instances that are no longer needed to release resources and prevent issues with speech playback.
193+
*/
194+
async destroyPlayer() {
195+
// Clear any pending playback timeout first and drop the stored handle
196+
if (this.playbackTimeout) {
197+
clearTimeout(this.playbackTimeout);
198+
this.playbackTimeout = null;
199+
}
200+
201+
// Ensure the WebSocket is closed before the player is destroyed
202+
this.closeWs();
203+
204+
// Destroy the player, then reset playback state. NOTE(review): the resets below are skipped if destroy() rejects — confirm whether a try/finally is wanted.
205+
await this.wavStreamPlayer.destroy();
206+
this.totalDuration = 0;
207+
this.playbackStartTime = null;
208+
this.playbackPauseTime = null;
209+
this.elapsedBeforePause = 0;
210+
this.audioDeltaList.length = 0; // empties the existing array in place rather than reassigning it
211+
}
212+
189213
append(message: string) {
190214
this.ws?.send({
191215
id: uuid(),

packages/coze-js/src/ws-tools/transcription/index.ts

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
import { v4 as uuid } from 'uuid';
22

3-
import { WebsocketsEventType } from '../..';
4-
import BaseWsTranscriptionClient from './base';
53
import {
64
type AIDenoiserProcessorLevel,
75
type AIDenoiserProcessorMode,
86
} from '../recorder/pcm-recorder';
7+
import {
8+
type AudioConfig,
9+
type TranscriptionsUpdateEvent,
10+
} from '../../resources/websockets/types';
11+
import { WebsocketsEventType } from '../..';
12+
import BaseWsTranscriptionClient from './base';
913

1014
class WsTranscriptionClient extends BaseWsTranscriptionClient {
1115
private isRecording = false;
@@ -17,18 +21,45 @@ class WsTranscriptionClient extends BaseWsTranscriptionClient {
1721
this.ws?.send({
1822
id: uuid(),
1923
event_type: WebsocketsEventType.TRANSCRIPTIONS_UPDATE,
20-
data: {
21-
input_audio: {
22-
format: 'pcm',
23-
codec: 'pcm',
24-
sample_rate: sampleRate,
25-
channel: 1,
26-
bit_depth: 16,
27-
},
28-
},
24+
data: this.getInitialUpdateData(sampleRate),
2925
});
3026
}
3127

28+
/**
* Builds the `data` payload sent with the initial TRANSCRIPTIONS_UPDATE event.
* The default input_audio is 16-bit, single-channel PCM at `sampleRate`; values from
* this.config.transcriptionUpdateData are layered on top of that default when present.
* @param sampleRate Sample rate written into the default input_audio.sample_rate.
* @returns The update payload, always containing an input_audio object.
*/
private getInitialUpdateData(
29+
sampleRate: number,
30+
): TranscriptionsUpdateEvent['data'] {
31+
const defaultInputAudio: AudioConfig = {
32+
format: 'pcm',
33+
codec: 'pcm',
34+
sample_rate: sampleRate,
35+
channel: 1,
36+
bit_depth: 16,
37+
};
38+
39+
const customUpdateData = this.config.transcriptionUpdateData; // optional caller-supplied payload
40+
41+
if (!customUpdateData) { // no custom payload: send only the default audio config
42+
return {
43+
input_audio: defaultInputAudio,
44+
};
45+
}
46+
47+
if (!customUpdateData.input_audio) { // custom payload without audio settings: keep its other fields, use the default audio config
48+
return {
49+
...customUpdateData,
50+
input_audio: defaultInputAudio,
51+
};
52+
}
53+
54+
return { // both present: custom input_audio keys override the defaults key by key (an explicitly undefined key also overrides)
55+
...customUpdateData,
56+
input_audio: {
57+
...defaultInputAudio,
58+
...customUpdateData.input_audio,
59+
},
60+
};
61+
}
62+
3263
destroy() {
3364
this.recorder.destroy();
3465
this.listeners.clear();

packages/coze-js/src/ws-tools/types.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { type TranscriptionsUpdateEvent } from '../resources/websockets/types';
12
import {
23
type AIDenoiserProcessorLevel,
34
type AIDenoiserProcessorMode,
@@ -428,6 +429,11 @@ export interface WsTranscriptionClientOptions extends WsToolsOptions {
428429
wavRecordConfig?: WavRecordConfig;
429430
entityType?: 'bot' | 'workflow';
430431
entityId?: string;
432+
/**
433+
* en: Custom data payload for initial transcription update
434+
* zh: 初始语音识别更新的接口配置数据
435+
*/
436+
transcriptionUpdateData?: TranscriptionsUpdateEvent['data'];
431437
}
432438

433439
export type WsSimultInterpretationClientOptions = WsTranscriptionClientOptions;

0 commit comments

Comments
 (0)