@@ -10,6 +10,7 @@ import {
1010 tts ,
1111} from '@livekit/agents' ;
1212import { Mistral } from '@mistralai/mistralai' ;
13+ import * as crypto from 'node:crypto' ;
1314import type { MistralTTSModels } from './models.js' ;
1415
1516// Confirmed from WAV header: Mistral TTS PCM output is 24000 Hz, mono, 16-bit signed
@@ -92,8 +93,12 @@ export class TTS extends tts.TTS {
9293 } ) ) ;
9394 }
9495
95- synthesize ( text : string , connOptions ?: APIConnectOptions ) : ChunkedStream {
96- return new ChunkedStream ( this , text , this . #client, this . #opts, connOptions ) ;
/**
 * Creates a ChunkedStream for `text`, handing it this TTS instance together
 * with the instance's Mistral client and options.
 *
 * @param text - Text to synthesize; the stream sends it as the request `input`.
 * @param connOptions - Optional connection options, forwarded unchanged.
 * @param abortSignal - Optional signal introduced by this change and forwarded
 *   to the ChunkedStream constructor; the removed signature did not accept it.
 *   NOTE(review): how this signal relates to the stream's `abortController`
 *   is not visible in this diff — confirm in the base class.
 * @returns The newly constructed ChunkedStream.
 */
96+ synthesize (
97+ text : string ,
98+ connOptions ?: APIConnectOptions ,
99+ abortSignal ?: AbortSignal ,
100+ ) : ChunkedStream {
101+ return new ChunkedStream ( this , text , this . #client, this . #opts, connOptions , abortSignal ) ;
97102 }
98103
99104 stream ( ) : tts . SynthesizeStream {
@@ -117,8 +122,9 @@ export class ChunkedStream extends tts.ChunkedStream {
117122 client : Mistral ,
118123 opts : TTSOptions ,
119124 connOptions ?: APIConnectOptions ,
125+ abortSignal ?: AbortSignal ,
120126 ) {
121- super ( text , ttsInstance , connOptions ) ;
127+ super ( text , ttsInstance , connOptions , abortSignal ) ;
122128 this . #client = client ;
123129 this . #opts = opts ;
124130 this . #text = text ;
@@ -127,15 +133,21 @@ export class ChunkedStream extends tts.ChunkedStream {
127133 protected async run ( ) : Promise < void > {
128134 const logger = log ( ) ;
129135 try {
130- const eventStream = await this . #client. audio . speech . complete ( {
131- input : this . #text,
132- model : this . #opts. model ?? 'voxtral-mini-tts-2603' ,
133- voiceId : this . #opts. voiceId ,
134- responseFormat : 'pcm' ,
135- stream : true ,
136- } ) ;
136+ const eventStream = await this . #client. audio . speech . complete (
137+ {
138+ input : this . #text,
139+ model : this . #opts. model ?? 'voxtral-mini-tts-2603' ,
140+ voiceId : this . #opts. voiceId ,
141+ responseFormat : 'pcm' ,
142+ stream : true ,
143+ } ,
144+ {
145+ fetchOptions : { signal : this . abortController ?. signal } ,
146+ } ,
147+ ) ;
137148
138- const requestId = this . #text. slice ( 0 , 8 ) ;
149+ const requestId = crypto . randomUUID ( ) ;
150+ const segmentId = crypto . randomUUID ( ) ;
139151 const audioByteStream = new AudioByteStream ( MISTRAL_TTS_SAMPLE_RATE , MISTRAL_TTS_CHANNELS ) ;
140152
141153 let lastFrame : import ( '@livekit/rtc-node' ) . AudioFrame | undefined ;
@@ -152,7 +164,7 @@ export class ChunkedStream extends tts.ChunkedStream {
152164 const pcmBytes = Buffer . from ( event . data . audioData , 'base64' ) ;
153165 const frames = audioByteStream . write ( pcmBytes ) ;
154166 for ( const frame of frames ) {
155- sendLastFrame ( requestId , false ) ;
167+ sendLastFrame ( segmentId , false ) ;
156168 lastFrame = frame ;
157169 }
158170 } else if ( event . data . type === 'speech.audio.done' ) {
@@ -163,11 +175,11 @@ export class ChunkedStream extends tts.ChunkedStream {
163175 // Flush any remaining buffered audio
164176 const flushFrames = audioByteStream . flush ( ) ;
165177 for ( const frame of flushFrames ) {
166- sendLastFrame ( requestId , false ) ;
178+ sendLastFrame ( segmentId , false ) ;
167179 lastFrame = frame ;
168180 }
169181
170- sendLastFrame ( requestId , true ) ;
182+ sendLastFrame ( segmentId , true ) ;
171183 this . queue . close ( ) ;
172184 } catch ( error : unknown ) {
173185 if ( this . abortController ?. signal . aborted ) return ;
@@ -204,8 +216,6 @@ export class ChunkedStream extends tts.ChunkedStream {
204216 message : `Mistral TTS: ${ err . message ?? 'unknown error' } ` ,
205217 options : { retryable : true } ,
206218 } ) ;
207- } finally {
208- this . queue . close ( ) ;
209219 }
210220 }
211221}
0 commit comments