@@ -19,11 +19,38 @@ pub struct TtsResult {
1919/// Text-to-speech engine.
2020pub struct TtsEngine {
2121 config : TtsConfig ,
22+ /// Optional override for OpenAI TTS base URL. When set, the engine POSTs
23+ /// to `<openai_base_url>/v1/audio/speech` instead of the hardcoded
24+ /// `https://api.openai.com/v1/audio/speech`. Sourced from
25+ /// `MediaConfig.tts_openai_base_url`. Closes #1051.
26+ openai_base_url : Option < String > ,
27+ /// Optional override for ElevenLabs TTS base URL. When set, the engine
28+ /// POSTs to `<elevenlabs_base_url>/v1/text-to-speech/{voice_id}` instead
29+ /// of the hardcoded `https://api.elevenlabs.io/...`. Sourced from
30+ /// `MediaConfig.tts_elevenlabs_base_url`. Closes #1051.
31+ elevenlabs_base_url : Option < String > ,
2232}
2333
2434impl TtsEngine {
2535 pub fn new ( config : TtsConfig ) -> Self {
26- Self { config }
36+ Self {
37+ config,
38+ openai_base_url : None ,
39+ elevenlabs_base_url : None ,
40+ }
41+ }
42+
43+ /// Attach optional base-URL overrides from `MediaConfig`. Use this to
44+ /// route TTS calls at a local OpenAI-compatible service (e.g.
45+ /// Lemonade/Kokoro, LM Studio) or an ElevenLabs proxy. Closes #1051.
46+ pub fn with_base_urls (
47+ mut self ,
48+ openai_base_url : Option < String > ,
49+ elevenlabs_base_url : Option < String > ,
50+ ) -> Self {
51+ self . openai_base_url = openai_base_url;
52+ self . elevenlabs_base_url = elevenlabs_base_url;
53+ self
2754 }
2855
2956 /// Detect which TTS provider is available based on environment variables.
@@ -100,9 +127,21 @@ impl TtsEngine {
100127 "speed" : self . config. openai. speed,
101128 } ) ;
102129
130+ // `tts_openai_base_url` (config.media.tts_openai_base_url) overrides
131+ // the hardcoded provider URL when set, allowing the same OpenAI-compat
132+ // JSON wire format to be sent to a local TTS service (Lemonade/Kokoro,
133+ // LM Studio, etc.) instead of the cloud provider. The Authorization
134+ // header is still built from `OPENAI_API_KEY`; local services typically
135+ // accept any non-empty bearer token. Closes #1051.
136+ let url = self
137+ . openai_base_url
138+ . as_deref ( )
139+ . map ( |base| format ! ( "{}/v1/audio/speech" , base. trim_end_matches( '/' ) ) )
140+ . unwrap_or_else ( || "https://api.openai.com/v1/audio/speech" . to_string ( ) ) ;
141+
103142 let client = reqwest:: Client :: new ( ) ;
104143 let response = client
105- . post ( "https://api.openai.com/v1/audio/speech" )
144+ . post ( & url )
106145 . header ( "Authorization" , format ! ( "Bearer {}" , api_key) )
107146 . header ( "Content-Type" , "application/json" )
108147 . json ( & body)
@@ -161,7 +200,17 @@ impl TtsEngine {
161200 std:: env:: var ( "ELEVENLABS_API_KEY" ) . map_err ( |_| "ELEVENLABS_API_KEY not set" ) ?;
162201
163202 let voice_id = voice_override. unwrap_or ( & self . config . elevenlabs . voice_id ) ;
164- let url = format ! ( "https://api.elevenlabs.io/v1/text-to-speech/{}" , voice_id) ;
203+ // `tts_elevenlabs_base_url` (config.media.tts_elevenlabs_base_url)
204+ // overrides the hardcoded provider URL when set, allowing the same
205+ // ElevenLabs JSON wire format to be routed through a proxy or
206+ // self-hosted ElevenLabs-compatible gateway. The `xi-api-key` header
207+ // still comes from `ELEVENLABS_API_KEY`. Closes #1051.
208+ let base = self
209+ . elevenlabs_base_url
210+ . as_deref ( )
211+ . map ( |b| b. trim_end_matches ( '/' ) . to_string ( ) )
212+ . unwrap_or_else ( || "https://api.elevenlabs.io" . to_string ( ) ) ;
213+ let url = format ! ( "{}/v1/text-to-speech/{}" , base, voice_id) ;
165214
166215 let body = serde_json:: json!( {
167216 "text" : text,
@@ -306,4 +355,88 @@ mod tests {
306355 fn test_max_audio_constant ( ) {
307356 assert_eq ! ( MAX_AUDIO_RESPONSE_BYTES , 10 * 1024 * 1024 ) ;
308357 }
358+
359+ #[ test]
360+ fn test_with_base_urls_sets_overrides ( ) {
361+ let engine = TtsEngine :: new ( default_config ( ) ) . with_base_urls (
362+ Some ( "http://127.0.0.1:8000" . to_string ( ) ) ,
363+ Some ( "http://127.0.0.1:9000" . to_string ( ) ) ,
364+ ) ;
365+ assert_eq ! (
366+ engine. openai_base_url. as_deref( ) ,
367+ Some ( "http://127.0.0.1:8000" )
368+ ) ;
369+ assert_eq ! (
370+ engine. elevenlabs_base_url. as_deref( ) ,
371+ Some ( "http://127.0.0.1:9000" )
372+ ) ;
373+ }
374+
375+ /// Closes #1051: when the OpenAI TTS base URL is overridden, the URL
376+ /// building logic must append `/v1/audio/speech` and strip any trailing
377+ /// slash. When unset, the hardcoded provider URL is used.
378+ #[ test]
379+ fn test_tts_openai_base_url_override_logic ( ) {
380+ // Helper mirroring the URL construction in `synthesize_openai`.
381+ fn build ( base : Option < & str > ) -> String {
382+ base. map ( |b| format ! ( "{}/v1/audio/speech" , b. trim_end_matches( '/' ) ) )
383+ . unwrap_or_else ( || "https://api.openai.com/v1/audio/speech" . to_string ( ) )
384+ }
385+
386+ // Default: hardcoded URL preserved (backward compatibility).
387+ assert_eq ! ( build( None ) , "https://api.openai.com/v1/audio/speech" ) ;
388+
389+ // Override applied.
390+ assert_eq ! (
391+ build( Some ( "http://127.0.0.1:8000" ) ) ,
392+ "http://127.0.0.1:8000/v1/audio/speech"
393+ ) ;
394+
395+ // Trailing slash on the user-supplied base is stripped.
396+ assert_eq ! (
397+ build( Some ( "http://127.0.0.1:8000/" ) ) ,
398+ "http://127.0.0.1:8000/v1/audio/speech"
399+ ) ;
400+ assert_eq ! (
401+ build( Some ( "https://tts.example.com/" ) ) ,
402+ "https://tts.example.com/v1/audio/speech"
403+ ) ;
404+ }
405+
406+ /// Closes #1051: when the ElevenLabs TTS base URL is overridden, the URL
407+ /// building logic must append `/v1/text-to-speech/{voice_id}` and strip
408+ /// any trailing slash. When unset, the hardcoded provider URL is used.
409+ #[ test]
410+ fn test_tts_elevenlabs_base_url_override_logic ( ) {
411+ fn build ( base : Option < & str > , voice_id : & str ) -> String {
412+ let b = base
413+ . map ( |b| b. trim_end_matches ( '/' ) . to_string ( ) )
414+ . unwrap_or_else ( || "https://api.elevenlabs.io" . to_string ( ) ) ;
415+ format ! ( "{}/v1/text-to-speech/{}" , b, voice_id)
416+ }
417+
418+ let voice = "21m00Tcm4TlvDq8ikWAM" ;
419+
420+ // Default: hardcoded URL preserved.
421+ assert_eq ! (
422+ build( None , voice) ,
423+ format!( "https://api.elevenlabs.io/v1/text-to-speech/{voice}" )
424+ ) ;
425+
426+ // Override applied.
427+ assert_eq ! (
428+ build( Some ( "http://127.0.0.1:9000" ) , voice) ,
429+ format!( "http://127.0.0.1:9000/v1/text-to-speech/{voice}" )
430+ ) ;
431+
432+ // Trailing slash stripped.
433+ assert_eq ! (
434+ build( Some ( "http://127.0.0.1:9000/" ) , voice) ,
435+ format!( "http://127.0.0.1:9000/v1/text-to-speech/{voice}" )
436+ ) ;
437+ assert_eq ! (
438+ build( Some ( "https://eleven.example.com/" ) , voice) ,
439+ format!( "https://eleven.example.com/v1/text-to-speech/{voice}" )
440+ ) ;
441+ }
309442}
0 commit comments