@@ -44,6 +44,8 @@ def __init__(self) -> None:
4444 self .mock_mode = self .config .get ("mock_mode" , True )
4545 self .provider = self .config .get ("provider" , "piper" )
4646 self .base_url = self .config .get ("base_url" , "http://localhost:5000" )
47+ self .providers = self .config .get ("providers" , {})
48+ self .language_providers = self .config .get ("language_providers" , {})
4749 self .voice = self .config .get ("voice" , "en_US-lessac-medium" )
4850 self .timeout = self .config .get ("timeout_seconds" , 15 )
4951 self .media_dir = get_config ().media_output_dir
@@ -91,10 +93,12 @@ async def synthesize(
9193 return await self ._mock_synthesize (text , language , output_path )
9294
9395 try :
94- if self .provider == "piper" :
96+ provider = self ._get_provider_for_language (language )
97+ if provider == "piper" :
9598 return await self ._piper_synthesize (text , language , output_path )
96- else :
97- return await self ._openai_synthesize (text , language , output_path )
99+ if provider == "supertonic" :
100+ return await self ._supertonic_synthesize (text , language , output_path )
101+ return await self ._openai_synthesize (text , language , output_path )
98102 except Exception as e :
99103 logger .error (f"TTS synthesis failed: { e } " )
100104 return TTSResult (success = False , error = str (e ))
@@ -148,6 +152,46 @@ async def _mock_synthesize(
148152 logger .error (f"Mock TTS failed: { e } " )
149153 return TTSResult (success = False , error = f"Mock TTS failed: { e } " )
150154
155+ def _normalize_language (self , language : str ) -> tuple [str , str ]:
156+ """Return normalized exact and base language codes."""
157+ normalized_language = language .replace ("-" , "_" )
158+ lang_base = normalized_language .split ("_" )[0 ].lower ()
159+ return normalized_language , lang_base
160+
161+ def _get_provider_for_language (self , language : str ) -> str :
162+ """Resolve the TTS provider for a language, falling back to default."""
163+ normalized_language , lang_base = self ._normalize_language (language )
164+ return self .language_providers .get (
165+ normalized_language ,
166+ self .language_providers .get (lang_base , self .provider ),
167+ )
168+
169+ def _get_provider_config (self , provider : str ) -> dict :
170+ """Return provider-specific config merged with top-level defaults."""
171+ provider_config = self .providers .get (provider , {})
172+ return {** self .config , ** provider_config }
173+
174+ def _get_provider_base_url (self , provider : str ) -> str :
175+ """Return the base URL for a provider."""
176+ provider_config = self ._get_provider_config (provider )
177+ return provider_config .get ("base_url" , self .base_url ).rstrip ("/" )
178+
179+ @staticmethod
180+ def _config_int (value : object , default : int ) -> int :
181+ """Parse an integer config value with a safe fallback."""
182+ try :
183+ return int (value )
184+ except (TypeError , ValueError ):
185+ return default
186+
187+ @staticmethod
188+ def _config_float (value : object , default : float ) -> float :
189+ """Parse a float config value with a safe fallback."""
190+ try :
191+ return float (value )
192+ except (TypeError , ValueError ):
193+ return default
194+
151195 async def _fetch_voices (self ) -> dict :
152196 """
153197 Fetch available voices from Piper TTS API with thread-safe caching.
@@ -182,7 +226,7 @@ async def _fetch_voices(self) -> dict:
182226
183227 try :
184228 response = await self ._http_client .get (
185- f"{ self .base_url . rstrip ( '/ ' )} /voices"
229+ f"{ self ._get_provider_base_url ( 'piper ' )} /voices"
186230 )
187231 response .raise_for_status ()
188232 self ._voices_cache = response .json ()
@@ -210,8 +254,7 @@ async def _get_voice_for_language(self, language: str) -> str:
210254 4. Base language match from API voices
211255 5. Default voice from config
212256 """
213- normalized_language = language .replace ("-" , "_" )
214- lang_base = normalized_language .split ("_" )[0 ].lower ()
257+ normalized_language , lang_base = self ._normalize_language (language )
215258 voices = await self ._fetch_voices ()
216259
217260 def voice_available (voice_name : str ) -> bool :
@@ -276,8 +319,7 @@ def _get_length_scale_for_language(self, language: str, voice: str) -> float:
276319 lookup supports voice-specific, exact language, base language, then
277320 global default settings.
278321 """
279- normalized_language = language .replace ("-" , "_" )
280- lang_base = normalized_language .split ("_" )[0 ].lower ()
322+ normalized_language , lang_base = self ._normalize_language (language )
281323
282324 candidates = [
283325 voice ,
@@ -306,7 +348,7 @@ async def _piper_synthesize(
306348 Returns:
307349 TTSResult with audio file path
308350 """
309- url = self .base_url . rstrip ( "/ " )
351+ url = self ._get_provider_base_url ( "piper " )
310352
311353 # Select voice dynamically from Piper TTS API
312354 voice = await self ._get_voice_for_language (language )
@@ -341,17 +383,78 @@ async def _piper_synthesize(
341383 audio_url = audio_url ,
342384 success = True ,
343385 )
344- except httpx .TimeoutException :
345- logger .error (f"Piper TTS timeout after { self . timeout } s " )
386+ except httpx .HTTPError as e :
387+ logger .error (f"Piper TTS HTTP error: { e } " )
346388 return TTSResult (
347389 success = False ,
348- error = f"Piper TTS timeout after { self . timeout } s " ,
390+ error = f"Piper TTS HTTP error: { e } " ,
349391 )
392+
393+ def _get_supertonic_voice_for_language (self , language : str ) -> str :
394+ """Return Supertonic voice style for a language."""
395+ provider_config = self ._get_provider_config ("supertonic" )
396+ voices = provider_config .get ("voices" , {})
397+ normalized_language , lang_base = self ._normalize_language (language )
398+ return voices .get (
399+ normalized_language ,
400+ voices .get (lang_base , provider_config .get ("voice" , "M1" )),
401+ )
402+
async def _supertonic_synthesize(
    self, text: str, language: str, output_path: Optional[Path] = None
) -> TTSResult:
    """Synthesize speech using a Supertonic TTS HTTP server.

    POSTs the text to ``<base_url>/v1/tts`` and streams the response body
    to disk chunk by chunk.

    Args:
        text: Text to synthesize.
        language: Language tag; its base code is sent as ``lang`` and it
            also selects the voice style.
        output_path: Destination file. When omitted, a uniquely named file
            is created under ``self.media_dir`` with an extension that
            follows the configured ``response_format`` (``wav`` by default).

    Returns:
        TTSResult with the audio file path and URL on success (plus the
        duration when the server reports a valid ``X-Audio-Duration``
        header), or ``success=False`` with an error message on HTTP errors.
    """
    provider_config = self._get_provider_config("supertonic")
    normalized_language, lang_base = self._normalize_language(language)
    voice = self._get_supertonic_voice_for_language(language)
    url = self._get_provider_base_url("supertonic") + "/v1/tts"
    response_format = provider_config.get("response_format", "wav")

    payload = {
        "text": text,
        "voice": voice,
        "lang": lang_base,
        "steps": self._config_int(provider_config.get("steps"), 8),
        "speed": self._config_float(provider_config.get("speed"), 1.05),
        "response_format": response_format,
    }

    # Tracks whether the output file has been created, so that a stream
    # failing midway does not leave a truncated audio file behind.
    file_started = False

    try:
        async with self._http_client.stream("POST", url, json=payload) as response:
            response.raise_for_status()

            if output_path is None:
                unique_id = str(uuid.uuid4())[:8]
                # Name the file after the format actually requested rather
                # than assuming WAV; keep only alphanumerics so the config
                # value cannot inject path separators.
                extension = (
                    "".join(
                        ch for ch in str(response_format).lower() if ch.isalnum()
                    )
                    or "wav"
                )
                output_path = (
                    self.media_dir
                    / f"tts_{normalized_language}_{unique_id}.{extension}"
                )

            output_path.parent.mkdir(parents=True, exist_ok=True)

            with open(output_path, "wb") as f:
                file_started = True
                async for chunk in response.aiter_bytes():
                    if chunk:
                        f.write(chunk)

            audio_url = f"/media/output/{output_path.name}"

            # The duration header is optional metadata: a malformed value
            # must not turn an otherwise successful synthesis into a failure.
            duration = None
            duration_header = response.headers.get("X-Audio-Duration")
            if duration_header:
                try:
                    duration = float(duration_header)
                except ValueError:
                    logger.warning(
                        "Supertonic TTS returned invalid X-Audio-Duration: "
                        f"{duration_header!r}"
                    )

            logger.info(
                f"Supertonic TTS generated: {output_path} "
                f"(lang: {lang_base}, voice: {voice})"
            )

            return TTSResult(
                audio_path=output_path,
                audio_url=audio_url,
                duration=duration,
                success=True,
            )
    except httpx.HTTPError as e:
        if file_started:
            try:
                output_path.unlink(missing_ok=True)
            except OSError as cleanup_error:
                logger.warning(
                    f"Could not remove partial Supertonic output "
                    f"{output_path}: {cleanup_error}"
                )
        logger.error(f"Supertonic TTS HTTP error: {e}")
        return TTSResult(
            success=False,
            error=f"Supertonic TTS HTTP error: {e}",
        )
356459
357460 async def _openai_synthesize (
@@ -386,12 +489,6 @@ async def _openai_synthesize(
386489 response .raise_for_status ()
387490
388491 audio_content = response .content
389- except httpx .TimeoutException :
390- logger .error (f"OpenAI TTS timeout after { self .timeout } s" )
391- return TTSResult (
392- success = False ,
393- error = f"OpenAI TTS timeout after { self .timeout } s" ,
394- )
395492 except httpx .HTTPError as e :
396493 logger .error (f"OpenAI TTS HTTP error: { e } " )
397494 return TTSResult (
0 commit comments