1111from riva .client .proto .riva_audio_pb2 import AudioEncoding
1212import wave
1313
def add_custom_dictionary_to_config(req, custom_dictionary):
    """
    Serialize a custom dictionary onto a request's ``custom_dictionary`` field.

    Each ``key``/``value`` pair of :param:`custom_dictionary` is rendered as one entry and
    the entries are joined with commas; the resulting string is assigned to
    ``req.custom_dictionary``.

    Args:
        req: request object exposing a writable ``custom_dictionary`` string attribute.
        custom_dictionary (:obj:`dict`, `optional`): mapping to serialize. When :obj:`None`
            the request is left untouched, so callers may forward an omitted optional
            argument without checking it first.
    """
    # Callers forward their optional ``custom_dictionary`` argument unconditionally, and its
    # default is None; calling ``.items()`` on None would raise AttributeError.
    if custom_dictionary is None:
        return

    # NOTE(review): the entry layout (single space separator and trailing space) is kept
    # exactly as it was; confirm it matches what the server expects.
    entries = [f"{key} {value} " for key, value in custom_dictionary.items()]
    req.custom_dictionary = ",".join(entries)
18+
1419class SpeechSynthesisService :
1520 """
1621 A class for synthesizing speech from text. Provides :meth:`synthesize` which returns entire audio for a text
@@ -38,6 +43,7 @@ def synthesize(
3843 audio_prompt_encoding : AudioEncoding = AudioEncoding .LINEAR_PCM ,
3944 quality : int = 20 ,
4045 future : bool = False ,
46+ custom_dictionary : Optional [dict ] = None ,
4147 ) -> Union [rtts .SynthesizeSpeechResponse , _MultiThreadedRendezvous ]:
4248 """
4349 Synthesizes an entire audio for text :param:`text`.
@@ -56,6 +62,7 @@ def synthesize(
5662 audio but also takes longer to generate the audio. Ranges between 1-40.
5763 future (:obj:`bool`, defaults to :obj:`False`): Whether to return an async result instead of usual
5864 response. You can get a response by calling ``result()`` method of the future object.
65+ custom_dictionary (:obj:`dict`, `optional`): Dictionary with key-value pair containing grapheme and corresponding phoneme
5966
6067 Returns:
6168 :obj:`Union[riva.client.proto.riva_tts_pb2.SynthesizeSpeechResponse, grpc._channel._MultiThreadedRendezvous]`:
@@ -81,6 +88,8 @@ def synthesize(
8188 req .zero_shot_data .encoding = audio_prompt_encoding
8289 req .zero_shot_data .quality = quality
8390
91+ add_custom_dictionary_to_config (req , custom_dictionary )
92+
8493 func = self .stub .Synthesize .future if future else self .stub .Synthesize
8594 return func (req , metadata = self .auth .get_auth_metadata ())
8695
@@ -94,6 +103,7 @@ def synthesize_online(
94103 audio_prompt_file : Optional [str ] = None ,
95104 audio_prompt_encoding : AudioEncoding = AudioEncoding .LINEAR_PCM ,
96105 quality : int = 20 ,
106+ custom_dictionary : Optional [dict ] = None ,
97107 ) -> Generator [rtts .SynthesizeSpeechResponse , None , None ]:
98108 """
99109 Synthesizes and yields output audio chunks for text :param:`text` as the chunks
@@ -111,6 +121,7 @@ def synthesize_online(
111121 audio_prompt_encoding: (:obj:`AudioEncoding`): Encoding of audio prompt file, e.g. ``AudioEncoding.LINEAR_PCM``.
112122 quality: (:obj:`int`): This defines the number of times decoder is run. Higher number improves quality of generated
113123 audio but also takes longer to generate the audio. Ranges between 1-40.
124+ custom_dictionary (:obj:`dict`, `optional`): Dictionary with key-value pair containing grapheme and corresponding phoneme
114125
115126 Yields:
116127 :obj:`riva.client.proto.riva_tts_pb2.SynthesizeSpeechResponse`: a response with output. You may find
@@ -138,4 +149,6 @@ def synthesize_online(
138149 req .zero_shot_data .encoding = audio_prompt_encoding
139150 req .zero_shot_data .quality = quality
140151
152+ add_custom_dictionary_to_config (req , custom_dictionary )
153+
141154 return self .stub .SynthesizeOnline (req , metadata = self .auth .get_auth_metadata ())
0 commit comments