Skip to content

Commit 80e5f04

Browse files
add list voices support to tts client (#78)
add --list-voices parameter to tts client to query supported voices
1 parent 18d6f8f commit 80e5f04

1 file changed

Lines changed: 29 additions & 8 deletions

File tree

scripts/tts/talk.py

Lines changed: 29 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import argparse
55
import time
66
import wave
7+
import json
78
from pathlib import Path
89

910
import riva.client
@@ -21,12 +22,12 @@ def parse_args() -> argparse.Namespace:
2122
help="A voice name to use. If this parameter is missing, then the server will try a first available model "
2223
"based on parameter `--language-code`.",
2324
)
24-
parser.add_argument("--text", type=str, required=True, help="Text input to synthesize.")
25+
parser.add_argument("--text", type=str, required=False, help="Text input to synthesize.")
2526
parser.add_argument(
2627
"--audio_prompt_file",
2728
type=Path,
2829
help="An input audio prompt (.wav) file for zero shot model. This is required to do zero shot inferencing.")
29-
parser.add_argument("-o", "--output", type=Path, help="Output file .wav file to write synthesized audio.")
30+
parser.add_argument("-o", "--output", type=Path, default="output.wav", help="Output file .wav file to write synthesized audio.")
3031
parser.add_argument("--quality", type=int, help="Number of times decoder should be run on the output audio. A higher number improves quality of the produced output but introduces latencies.")
3132
parser.add_argument(
3233
"--play-audio",
@@ -35,6 +36,7 @@ def parse_args() -> argparse.Namespace:
3536
"then the default output audio device will be used.",
3637
)
3738
parser.add_argument("--list-devices", action="store_true", help="List output audio devices indices.")
39+
parser.add_argument("--list-voices", action="store_true", help="List available voices.")
3840
parser.add_argument("--output-device", type=int, help="Output device to use.")
3941
parser.add_argument("--language-code", default='en-US', help="A language of input text.")
4042
parser.add_argument(
@@ -49,11 +51,6 @@ def parse_args() -> argparse.Namespace:
4951
)
5052
parser = add_connection_argparse_parameters(parser)
5153
args = parser.parse_args()
52-
if args.output is None and not args.play_audio and args.output_device is None and not args.list_devices:
53-
parser.error(
54-
f"You have to provide at least one of arguments: `--play-audio`, `--output-device`, `--output`, "
55-
f"`--list-devices`."
56-
)
5754
if args.output is not None:
5855
args.output = args.output.expanduser()
5956
if args.list_devices or args.output_device or args.play_audio:
@@ -65,12 +62,36 @@ def main() -> None:
6562
args = parse_args()
6663
if args.list_devices:
6764
riva.client.audio_io.list_output_devices()
68-
return
65+
6966
auth = riva.client.Auth(args.ssl_cert, args.use_ssl, args.server, args.metadata)
7067
service = riva.client.SpeechSynthesisService(auth)
7168
nchannels = 1
7269
sampwidth = 2
7370
sound_stream, out_f = None, None
71+
72+
if args.list_voices:
73+
config_response = service.stub.GetRivaSynthesisConfig(
74+
riva.client.proto.riva_tts_pb2.RivaSynthesisConfigRequest()
75+
)
76+
tts_models = dict()
77+
for model_config in config_response.model_config:
78+
language_code = model_config.parameters['language_code']
79+
voice_name = model_config.parameters['voice_name']
80+
subvoices = [voice.split(':')[0] for voice in model_config.parameters['subvoices'].split(',')]
81+
full_voice_names = [voice_name + "." + subvoice for subvoice in subvoices]
82+
83+
if language_code in tts_models:
84+
tts_models[language_code]['voices'].extend(full_voice_names)
85+
else:
86+
tts_models[language_code] = {"voices": full_voice_names}
87+
88+
tts_models = dict(sorted(tts_models.items()))
89+
print(json.dumps(tts_models, indent=4))
90+
91+
if not args.text:
92+
print("No input text provided")
93+
return
94+
7495
try:
7596
if args.output_device is not None or args.play_audio:
7697
sound_stream = riva.client.audio_io.SoundCallBack(

0 commit comments

Comments
 (0)