Skip to content

Commit 198da7f

Browse files
authored
feat: add tts voice enum, and add tts to were-wolf example (#749)
Change-Id: I43aebb346398a85766e78f2f2e8723b1977a07da ## AgentScope-Java Version 1.0.9-SNAPSHOT ## Description to #748 Add TTS (Text-to-Speech) to the ReActAgent so it can speak responses aloud during player discussions. ## Checklist Please check the following items before code is ready to be reviewed. - [ ] Code has been formatted with `mvn spotless:apply` - [ ] All tests are passing (`mvn test`) - [ ] Javadoc comments are complete and follow project conventions - [ ] Related documentation has been updated (e.g. links, examples, etc.) - [ ] Code is ready for review
1 parent a7eb5de commit 198da7f

7 files changed

Lines changed: 752 additions & 3 deletions

File tree

Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
/*
2+
* Copyright 2024-2026 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package io.agentscope.core.model.tts;
17+
18+
import java.util.Locale;
19+
import java.util.Random;
20+
import java.util.concurrent.ThreadLocalRandom;
21+
22+
/**
23+
* Predefined voices for Qwen3 TTS Flash / Realtime models.
24+
*
25+
* <p>The {@code voiceId} values correspond to the {@code voice} parameter
26+
* accepted by qwen3-tts-flash and qwen3-tts-flash-realtime.
27+
*/
28+
public enum Qwen3TTSFlashVoice {
29+
30+
/**
31+
* 芊悦 (Cherry) - A sunny, positive, friendly, and natural young woman.
32+
*/
33+
CHERRY("Cherry", "芊悦", Gender.FEMALE, "A sunny, positive, friendly, and natural young woman"),
34+
35+
/**
36+
* 晨煦 (Ethan) - A bright, warm, energetic, and vibrant male voice with a standard Mandarin pronunciation and a slight northern accent.
37+
*/
38+
ETHAN(
39+
"Ethan",
40+
"晨煦",
41+
Gender.MALE,
42+
"A bright, warm, energetic, and vibrant male voice with a standard Mandarin"
43+
+ " pronunciation and a slight northern accent"),
44+
45+
/**
46+
* 不吃鱼 (Nofish) - A male designer who cannot pronounce retroflex sounds.
47+
*/
48+
NOFISH("Nofish", "不吃鱼", Gender.MALE, "A male designer who cannot pronounce retroflex sounds"),
49+
50+
/**
51+
* 詹妮弗 (Jennifer) - A premium, cinematic American English female voice.
52+
*/
53+
JENNIFER(
54+
"Jennifer", "詹妮弗", Gender.FEMALE, "A premium, cinematic American English female voice"),
55+
56+
/**
57+
* 甜茶 (Ryan) - A rhythmic and dramatic voice with a sense of realism and tension.
58+
*/
59+
RYAN(
60+
"Ryan",
61+
"甜茶",
62+
Gender.MALE,
63+
"A rhythmic and dramatic voice with a sense of realism and tension"),
64+
65+
/**
66+
* 卡捷琳娜 (Katerina) - A mature female voice with a rich rhythm and lingering resonance.
67+
*/
68+
KATERINA(
69+
"Katerina",
70+
"卡捷琳娜",
71+
Gender.FEMALE,
72+
"A mature female voice with a rich rhythm and lingering resonance"),
73+
74+
/**
75+
* 墨讲师 (Elias) - A voice that maintains academic rigor while using storytelling techniques to transform complex knowledge into digestible cognitive modules.
76+
*/
77+
ELIAS(
78+
"Elias",
79+
"墨讲师",
80+
Gender.MALE,
81+
"A voice that maintains academic rigor while using storytelling techniques to transform"
82+
+ " complex knowledge into digestible cognitive modules"),
83+
84+
/**
85+
* 上海-阿珍 (Jada) - An energetic woman from Shanghai.
86+
*/
87+
JADA("Jada", "上海-阿珍", Gender.FEMALE, "An energetic woman from Shanghai"),
88+
89+
/**
90+
* 北京-晓东 (Dylan) - A teenage boy who grew up in the hutongs of Beijing.
91+
*/
92+
DYLAN("Dylan", "北京-晓东", Gender.MALE, "A teenage boy who grew up in the hutongs of Beijing"),
93+
94+
/**
95+
* 四川-晴儿 (Sunny) - The voice of a Sichuan girl whose sweetness melts your heart.
96+
*/
97+
SUNNY(
98+
"Sunny",
99+
"四川-晴儿",
100+
Gender.FEMALE,
101+
"The voice of a Sichuan girl whose sweetness melts your heart"),
102+
103+
/**
104+
* 南京-老李 (li) - Patient male yoga instructor.
105+
*/
106+
LI("li", "南京-老李", Gender.MALE, "Patient male yoga instructor"),
107+
108+
/**
109+
* 陕西-秦川 (Marcus) - A voice that is broad-faced and brief-spoken, sincere-hearted and deep-voiced—the authentic flavor of Shaanxi.
110+
*/
111+
MARCUS(
112+
"Marcus",
113+
"陕西-秦川",
114+
Gender.MALE,
115+
"A voice that is broad-faced and brief-spoken, sincere-hearted and deep-voiced—the"
116+
+ " authentic flavor of Shaanxi"),
117+
118+
/**
119+
* 闽南-阿杰 (Roy) - The voice of a humorous, straightforward, and lively young Taiwanese man.
120+
*/
121+
ROY(
122+
"Roy",
123+
"闽南-阿杰",
124+
Gender.MALE,
125+
"The voice of a humorous, straightforward, and lively young Taiwanese man"),
126+
127+
/**
128+
* 天津-李彼得 (Peter) - The voice of a professional straight man in Tianjin crosstalk.
129+
*/
130+
PETER(
131+
"Peter",
132+
"天津-李彼得",
133+
Gender.MALE,
134+
"The voice of a professional straight man in Tianjin crosstalk"),
135+
136+
/**
137+
* 粤语-阿强 (Rocky) - The voice of the humorous and witty Rocky, here for online chatting.
138+
*/
139+
ROCKY(
140+
"Rocky",
141+
"粤语-阿强",
142+
Gender.MALE,
143+
"The voice of the humorous and witty Rocky, here for online chatting"),
144+
145+
/**
146+
* 粤语-阿清 (Kiki) - A sweet female companion from Hong Kong.
147+
*/
148+
KIKI("Kiki", "粤语-阿清", Gender.FEMALE, "A sweet female companion from Hong Kong"),
149+
150+
/**
151+
* 四川-程川 (Eric) - An unconventional man from Chengdu, Sichuan.
152+
*/
153+
ERIC("Eric", "四川-程川", Gender.MALE, "An unconventional man from Chengdu, Sichuan");
154+
155+
private final String voiceId;
156+
private final String displayName;
157+
private final Gender gender;
158+
private final String description;
159+
160+
Qwen3TTSFlashVoice(String voiceId, String displayName, Gender gender, String description) {
161+
this.voiceId = voiceId;
162+
this.displayName = displayName;
163+
this.gender = gender;
164+
this.description = description;
165+
}
166+
167+
/**
168+
* Voice id to use as the {@code voice} parameter in DashScope TTS requests.
169+
*/
170+
public String getVoiceId() {
171+
return voiceId;
172+
}
173+
174+
/**
175+
* Human friendly display name (typically Chinese).
176+
*/
177+
public String getDisplayName() {
178+
return displayName;
179+
}
180+
181+
/**
182+
* Gender of this voice (for informational / filtering purposes).
183+
*/
184+
public Gender getGender() {
185+
return gender;
186+
}
187+
188+
/**
189+
* Short description of the voice characteristics.
190+
*/
191+
public String getDescription() {
192+
return description;
193+
}
194+
195+
/**
196+
* Find a voice enum by its voiceId (case-insensitive).
197+
*
198+
* @param voiceId the voice id string, e.g. "Cherry"
199+
* @return matching enum value, or {@code null} if not found
200+
*/
201+
public static Qwen3TTSFlashVoice fromVoiceId(String voiceId) {
202+
if (voiceId == null || voiceId.isEmpty()) {
203+
return null;
204+
}
205+
String normalized = voiceId.toLowerCase(Locale.ROOT);
206+
for (Qwen3TTSFlashVoice v : values()) {
207+
if (v.voiceId.toLowerCase(Locale.ROOT).equals(normalized)) {
208+
return v;
209+
}
210+
}
211+
return null;
212+
}
213+
214+
/**
215+
* Pick a random voice using {@link ThreadLocalRandom}.
216+
*/
217+
public static Qwen3TTSFlashVoice random() {
218+
return random(ThreadLocalRandom.current());
219+
}
220+
221+
/**
222+
* Pick a random voice using the provided {@link Random} instance.
223+
*/
224+
public static Qwen3TTSFlashVoice random(Random random) {
225+
Qwen3TTSFlashVoice[] all = values();
226+
if (all.length == 0) {
227+
throw new IllegalStateException("No Qwen3TTSFlashVoice defined");
228+
}
229+
int idx = random.nextInt(all.length);
230+
return all[idx];
231+
}
232+
233+
/** Simple gender enum for voices. */
234+
public enum Gender {
235+
MALE,
236+
FEMALE
237+
}
238+
}

0 commit comments

Comments
 (0)