Skip to content

Commit 9d07f7e

Browse files
committed
Add TTS voice enum, and add TTS to the werewolf example
Change-Id: I43aebb346398a85766e78f2f2e8723b1977a07da
1 parent f153fb0 commit 9d07f7e

6 files changed

Lines changed: 555 additions & 3 deletions

File tree

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
/*
2+
* Qwen3 TTS Flash / Realtime voices enumeration.
3+
*
4+
* This enum lists the officially documented 17 timbres for
5+
* qwen3-tts-flash / qwen3-tts-flash-realtime models.
6+
*/
7+
package io.agentscope.core.model.tts;
8+
9+
import java.util.Locale;
10+
import java.util.Random;
11+
import java.util.concurrent.ThreadLocalRandom;
12+
13+
/**
 * Predefined voices for the Qwen3 TTS Flash / Realtime models.
 *
 * <p>Each constant's {@code voiceId} is the value to pass as the {@code voice} parameter
 * accepted by qwen3-tts-flash and qwen3-tts-flash-realtime. The display name and the
 * description of every voice are Chinese strings and are exposed unchanged through
 * {@link #getDisplayName()} and {@link #getDescription()}.
 */
public enum Qwen3TTSFlashVoice {

    /** Cherry: sunny, upbeat, warm and natural young woman. */
    CHERRY("Cherry", "芊悦", Gender.FEMALE,
            "阳光积极、亲切自然小姐姐"),

    /**
     * Ethan: standard Mandarin with a slight northern accent; sunny, warm, energetic and
     * youthful.
     */
    ETHAN("Ethan", "晨煦", Gender.MALE,
            "标准普通话,带部分北方口音,阳光温暖、活力十足"),

    /** Nofish: a designer who cannot pronounce retroflex consonants. */
    NOFISH("Nofish", "不吃鱼", Gender.MALE,
            "不会翘舌音的设计师"),

    /** Jennifer: brand-grade, cinematic American-English female voice. */
    JENNIFER("Jennifer", "詹妮弗", Gender.FEMALE,
            "品牌级、电影质感般美语女声"),

    /** Ryan: full-tempo, explosively dramatic delivery where realism and tension meet. */
    RYAN("Ryan", "甜茶", Gender.MALE,
            "节奏拉满、戏感炸裂的男声"),

    /** Katerina: mature, commanding female timbre with a lingering rhythm. */
    KATERINA("Katerina", "卡捷琳娜", Gender.FEMALE,
            "御姐音色,韵律回味十足"),

    /** Elias: lecturer timbre that combines rigor with storytelling. */
    ELIAS("Elias", "墨讲师", Gender.FEMALE,
            "兼具严谨与叙事性的讲师音色"),

    /** Jada (Shanghai): a brisk, bustling Shanghai big sister. */
    JADA("Jada", "上海-阿珍", Gender.FEMALE,
            "风风火火的沪上阿姐"),

    /** Dylan (Beijing): a youth who grew up in Beijing's hutongs. */
    DYLAN("Dylan", "北京-晓东", Gender.MALE,
            "北京胡同里长大的少年"),

    /** Sunny (Sichuan): a Sichuan girl sweet enough to melt your heart. */
    SUNNY("Sunny", "四川-晴儿", Gender.FEMALE,
            "甜到你心里的川妹子"),

    /** Li (Nanjing): a patient yoga teacher. Note that the voice id is lowercase {@code li}. */
    LI("li", "南京-老李", Gender.MALE,
            "耐心的瑜伽老师"),

    /** Marcus (Shaanxi): broad face, few words, honest heart, deep voice. */
    MARCUS("Marcus", "陕西-秦川", Gender.MALE,
            "面宽话短、心实声沉的老陕味道"),

    /** Roy (Minnan): a humorous, straightforward, lively young man from Taiwan, China. */
    ROY("Roy", "闽南-阿杰", Gender.MALE,
            "诙谐直爽、市井活泼的台湾哥仔"),

    /** Peter (Tianjin): Tianjin crosstalk style, the professional supporting act. */
    PETER("Peter", "天津-李彼得", Gender.MALE,
            "天津相声风格的专业捧人"),

    /** Rocky (Cantonese): humorous and witty, always available for a chat. */
    ROCKY("Rocky", "粤语-阿强", Gender.MALE,
            "幽默风趣的粤语阿强"),

    /** Kiki (Cantonese): a sweet Hong Kong girl best friend. */
    KIKI("Kiki", "粤语-阿清", Gender.FEMALE,
            "甜美的港妹闺蜜"),

    /** Eric (Sichuan): a man from Chengdu, Sichuan, who stands out from the everyday crowd. */
    ERIC("Eric", "四川-程川", Gender.MALE,
            "跳脱市井的四川成都男子");

    /**
     * Constants cached once: {@link #values()} clones its array on every call, and the
     * lookup and random helpers below would otherwise allocate on each invocation.
     */
    private static final Qwen3TTSFlashVoice[] VALUES = values();

    private final String voiceId;
    /** Lower-cased {@link #voiceId}, precomputed so lookups do not re-normalize per call. */
    private final String normalizedVoiceId;
    private final String displayName;
    private final Gender gender;
    private final String description;

    Qwen3TTSFlashVoice(String voiceId, String displayName, Gender gender, String description) {
        this.voiceId = voiceId;
        this.normalizedVoiceId = voiceId.toLowerCase(Locale.ROOT);
        this.displayName = displayName;
        this.gender = gender;
        this.description = description;
    }

    /**
     * Returns the voice id to use as the {@code voice} parameter in DashScope TTS requests.
     *
     * @return the voice id exactly as declared on the constant, e.g. {@code "Cherry"}
     */
    public String getVoiceId() {
        return voiceId;
    }

    /**
     * Returns the human friendly display name (typically Chinese).
     *
     * @return the display name
     */
    public String getDisplayName() {
        return displayName;
    }

    /**
     * Returns the gender of this voice (for informational / filtering purposes).
     *
     * @return the gender declared on the constant
     */
    public Gender getGender() {
        return gender;
    }

    /**
     * Returns a short description of the voice characteristics.
     *
     * @return the description
     */
    public String getDescription() {
        return description;
    }

    /**
     * Finds a voice by its voice id, ignoring case.
     *
     * <p>Case is folded with {@link Locale#ROOT} so the result does not depend on the default
     * locale. The input is not trimmed: surrounding whitespace prevents a match.
     *
     * @param voiceId the voice id string, e.g. {@code "Cherry"}
     * @return the matching constant, or {@code null} if {@code voiceId} is {@code null}, empty,
     *     or matches no constant
     */
    public static Qwen3TTSFlashVoice fromVoiceId(String voiceId) {
        if (voiceId == null || voiceId.isEmpty()) {
            return null;
        }
        final String wanted = voiceId.toLowerCase(Locale.ROOT);
        for (Qwen3TTSFlashVoice voice : VALUES) {
            if (voice.normalizedVoiceId.equals(wanted)) {
                return voice;
            }
        }
        return null;
    }

    /**
     * Picks a random voice using {@link ThreadLocalRandom}.
     *
     * @return one of the declared voices
     */
    public static Qwen3TTSFlashVoice random() {
        return random(ThreadLocalRandom.current());
    }

    /**
     * Picks a random voice using the provided {@link Random} instance.
     *
     * <p>Exactly one {@code nextInt(bound)} call is made on {@code random}, with the number of
     * declared voices as the bound, so a seeded generator yields a reproducible choice.
     *
     * @param random the source of randomness; must not be {@code null}
     * @return one of the declared voices
     * @throws NullPointerException if {@code random} is {@code null}
     */
    public static Qwen3TTSFlashVoice random(Random random) {
        if (random == null) {
            throw new NullPointerException("random must not be null");
        }
        // The enum always declares at least one constant, so no emptiness check is needed.
        return VALUES[random.nextInt(VALUES.length)];
    }

    /** Simple gender enum for voices. */
    public enum Gender {
        MALE,
        FEMALE
    }
}

agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEvent.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,4 +166,16 @@ public static GameEvent userInputReceived(String inputType, String content) {
166166
GameEventType.USER_INPUT_RECEIVED,
167167
Map.of("inputType", inputType, "content", content));
168168
}
169+
170+
/**
171+
* Create an audio chunk event for TTS.
172+
*
173+
* @param playerName The name of the player speaking
174+
* @param audioBase64 Base64 encoded audio data
175+
* @return The event
176+
*/
177+
public static GameEvent audioChunk(String playerName, String audioBase64) {
178+
return new GameEvent(
179+
GameEventType.AUDIO_CHUNK, Map.of("player", playerName, "audio", audioBase64));
180+
}
169181
}

agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventEmitter.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,19 @@ public void emitUserInputReceived(String inputType, String content) {
363363
playerSink.tryEmitNext(event);
364364
}
365365

366+
/**
367+
* Emit an audio chunk for TTS.
368+
* Audio is always public (everyone can hear day discussion).
369+
*
370+
* @param playerName The name of the player speaking
371+
* @param audioBase64 Base64 encoded audio data
372+
*/
373+
public void emitAudioChunk(String playerName, String audioBase64) {
374+
GameEvent event = GameEvent.audioChunk(playerName, audioBase64);
375+
godViewHistory.add(event);
376+
playerSink.tryEmitNext(event);
377+
}
378+
366379
/**
367380
* Get the player event stream as a Flux.
368381
* This stream contains events visible to the human player based on their role.

agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventType.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,5 +59,8 @@ public enum GameEventType {
5959
WAIT_USER_INPUT,
6060

6161
/** User input received confirmation. */
62-
USER_INPUT_RECEIVED
62+
USER_INPUT_RECEIVED,
63+
64+
/** Audio chunk for TTS (text-to-speech). */
65+
AUDIO_CHUNK
6366
}

0 commit comments

Comments
 (0)