Skip to content

Commit 20b55eb

Browse files
authored
TTS Speech rework (scp-fs2open#7357)
* add imgui speech options * adapt existing windows sapi speech implementation * adapt existing mac speech integration * add speech linux stubs * add speech support in linux * Add array checks * Use dlopen for speech-dispatcher * corrrect lib name * missing includes and static cast * do not change mac file type * fix clang tidy warnings 1 * set tts rate * set localization ids * fix clang tidy warnings 2 * correct symbol name * Remove voice cache and fix win enumerate_voices overriding voice selection * fix mac rate Done by notimaginative * requested changes * re-add voice cache for linux * Open connection for linux get flags * fix missing } * change voice option combobox to std::pair * delete duplicated voice id sanitizer on windows set voice * Use pairs for speech_enumerate_voices() and adapt linux speech * use reference * actually free vector memory * Clear voice cache when ingame options closes * Use extern to call fsspeech_options_cleanup() * update localization ids * Fixing some linux and window speech oversights * Apply volume and rate after a voice change, correct voice, volume and rate apply order
1 parent 6ddc6c5 commit 20b55eb

13 files changed

Lines changed: 863 additions & 440 deletions

File tree

CMakeLists.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,11 @@ IF(RESET_INSTALL_PREFIX)
7474
ENDIF(NOT $ENV{FS2PATH} STREQUAL "")
7575
ENDIF(RESET_INSTALL_PREFIX)
7676

77-
IF(WIN32 OR APPLE)
77+
IF(WIN32 OR APPLE OR CMAKE_SYSTEM_NAME STREQUAL "Linux")
7878
OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" ON)
79-
ENDIF(WIN32 OR APPLE)
79+
ELSE()
80+
OPTION(FSO_USE_SPEECH "Use text-to-speach libraries" OFF)
81+
ENDIF()
8082

8183
IF (WIN32)
8284
OPTION(FSO_USE_VOICEREC "Enable voice recognition support" ON)
@@ -227,9 +229,7 @@ include(package)
227229
include(doxygen)
228230

229231
# Print used options to log
230-
IF(WIN32 OR APPLE)
231-
message(STATUS "Using text to speech: ${FSO_USE_SPEECH}")
232-
ENDIF()
232+
message(STATUS "Using text to speech: ${FSO_USE_SPEECH}")
233233
IF (WIN32)
234234
message(STATUS "Using voice recogition: ${FSO_USE_VOICEREC}")
235235
message(STATUS "Building FRED2: ${FSO_BUILD_FRED2}")

cmake/finder/FindSpeech.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ if (WIN32)
1111
endif()
1212
elseif(APPLE)
1313
# it should just work
14+
elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
15+
# uses speech-dispatcher with dlopen
1416
else()
1517
message(SEND_ERROR "Text to Speech is not supported on this platform!")
1618
endif()

code/cmdline/cmdline.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1414,7 +1414,7 @@ static json_t* json_get_v1() {
14141414
auto voices = speech_enumerate_voices();
14151415

14161416
for (auto& voice : voices) {
1417-
json_array_append_new(voices_array, json_string(voice.c_str()));
1417+
json_array_append_new(voices_array, json_string(voice.second.c_str()));
14181418
}
14191419

14201420
json_object_set_new(root, "voices", voices_array);

code/localization/localize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ bool *Lcl_unexpected_tstring_check = nullptr;
6464
// NOTE: with map storage of XSTR strings, the indexes no longer need to be contiguous,
6565
// but internal strings should still increment XSTR_SIZE to avoid collisions.
6666
// retail XSTR_SIZE = 1570
67-
// #define XSTR_SIZE 1918 // This is the next available ID
67+
// #define XSTR_SIZE 1932 // This is the next available ID
6868

6969
// struct to allow for strings.tbl-determined x offset
7070
// offset is 0 for english, by default

code/options/Ingame_Options.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,11 @@ void ingame_options_init()
9898
}
9999
}
100100

101+
extern void fsspeech_options_cleanup();
102+
101103
// Shuts down the in-game options state.
// fsspeech_options_cleanup() is called first (it empties the cached TTS voice
// list in fsspeech.cpp), then the OCGR object is reset.
void ingame_options_close()
102104
{
105+
fsspeech_options_cleanup();
103106
OCGR.reset();
104107
}
105108

code/sound/fsspeech.cpp

Lines changed: 241 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
#include "osapi/osregistry.h"
1111
#include "sound/fsspeech.h"
1212
#include "sound/speech.h"
13-
13+
#include "options/Option.h"
1414

1515
extern int Cmdline_freespace_no_sound;
1616

@@ -30,6 +30,204 @@ const char *FSSpeech_play_id[FSSPEECH_FROM_MAX] =
3030
char Speech_buffer[MAX_SPEECH_BUFFER_LEN] = "";
3131
size_t Speech_buffer_len;
3232

33+
static bool ttsrate_change(float new_val, bool initial)
34+
{
35+
if (initial) {
36+
return false;
37+
}
38+
speech_set_rate(new_val);
39+
return true;
40+
}
41+
42+
static bool ttsingame_change(bool new_val, bool initial)
43+
{
44+
if (initial) {
45+
return false;
46+
}
47+
FSSpeech_play_from[FSSPEECH_FROM_INGAME] = new_val;
48+
return true;
49+
}
50+
51+
static bool ttsmulti_change(bool new_val, bool initial)
52+
{
53+
if (initial) {
54+
return false;
55+
}
56+
FSSpeech_play_from[FSSPEECH_FROM_MULTI] = new_val;
57+
return true;
58+
}
59+
60+
static bool ttsbriefing_change(bool new_val, bool initial)
61+
{
62+
if (initial) {
63+
return false;
64+
}
65+
FSSpeech_play_from[FSSPEECH_FROM_BRIEFING] = new_val;
66+
return true;
67+
}
68+
69+
static bool ttstechroom_change(bool new_val, bool initial)
70+
{
71+
if (initial) {
72+
return false;
73+
}
74+
FSSpeech_play_from[FSSPEECH_FROM_TECHROOM] = new_val;
75+
return true;
76+
}
77+
78+
static bool ttsvolume_change(float new_val, bool initial)
79+
{
80+
if (initial) {
81+
return false;
82+
}
83+
speech_set_volume((unsigned short) new_val);
84+
return true;
85+
}
86+
87+
static std::pair<int, SCP_string> ttsvoice_deserializer(const json_t* el)
88+
{
89+
int id;
90+
const char* name = nullptr;
91+
92+
json_error_t err;
93+
if (json_unpack_ex((json_t*)el, &err, 0, "{s:i, s:s}", "id", &id, "name", &name) != 0) {
94+
throw json_exception(err);
95+
}
96+
97+
return std::make_pair(id, name);
98+
}
99+
100+
// Packs an (id, name) voice pair into a JSON object of the form
// {"id": <int>, "name": <string>} using json_pack() and returns its result.
static json_t* ttsvoice_serializer(const std::pair<int, SCP_string>& value)
{
	const int voice_id = value.first;
	const char* voice_name = value.second.c_str();

	return json_pack("{s:i, s:s}", "id", voice_id, "name", voice_name);
}
104+
105+
// Cached result of speech_enumerate_voices(); emptied by fsspeech_options_cleanup().
static SCP_vector<std::pair<int, SCP_string>> voice_list_cache;

// Returns the list of selectable TTS voices as (id, name) pairs.
//
// The speech backend is only queried while the cache is empty. If the backend
// reports no voices, a single placeholder entry (0, "No voices loaded") is
// stored instead, so the returned list always has at least one element.
static SCP_vector<std::pair<int, SCP_string>> ttsvoice_enumerator()
{
	if (voice_list_cache.empty()) {
		voice_list_cache = speech_enumerate_voices();

		if (voice_list_cache.empty()) {
			voice_list_cache.emplace_back(0, "No voices loaded");
		}
	}

	return voice_list_cache;
}
122+
123+
// Display callback for the voice option: the text shown for a voice entry is
// simply the name half of the (id, name) pair.
static SCP_string ttsvoice_display(const std::pair<int, SCP_string>& vi)
{
	const SCP_string& voice_name = vi.second;
	return voice_name;
}
127+
128+
// Float option "Speech.Volume" in the "Audio" category: range 0..100,
// default 100. Changes are routed to ttsvolume_change.
// NOTE(review): the integers paired with each string look like XSTR
// localization ids -- confirm against strings.tbl.
static auto SpeechVolumeOption = options::OptionBuilder<float>("Speech.Volume",
129+
std::pair<const char*, int>{"TTS Volume", 1920},
130+
std::pair<const char*, int>{"Volume used for playing TTS speech", 1921})
131+
.category(std::make_pair("Audio", 1826))
132+
.range(0.0f, 100.0f)
133+
.default_val(100.0f)
134+
.change_listener(ttsvolume_change)
135+
.importance(2)
136+
.finish();
137+
138+
// Float option "Speech.Rate" in the "Audio" category: range 50..150,
// default 100 (described to the user as "100 = normal"). Changes are routed
// to ttsrate_change.
// NOTE(review): the integers paired with each string look like XSTR
// localization ids -- confirm against strings.tbl.
static auto SpeechRateOption = options::OptionBuilder<float>("Speech.Rate",
139+
std::pair<const char*, int>{"TTS Rate", 1922},
140+
std::pair<const char*, int>{"Speed of the TTS voice (100 = normal)", 1923})
141+
.category(std::make_pair("Audio", 1826))
142+
.range(50.0f, 150.0f)
143+
.default_val(100.0f)
144+
.change_listener(ttsrate_change)
145+
.importance(1)
146+
.finish();
147+
148+
static bool ttsvoice_change(const std::pair<int, SCP_string>& new_voice, bool initial)
149+
{
150+
if (initial) {
151+
return false;
152+
}
153+
speech_set_voice(new_voice.first);
154+
// Re-apply volume and rate, it is needed on Mac and maybe on other OS as well
155+
speech_set_volume((unsigned short)SpeechVolumeOption->getValue());
156+
speech_set_rate(SpeechRateOption->getValue());
157+
return true;
158+
}
159+
160+
// Option "Speech.Voice" in the "Audio" category, holding an (id, name) voice pair.
// Candidate values come from ttsvoice_enumerator; the default is the first
// entry of that list. Values are shown via ttsvoice_display, persisted with
// ttsvoice_serializer / ttsvoice_deserializer, and changes are routed to
// ttsvoice_change.
// NOTE(review): the integers paired with each string look like XSTR
// localization ids -- confirm against strings.tbl.
static auto SpeechVoiceOption = options::OptionBuilder<std::pair<int, SCP_string>>("Speech.Voice",
161+
std::pair<const char*, int>{"TTS Voice", 1918},
162+
std::pair<const char*, int>{"The voice used to read text", 1919})
163+
.category(std::make_pair("Audio", 1826))
164+
.level(options::ExpertLevel::Beginner)
165+
.default_func([]() { return ttsvoice_enumerator().front(); }) // always guarantees at least 1 value
166+
.enumerator(ttsvoice_enumerator)
167+
.display(ttsvoice_display)
168+
.serializer(ttsvoice_serializer)
169+
.deserializer(ttsvoice_deserializer)
170+
.flags({ options::OptionFlags::ForceMultiValueSelection })
171+
.change_listener(ttsvoice_change)
172+
.importance(3)
173+
.finish();
174+
175+
// Boolean option "Speech.Briefing" in the "Audio" category, default true.
// Changes are routed to ttsbriefing_change, which updates
// FSSpeech_play_from[FSSPEECH_FROM_BRIEFING].
static auto SpeechBriefingOption = options::OptionBuilder<bool>("Speech.Briefing",
176+
std::pair<const char*, int>{"TTS in briefings", 1924},
177+
std::pair<const char*, int>{"Enable or disable TTS in briefings", 1925})
178+
.category(std::make_pair("Audio", 1826))
179+
.level(options::ExpertLevel::Beginner)
180+
.change_listener(ttsbriefing_change)
181+
.default_val(true)
182+
.importance(0)
183+
.finish();
184+
185+
// Boolean option "Speech.Techroom" in the "Audio" category, default true.
// Changes are routed to ttstechroom_change, which updates
// FSSpeech_play_from[FSSPEECH_FROM_TECHROOM].
static auto SpeechTechroomOption = options::OptionBuilder<bool>("Speech.Techroom",
186+
std::pair<const char*, int>{"TTS in techroom", 1926},
187+
std::pair<const char*, int>{"Enable or disable TTS in techroom", 1927})
188+
.category(std::make_pair("Audio", 1826))
189+
.level(options::ExpertLevel::Beginner)
190+
.change_listener(ttstechroom_change)
191+
.default_val(true)
192+
.importance(0)
193+
.finish();
194+
195+
// Boolean option "Speech.Ingame" in the "Audio" category, default true.
// Changes are routed to ttsingame_change, which updates
// FSSpeech_play_from[FSSPEECH_FROM_INGAME].
static auto SpeechIngameOption = options::OptionBuilder<bool>("Speech.Ingame",
196+
std::pair<const char*, int>{"TTS in-game", 1928},
197+
std::pair<const char*, int>{"Enable or disable TTS in-game", 1929})
198+
.category(std::make_pair("Audio", 1826))
199+
.level(options::ExpertLevel::Beginner)
200+
.change_listener(ttsingame_change)
201+
.default_val(true)
202+
.importance(0)
203+
.finish();
204+
205+
// Boolean option "Speech.Multi" in the "Audio" category, default true.
// Changes are routed to ttsmulti_change, which updates
// FSSpeech_play_from[FSSPEECH_FROM_MULTI].
static auto SpeechMultiOption = options::OptionBuilder<bool>("Speech.Multi",
206+
std::pair<const char*, int>{"TTS in multiplayer", 1930},
207+
std::pair<const char*, int>{"Enable or disable TTS in multiplayer", 1931})
208+
.category(std::make_pair("Audio", 1826))
209+
.level(options::ExpertLevel::Beginner)
210+
.change_listener(ttsmulti_change)
211+
.default_val(true)
212+
.importance(0)
213+
.finish();
214+
215+
void sanitize_text(const char* input, SCP_string& output) {
216+
output.clear();
217+
bool saw_dollar = false;
218+
for (auto ch : unicode::codepoint_range(input)) {
219+
if (ch == UNICODE_CHAR('$')) {
220+
saw_dollar = true;
221+
continue;
222+
}
223+
else if (saw_dollar) {
224+
saw_dollar = false;
225+
continue;
226+
}
227+
unicode::encode(ch, std::back_inserter(output));
228+
}
229+
}
230+
33231
bool fsspeech_init()
34232
{
35233
if (speech_inited) {
@@ -45,18 +243,34 @@ bool fsspeech_init()
45243
return false;
46244
}
47245

48-
// Get the settings from the registry
49-
for(int i = 0; i < FSSPEECH_FROM_MAX; i++) {
50-
FSSpeech_play_from[i] =
51-
os_config_read_uint(NULL, FSSpeech_play_id[i], 0) ? true : false;
52-
nprintf(("Speech", "Play %s: %s\n", FSSpeech_play_id[i], FSSpeech_play_from[i] ? "true" : "false"));
246+
if (Using_in_game_options)
247+
{
248+
FSSpeech_play_from[FSSPEECH_FROM_TECHROOM] = SpeechTechroomOption->getValue();
249+
FSSpeech_play_from[FSSPEECH_FROM_BRIEFING] = SpeechBriefingOption->getValue();
250+
FSSpeech_play_from[FSSPEECH_FROM_INGAME] = SpeechIngameOption->getValue();
251+
FSSpeech_play_from[FSSPEECH_FROM_MULTI] = SpeechMultiOption->getValue();
252+
// The apply order must be Voice->Volume/Rate to avoid issues on Mac.
253+
speech_set_voice(SpeechVoiceOption->getValue().first);
254+
speech_set_volume((unsigned short)SpeechVolumeOption->getValue());
255+
speech_set_rate(SpeechRateOption->getValue());
256+
}
257+
else
258+
{
259+
// Get the settings from the registry
260+
for (int i = 0; i < FSSPEECH_FROM_MAX; i++) {
261+
FSSpeech_play_from[i] = static_cast<bool>(os_config_read_uint(nullptr, FSSpeech_play_id[i], 0));
262+
nprintf(("Speech", "Play %s: %s\n", FSSpeech_play_id[i], FSSpeech_play_from[i] ? "true" : "false"));
263+
}
264+
265+
int voice = os_config_read_uint(nullptr, "SpeechVoice", 0);
266+
speech_set_voice(voice);
267+
268+
int volume = os_config_read_uint(nullptr, "SpeechVolume", 100);
269+
speech_set_volume((unsigned short)volume);
270+
271+
int rate = os_config_read_uint(nullptr, "SpeechRate", 100);
272+
speech_set_rate(static_cast<float>(rate));
53273
}
54-
55-
int volume = os_config_read_uint(NULL, "SpeechVolume", 100);
56-
speech_set_volume((unsigned short) volume);
57-
58-
int voice = os_config_read_uint(NULL, "SpeechVoice", 0);
59-
speech_set_voice(voice);
60274

61275
speech_inited = 1;
62276

@@ -75,6 +289,11 @@ void fsspeech_deinit()
75289

76290
void fsspeech_play(int type, const char *text)
77291
{
292+
if (text == nullptr) {
293+
nprintf(("Speech", "Not playing speech because passed text is null.\n"));
294+
return;
295+
}
296+
78297
if (!speech_inited) {
79298
nprintf(("Speech", "Aborting fsspech_play because speech_inited is false.\n"));
80299
return;
@@ -90,7 +309,10 @@ void fsspeech_play(int type, const char *text)
90309
return;
91310
}
92311

93-
speech_play(text);
312+
SCP_string sanitized_string;
313+
sanitize_text(text, sanitized_string);
314+
315+
speech_play(sanitized_string);
94316
}
95317

96318
void fsspeech_stop()
@@ -157,3 +379,9 @@ bool fsspeech_playing()
157379

158380
return speech_is_speaking();
159381
}
382+
383+
void fsspeech_options_cleanup()
384+
{
385+
voice_list_cache.clear();
386+
voice_list_cache.shrink_to_fit();
387+
}

code/sound/fsspeech.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,7 @@ void fsspeech_play_buffer(int type);
3131
bool fsspeech_play_from(int type);
3232
bool fsspeech_playing();
3333

34+
// Clean up the voice cache after the options menu is closed
35+
void fsspeech_options_cleanup();
36+
3437
#endif // header define

0 commit comments

Comments
 (0)