Skip to content

Commit c1f0bbd

Browse files
committed
feat: wire SystemEngine rate/pitch/volume to speech-dispatcher, add pause/resume/synth_to_bytes C API
- SystemEngine now calls set_voice_rate_all/pitch_all/volume_all with proper float-to-SPD scale conversion (0.1-10.0 range → -100 to +100)
- SystemEngine.pause()/resume() call speech-dispatcher pause_all/resume_all
- New C API: tts_pause(), tts_resume(), tts_synth_to_bytes(), tts_free_bytes()
- Updated tts_wrapper.h with new function declarations
- Fix unused variable warning in factory.rs
1 parent d1fde5f commit c1f0bbd

4 files changed

Lines changed: 173 additions & 11 deletions

File tree

include/tts_wrapper.h

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
#include <stdint.h>
77
#include <stdbool.h>
8-
98
#ifdef __cplusplus
109
extern "C" {
1110
#endif
@@ -231,8 +230,46 @@ void tts_free_engine_info(struct tts_engine_info *engines, int32_t count);
231230
*/
232231
const char *tts_get_last_error(void);
233232

233+
/**
234+
* Pause in-progress speech.
235+
*
236+
* # Safety
237+
* `ctx` must be valid.
238+
*/
239+
void tts_pause(struct tts_ctx *ctx);
240+
241+
/**
242+
* Resume paused speech.
243+
*
244+
* # Safety
245+
* `ctx` must be valid.
246+
*/
247+
void tts_resume(struct tts_ctx *ctx);
248+
249+
/**
250+
* Synthesize text to audio bytes without playback.
251+
* Writes a heap-allocated buffer to `*out_bytes` and its length to `*out_len`.
252+
* Caller must free with [`tts_free_bytes`].
253+
* Returns 0 on success, -1 on failure.
254+
*
255+
* # Safety
256+
* `ctx` must be valid. `out_bytes` and `out_len` must be non-null.
257+
*/
258+
int32_t tts_synth_to_bytes(struct tts_ctx *ctx,
259+
const char *text,
260+
uint8_t **out_bytes,
261+
uintptr_t *out_len);
262+
263+
/**
264+
* Free a byte buffer returned by [`tts_synth_to_bytes`].
265+
*
266+
* # Safety
267+
* `bytes` must be from `tts_synth_to_bytes` with the matching `len`.
268+
*/
269+
void tts_free_bytes(uint8_t *bytes, uintptr_t len);
270+
234271
#ifdef __cplusplus
235-
}
236-
#endif
272+
} // extern "C"
273+
#endif // __cplusplus
237274

238275
#endif /* TTS_WRAPPER_H */

src/factory.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use crate::system_engine::SystemEngine;
1616
/// `credentials_json` is a JSON object with engine-specific credentials
1717
/// (e.g. `{"apiKey": "..."}`). Pass `""` for engines that don't need credentials.
1818
#[must_use]
19-
pub fn create_engine(engine_id: &str, credentials_json: &str) -> Option<Box<dyn TtsEngine>> {
19+
pub fn create_engine(engine_id: &str, _credentials_json: &str) -> Option<Box<dyn TtsEngine>> {
2020
match engine_id {
2121
#[cfg(feature = "system")]
2222
"system" => Some(Box::new(SystemEngine::new())),

src/lib.rs

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,3 +557,99 @@ pub extern "C" fn tts_get_last_error() -> *const c_char {
557557
Err(_) => ptr::null(),
558558
}
559559
}
560+
561+
/// Pause in-progress speech.
562+
///
563+
/// # Safety
564+
/// `ctx` must be valid.
565+
#[no_mangle]
566+
pub extern "C" fn tts_pause(ctx: *mut tts_ctx) {
567+
if ctx.is_null() {
568+
return;
569+
}
570+
let ctx_ref = unsafe { &*ctx };
571+
let engine = ctx_ref.engine.lock().unwrap();
572+
let _ = engine.pause();
573+
}
574+
575+
/// Resume paused speech.
576+
///
577+
/// # Safety
578+
/// `ctx` must be valid.
579+
#[no_mangle]
580+
pub extern "C" fn tts_resume(ctx: *mut tts_ctx) {
581+
if ctx.is_null() {
582+
return;
583+
}
584+
let ctx_ref = unsafe { &*ctx };
585+
let engine = ctx_ref.engine.lock().unwrap();
586+
let _ = engine.resume();
587+
}
588+
589+
/// Synthesize text to audio bytes without playback.
590+
/// Writes a heap-allocated buffer to `*out_bytes` and its length to `*out_len`.
591+
/// Caller must free with [`tts_free_bytes`].
592+
/// Returns 0 on success, -1 on failure.
593+
///
594+
/// # Safety
595+
/// `ctx` must be valid. `out_bytes` and `out_len` must be non-null.
596+
#[no_mangle]
597+
pub extern "C" fn tts_synth_to_bytes(
598+
ctx: *mut tts_ctx,
599+
text: *const c_char,
600+
out_bytes: *mut *mut u8,
601+
out_len: *mut usize,
602+
) -> i32 {
603+
if ctx.is_null() || text.is_null() || out_bytes.is_null() || out_len.is_null() {
604+
return -1;
605+
}
606+
let ctx_ref = unsafe { &*ctx };
607+
let text_str = unsafe { CStr::from_ptr(text) }
608+
.to_string_lossy()
609+
.into_owned();
610+
let voice = ctx_ref.voice_id.lock().unwrap().clone();
611+
let rate = *ctx_ref.rate.lock().unwrap();
612+
let pitch = *ctx_ref.pitch.lock().unwrap();
613+
let volume = *ctx_ref.volume.lock().unwrap();
614+
615+
let engine = ctx_ref.engine.lock().unwrap();
616+
match engine.synth_to_bytes(&text_str, voice.as_deref(), rate, pitch, volume) {
617+
Ok(data) => {
618+
if data.is_empty() {
619+
unsafe {
620+
*out_bytes = ptr::null_mut();
621+
*out_len = 0;
622+
}
623+
return 0;
624+
}
625+
let len = data.len();
626+
let layout = std::alloc::Layout::array::<u8>(len).unwrap();
627+
let ptr = unsafe { std::alloc::alloc(layout) };
628+
unsafe {
629+
ptr::copy_nonoverlapping(data.as_ptr(), ptr, len);
630+
*out_bytes = ptr;
631+
*out_len = len;
632+
}
633+
0
634+
}
635+
Err(e) => {
636+
*ctx_ref.last_error.lock().unwrap() = e.to_string();
637+
-1
638+
}
639+
}
640+
}
641+
642+
/// Free a byte buffer returned by [`tts_synth_to_bytes`].
///
/// A null `bytes` or a zero `len` is a no-op. A `len` too large to describe
/// any allocation (above `isize::MAX`) is also ignored rather than panicking
/// inside an `extern "C"` function.
///
/// # Safety
/// `bytes` must be from `tts_synth_to_bytes` with the matching `len`.
#[no_mangle]
pub extern "C" fn tts_free_bytes(bytes: *mut u8, len: usize) {
    if bytes.is_null() || len == 0 {
        return;
    }
    // Such a length can never have been produced by `tts_synth_to_bytes`.
    let layout = match std::alloc::Layout::array::<u8>(len) {
        Ok(layout) => layout,
        Err(_) => return,
    };
    // SAFETY: the caller guarantees `bytes` was allocated by
    // `tts_synth_to_bytes` with exactly this `len`, i.e. with this layout.
    unsafe {
        std::alloc::dealloc(bytes, layout);
    }
}

src/system_engine.rs

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
1-
//! System TTS engine via speech-dispatcher (Linux).
2-
31
use crate::engine::{estimate_word_boundaries, TtsEngine};
42
use crate::types::{TtsError, TtsResult, Voice};
53
use std::sync::Mutex;
64

7-
/// TTS engine that uses the system's speech-dispatcher daemon.
85
/// System TTS engine via speech-dispatcher.
#[derive(Debug)]
96
pub struct SystemEngine {
107
// Optional speech-dispatcher connection behind a mutex.
// NOTE(review): presumably `None` when opening the connection failed — confirm in `new`.
conn: Mutex<Option<speech_dispatcher::Connection>>,
118
}
129

1310
impl SystemEngine {
14-
/// Create a new system engine, connecting to speech-dispatcher.
1511
pub fn new() -> Self {
1612
let conn = speech_dispatcher::Connection::open(
1713
"rust-tts-wrapper",
@@ -26,14 +22,26 @@ impl SystemEngine {
2622
}
2723
}
2824

25+
/// Convert a rate multiplier to speech-dispatcher's integer rate scale.
///
/// `1.0` maps to `0` and each `0.01` step maps to one unit. The input is
/// clamped to `0.1..=10.0` and the result to `-100..=100`, the only range
/// speech-dispatcher accepts. Without the second clamp, any rate above `2.0`
/// produced an out-of-range value (up to `900`). A NaN input yields `0`.
fn rate_to_spd(rate: f32) -> i32 {
    let offset = ((rate.clamp(0.1, 10.0) - 1.0) * 100.0).round();
    offset.clamp(-100.0, 100.0) as i32
}
28+
29+
/// Convert a pitch multiplier to speech-dispatcher's integer pitch scale.
///
/// `1.0` maps to `0` and each `0.01` step maps to one unit. The input is
/// clamped to `0.1..=10.0` and the result to `-100..=100`, the only range
/// speech-dispatcher accepts. Without the second clamp, any pitch above `2.0`
/// produced an out-of-range value (up to `900`). A NaN input yields `0`.
fn pitch_to_spd(pitch: f32) -> i32 {
    let offset = ((pitch.clamp(0.1, 10.0) - 1.0) * 100.0).round();
    offset.clamp(-100.0, 100.0) as i32
}
32+
33+
/// Convert a volume multiplier to an integer offset around zero.
///
/// The input is clamped to `0.0..=2.0`, so the result always lies in
/// `-100..=100`, with `1.0` mapping to `0`.
fn volume_to_spd(volume: f32) -> i32 {
34+
((volume.clamp(0.0, 2.0) - 1.0) * 100.0).round() as i32
35+
}
36+
2937
impl TtsEngine for SystemEngine {
3038
fn speak(
3139
&self,
3240
text: &str,
3341
voice: Option<&str>,
34-
_rate: f32,
35-
_pitch: f32,
36-
_volume: f32,
42+
rate: f32,
43+
pitch: f32,
44+
volume: f32,
3745
_on_audio: Option<crate::engine::OnAudioCallback>,
3846
mut on_boundary: Option<crate::engine::OnBoundaryCallback>,
3947
) -> TtsResult<()> {
@@ -45,6 +53,11 @@ impl TtsEngine for SystemEngine {
4553
if let Some(v) = voice {
4654
let _ = conn.set_synthesis_voice_all(v);
4755
}
56+
57+
let _ = conn.set_voice_rate_all(rate_to_spd(rate));
58+
let _ = conn.set_voice_pitch_all(pitch_to_spd(pitch));
59+
let _ = conn.set_volume_all(volume_to_spd(volume));
60+
4861
conn.say(speech_dispatcher::Priority::Important, text);
4962

5063
if let Some(cb) = on_boundary.as_mut() {
@@ -83,6 +96,22 @@ impl TtsEngine for SystemEngine {
8396
.map_err(|e| TtsError(format!("Stop failed: {e}")))
8497
}
8598

99+
fn pause(&self) -> TtsResult<()> {
100+
let guard = self.conn.lock().unwrap();
101+
if let Some(conn) = guard.as_ref() {
102+
let _ = conn.pause_all();
103+
}
104+
Ok(())
105+
}
106+
107+
fn resume(&self) -> TtsResult<()> {
108+
let guard = self.conn.lock().unwrap();
109+
if let Some(conn) = guard.as_ref() {
110+
let _ = conn.resume_all();
111+
}
112+
Ok(())
113+
}
114+
86115
/// List available voices.
///
/// Currently always returns an empty list.
fn get_voices(&self) -> TtsResult<Vec<Voice>> {
87116
Ok(vec![])
88117
}

0 commit comments

Comments
 (0)