|
| 1 | +//! macOS AVSpeechSynthesizer engine via raw objc calls. |
| 2 | +
|
| 3 | +use crate::engine::{estimate_word_boundaries, TtsEngine}; |
| 4 | +use crate::types::{TtsError, TtsResult, Voice}; |
| 5 | +use objc::runtime::Object; |
| 6 | +use objc::{class, msg_send, sel, sel_impl}; |
| 7 | +use std::ffi::c_void; |
| 8 | +use std::sync::{Arc, Mutex}; |
| 9 | + |
| 10 | +#[derive(Debug)] |
| 11 | +pub struct AvSynthEngine { |
| 12 | + synth: Arc<Mutex<Option<*mut Object>>>, |
| 13 | + voice_id: Mutex<Option<String>>, |
| 14 | +} |
| 15 | + |
| 16 | +unsafe impl Send for AvSynthEngine {} |
| 17 | +unsafe impl Sync for AvSynthEngine {} |
| 18 | + |
| 19 | +impl AvSynthEngine { |
| 20 | + pub fn new() -> Self { |
| 21 | + let synth = unsafe { |
| 22 | + let cls = class!(AVSpeechSynthesizer); |
| 23 | + let obj: *mut Object = msg_send![cls, alloc]; |
| 24 | + let obj: *mut Object = msg_send![obj, init]; |
| 25 | + if obj.is_null() { |
| 26 | + None |
| 27 | + } else { |
| 28 | + Some(obj) |
| 29 | + } |
| 30 | + }; |
| 31 | + AvSynthEngine { |
| 32 | + synth: Arc::new(Mutex::new(synth)), |
| 33 | + voice_id: Mutex::new(None), |
| 34 | + } |
| 35 | + } |
| 36 | +} |
| 37 | + |
| 38 | +unsafe fn to_nsstring(s: &str) -> *mut Object { |
| 39 | + let cls = class!(NSString); |
| 40 | + let bytes = s.as_ptr() as *const c_void; |
| 41 | + let len = s.len(); |
| 42 | + let ns: *mut Object = msg_send![cls, alloc]; |
| 43 | + let ns: *mut Object = msg_send![ns, |
| 44 | + initWithBytes: bytes |
| 45 | + length: len |
| 46 | + encoding: 4usize |
| 47 | + ]; |
| 48 | + ns |
| 49 | +} |
| 50 | + |
| 51 | +unsafe fn from_nsstring(ns: *mut Object) -> String { |
| 52 | + if ns.is_null() { |
| 53 | + return String::new(); |
| 54 | + } |
| 55 | + let len: usize = msg_send![ns, lengthOfBytesUsingEncoding: 4usize]; |
| 56 | + if len == 0 { |
| 57 | + return String::new(); |
| 58 | + } |
| 59 | + let mut buf = vec![0u8; len]; |
| 60 | + let _: usize = msg_send![ns, |
| 61 | + getBytes: buf.as_mut_ptr() |
| 62 | + maxLength: len |
| 63 | + encoding: 4usize |
| 64 | + options: 1usize |
| 65 | + range: (0usize, len) |
| 66 | + remainingRange: std::ptr::null::<(usize, usize)>() |
| 67 | + ]; |
| 68 | + String::from_utf8_lossy(&buf).into_owned() |
| 69 | +} |
| 70 | + |
/// Restricts a requested speech rate to the `[0.1, 10.0]` interval before it
/// is handed to `AVSpeechUtterance`. A NaN input is returned unchanged.
///
/// NOTE(review): AVFoundation documents the utterance rate as lying between
/// `AVSpeechUtteranceMinimumSpeechRate` and `AVSpeechUtteranceMaximumSpeechRate`;
/// confirm that an upper bound of 10.0 is intended.
fn rate_to_avsynth(rate: f32) -> f32 {
    const MIN_RATE: f32 = 0.1;
    const MAX_RATE: f32 = 10.0;
    f32::clamp(rate, MIN_RATE, MAX_RATE)
}
| 74 | + |
/// Restricts a requested pitch multiplier to the `[0.5, 2.0]` interval before
/// it is passed to `setPitchMultiplier:`. A NaN input is returned unchanged.
fn pitch_to_avsynth(pitch: f32) -> f32 {
    const MIN_PITCH: f32 = 0.5;
    const MAX_PITCH: f32 = 2.0;
    f32::clamp(pitch, MIN_PITCH, MAX_PITCH)
}
| 78 | + |
/// Restricts a requested volume to the `[0.0, 1.0]` interval before it is
/// passed to `setVolume:`. A NaN input is returned unchanged.
fn volume_to_avsynth(volume: f32) -> f32 {
    const SILENT: f32 = 0.0;
    const FULL: f32 = 1.0;
    f32::clamp(volume, SILENT, FULL)
}
| 82 | + |
| 83 | +impl TtsEngine for AvSynthEngine { |
| 84 | + fn speak( |
| 85 | + &self, |
| 86 | + text: &str, |
| 87 | + voice: Option<&str>, |
| 88 | + rate: f32, |
| 89 | + pitch: f32, |
| 90 | + volume: f32, |
| 91 | + _on_audio: Option<crate::engine::OnAudioCallback>, |
| 92 | + mut on_boundary: Option<crate::engine::OnBoundaryCallback>, |
| 93 | + ) -> TtsResult<()> { |
| 94 | + let guard = self.synth.lock().unwrap(); |
| 95 | + let synth = guard |
| 96 | + .ok_or_else(|| TtsError("AVSpeechSynthesizer not initialized".into()))?; |
| 97 | + |
| 98 | + unsafe { |
| 99 | + let ns_text = to_nsstring(text); |
| 100 | + let utterance_cls = class!(AVSpeechUtterance); |
| 101 | + let u: *mut Object = msg_send![utterance_cls, alloc]; |
| 102 | + let u: *mut Object = msg_send![u, initWithString: ns_text]; |
| 103 | + |
| 104 | + if !u.is_null() { |
| 105 | + let _: () = msg_send![u, setRate: rate_to_avsynth(rate)]; |
| 106 | + let _: () = msg_send![u, setPitchMultiplier: pitch_to_avsynth(pitch)]; |
| 107 | + let _: () = msg_send![u, setVolume: volume_to_avsynth(volume)]; |
| 108 | + |
| 109 | + let voice_to_use = voice |
| 110 | + .map(|v| v.to_string()) |
| 111 | + .or_else(|| self.voice_id.lock().unwrap().clone()); |
| 112 | + |
| 113 | + if let Some(ref vid) = voice_to_use { |
| 114 | + let ns_vid = to_nsstring(vid); |
| 115 | + let voice_cls = class!(AVSpeechSynthesisVoice); |
| 116 | + let av_voice: *mut Object = msg_send![voice_cls, voiceWithIdentifier: ns_vid]; |
| 117 | + if !av_voice.is_null() { |
| 118 | + let _: () = msg_send![u, setVoice: av_voice]; |
| 119 | + } |
| 120 | + } |
| 121 | + |
| 122 | + let _: () = msg_send![synth, speakUtterance: u]; |
| 123 | + let _: () = msg_send![u, release]; |
| 124 | + } |
| 125 | + let _: () = msg_send![ns_text, release]; |
| 126 | + } |
| 127 | + |
| 128 | + if let Some(cb) = on_boundary.as_mut() { |
| 129 | + let estimated = estimate_word_boundaries(text); |
| 130 | + for b in &estimated { |
| 131 | + #[allow(clippy::cast_precision_loss)] |
| 132 | + let start = b.offset as f32 / 1000.0; |
| 133 | + #[allow(clippy::cast_precision_loss)] |
| 134 | + let end = (b.offset + b.duration) as f32 / 1000.0; |
| 135 | + cb(&b.text, start, end); |
| 136 | + } |
| 137 | + } |
| 138 | + |
| 139 | + Ok(()) |
| 140 | + } |
| 141 | + |
| 142 | + fn speak_sync( |
| 143 | + &self, |
| 144 | + text: &str, |
| 145 | + voice: Option<&str>, |
| 146 | + rate: f32, |
| 147 | + pitch: f32, |
| 148 | + volume: f32, |
| 149 | + on_audio: Option<crate::engine::OnAudioCallback>, |
| 150 | + on_boundary: Option<crate::engine::OnBoundaryCallback>, |
| 151 | + ) -> TtsResult<()> { |
| 152 | + self.speak(text, voice, rate, pitch, volume, on_audio, on_boundary) |
| 153 | + } |
| 154 | + |
| 155 | + fn stop(&self) -> TtsResult<()> { |
| 156 | + let guard = self.synth.lock().unwrap(); |
| 157 | + if let Some(synth) = *guard { |
| 158 | + unsafe { |
| 159 | + let _: () = msg_send![synth, stopSpeakingAtBoundary: 0i32]; |
| 160 | + } |
| 161 | + } |
| 162 | + Ok(()) |
| 163 | + } |
| 164 | + |
| 165 | + fn pause(&self) -> TtsResult<()> { |
| 166 | + let guard = self.synth.lock().unwrap(); |
| 167 | + if let Some(synth) = *guard { |
| 168 | + unsafe { |
| 169 | + let _: () = msg_send![synth, pauseSpeakingAtBoundary: 0i32]; |
| 170 | + } |
| 171 | + } |
| 172 | + Ok(()) |
| 173 | + } |
| 174 | + |
| 175 | + fn resume(&self) -> TtsResult<()> { |
| 176 | + let guard = self.synth.lock().unwrap(); |
| 177 | + if let Some(synth) = *guard { |
| 178 | + unsafe { |
| 179 | + let _: () = msg_send![synth, continueSpeaking]; |
| 180 | + } |
| 181 | + } |
| 182 | + Ok(()) |
| 183 | + } |
| 184 | + |
| 185 | + fn get_voices(&self) -> TtsResult<Vec<Voice>> { |
| 186 | + unsafe { |
| 187 | + let voice_cls = class!(AVSpeechSynthesisVoice); |
| 188 | + let voices: *mut Object = msg_send![voice_cls, speechVoices]; |
| 189 | + if voices.is_null() { |
| 190 | + return Ok(vec![]); |
| 191 | + } |
| 192 | + |
| 193 | + let count: usize = msg_send![voices, count]; |
| 194 | + let mut result = Vec::with_capacity(count); |
| 195 | + |
| 196 | + for i in 0..count { |
| 197 | + let v: *mut Object = msg_send![voices, objectAtIndex: i]; |
| 198 | + if !v.is_null() { |
| 199 | + let id_ptr: *mut Object = msg_send![v, identifier]; |
| 200 | + let name_ptr: *mut Object = msg_send![v, name]; |
| 201 | + let lang_ptr: *mut Object = msg_send![v, language]; |
| 202 | + |
| 203 | + let id = from_nsstring(id_ptr); |
| 204 | + let name = from_nsstring(name_ptr); |
| 205 | + let lang = from_nsstring(lang_ptr); |
| 206 | + |
| 207 | + result.push(Voice { |
| 208 | + id, |
| 209 | + name, |
| 210 | + gender: crate::types::Gender::Unknown, |
| 211 | + provider: "avsynth".to_string(), |
| 212 | + language_codes: vec![crate::types::LanguageCode { |
| 213 | + bcp47: lang.clone(), |
| 214 | + iso639_3: lang.split('-').next().unwrap_or("en").to_string(), |
| 215 | + display: lang, |
| 216 | + }], |
| 217 | + }); |
| 218 | + } |
| 219 | + } |
| 220 | + Ok(result) |
| 221 | + } |
| 222 | + } |
| 223 | + |
| 224 | + fn engine_id(&self) -> &'static str { |
| 225 | + "avsynth" |
| 226 | + } |
| 227 | +} |
| 228 | + |
| 229 | +impl Drop for AvSynthEngine { |
| 230 | + fn drop(&mut self) { |
| 231 | + if let Ok(mut guard) = self.synth.lock() { |
| 232 | + if let Some(ptr) = guard.take() { |
| 233 | + unsafe { |
| 234 | + let _: () = msg_send![ptr, release]; |
| 235 | + } |
| 236 | + } |
| 237 | + } |
| 238 | + } |
| 239 | +} |
0 commit comments