Skip to content

Commit f05ba5e

Browse files
committed
tts image urls
1 parent d9e72ab commit f05ba5e

5 files changed

Lines changed: 307 additions & 6 deletions

File tree

crates/openfang-kernel/src/kernel.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1056,7 +1056,13 @@ impl OpenFangKernel {
10561056
// Initialize media understanding engine
10571057
let media_engine =
10581058
openfang_runtime::media_understanding::MediaEngine::new(config.media.clone());
1059-
let tts_engine = openfang_runtime::tts::TtsEngine::new(config.tts.clone());
1059+
// Closes #1051: thread MediaConfig URL overrides into the TTS engine
1060+
// so local OpenAI/ElevenLabs-compatible services can be targeted.
1061+
let tts_engine = openfang_runtime::tts::TtsEngine::new(config.tts.clone())
1062+
.with_base_urls(
1063+
config.media.tts_openai_base_url.clone(),
1064+
config.media.tts_elevenlabs_base_url.clone(),
1065+
);
10601066
let mut pairing = crate::pairing::PairingManager::new(config.pairing.clone());
10611067

10621068
// Load paired devices from database and set up persistence callback

crates/openfang-runtime/src/image_gen.rs

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,15 @@ use tracing::warn;
77
/// Generate images via OpenAI's image generation API.
88
///
99
/// Requires OPENAI_API_KEY to be set.
10-
pub async fn generate_image(request: &ImageGenRequest) -> Result<ImageGenResult, String> {
10+
///
11+
/// `base_url_override` (sourced from `MediaConfig.image_gen_base_url`) lets
12+
/// callers redirect the request to a local OpenAI-compatible image service
13+
/// (e.g. Lemonade/Flux, LM Studio). When `None`, the hardcoded
14+
/// `https://api.openai.com/v1/images/generations` endpoint is used. Closes #1051.
15+
pub async fn generate_image(
16+
request: &ImageGenRequest,
17+
base_url_override: Option<&str>,
18+
) -> Result<ImageGenResult, String> {
1119
// Validate request
1220
request.validate()?;
1321

@@ -30,9 +38,19 @@ pub async fn generate_image(request: &ImageGenRequest) -> Result<ImageGenResult,
3038
body["quality"] = serde_json::json!(request.quality);
3139
}
3240

41+
// `image_gen_base_url` (config.media.image_gen_base_url) overrides the
42+
// hardcoded provider URL when set, allowing the same OpenAI-compat JSON
43+
// wire format to be sent to a local image generation service
44+
// (Lemonade/Flux, LM Studio, etc.) instead of the cloud provider. The
45+
// Authorization header is still built from `OPENAI_API_KEY`; local
46+
// services typically accept any non-empty bearer token. Closes #1051.
47+
let url = base_url_override
48+
.map(|base| format!("{}/v1/images/generations", base.trim_end_matches('/')))
49+
.unwrap_or_else(|| "https://api.openai.com/v1/images/generations".to_string());
50+
3351
let client = reqwest::Client::new();
3452
let response = client
35-
.post("https://api.openai.com/v1/images/generations")
53+
.post(&url)
3654
.header("Authorization", format!("Bearer {}", api_key))
3755
.header("Content-Type", "application/json")
3856
.json(&body)
@@ -201,6 +219,38 @@ mod tests {
201219
}
202220
}
203221

222+
/// Closes #1051: when `image_gen_base_url` is set, the URL building
223+
/// logic must use the override (with `/v1/images/generations` appended)
224+
/// and strip any trailing slash from the user-supplied base. When unset,
225+
/// the hardcoded provider URL is used.
226+
#[test]
227+
fn test_image_gen_base_url_override_logic() {
228+
// Helper mirroring the URL construction in `generate_image`.
229+
fn build(base: Option<&str>) -> String {
230+
base.map(|b| format!("{}/v1/images/generations", b.trim_end_matches('/')))
231+
.unwrap_or_else(|| "https://api.openai.com/v1/images/generations".to_string())
232+
}
233+
234+
// Default: hardcoded URL preserved (backward compatibility).
235+
assert_eq!(build(None), "https://api.openai.com/v1/images/generations");
236+
237+
// Override applied.
238+
assert_eq!(
239+
build(Some("http://127.0.0.1:7000")),
240+
"http://127.0.0.1:7000/v1/images/generations"
241+
);
242+
243+
// Trailing slash on the user-supplied base is stripped.
244+
assert_eq!(
245+
build(Some("http://127.0.0.1:7000/")),
246+
"http://127.0.0.1:7000/v1/images/generations"
247+
);
248+
assert_eq!(
249+
build(Some("https://images.example.com/")),
250+
"https://images.example.com/v1/images/generations"
251+
);
252+
}
253+
204254
#[test]
205255
fn test_save_images_creates_dir() {
206256
let dir = tempfile::tempdir().unwrap();

crates/openfang-runtime/src/media_understanding.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@ impl MediaEngine {
2424
}
2525
}
2626

27+
/// Read-only access to the media configuration. Used by callers that
28+
/// need the URL overrides (e.g. `image_gen_base_url` for #1051) without
29+
/// taking ownership of the engine.
30+
pub fn config(&self) -> &MediaConfig {
31+
&self.config
32+
}
33+
2734
/// Describe an image using a vision-capable LLM.
2835
/// Auto-cascade: Anthropic -> OpenAI -> Gemini (based on API key availability).
2936
pub async fn describe_image(

crates/openfang-runtime/src/tts.rs

Lines changed: 136 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,38 @@ pub struct TtsResult {
1919
/// Text-to-speech engine.
2020
pub struct TtsEngine {
2121
config: TtsConfig,
22+
/// Optional override for OpenAI TTS base URL. When set, the engine POSTs
23+
/// to `<openai_base_url>/v1/audio/speech` instead of the hardcoded
24+
/// `https://api.openai.com/v1/audio/speech`. Sourced from
25+
/// `MediaConfig.tts_openai_base_url`. Closes #1051.
26+
openai_base_url: Option<String>,
27+
/// Optional override for ElevenLabs TTS base URL. When set, the engine
28+
/// POSTs to `<elevenlabs_base_url>/v1/text-to-speech/{voice_id}` instead
29+
/// of the hardcoded `https://api.elevenlabs.io/...`. Sourced from
30+
/// `MediaConfig.tts_elevenlabs_base_url`. Closes #1051.
31+
elevenlabs_base_url: Option<String>,
2232
}
2333

2434
impl TtsEngine {
2535
pub fn new(config: TtsConfig) -> Self {
26-
Self { config }
36+
Self {
37+
config,
38+
openai_base_url: None,
39+
elevenlabs_base_url: None,
40+
}
41+
}
42+
43+
/// Attach optional base-URL overrides from `MediaConfig`. Use this to
44+
/// route TTS calls to a local OpenAI-compatible service (e.g.
45+
/// Lemonade/Kokoro, LM Studio) or an ElevenLabs proxy. Closes #1051.
46+
pub fn with_base_urls(
47+
mut self,
48+
openai_base_url: Option<String>,
49+
elevenlabs_base_url: Option<String>,
50+
) -> Self {
51+
self.openai_base_url = openai_base_url;
52+
self.elevenlabs_base_url = elevenlabs_base_url;
53+
self
2754
}
2855

2956
/// Detect which TTS provider is available based on environment variables.
@@ -100,9 +127,21 @@ impl TtsEngine {
100127
"speed": self.config.openai.speed,
101128
});
102129

130+
// `tts_openai_base_url` (config.media.tts_openai_base_url) overrides
131+
// the hardcoded provider URL when set, allowing the same OpenAI-compat
132+
// JSON wire format to be sent to a local TTS service (Lemonade/Kokoro,
133+
// LM Studio, etc.) instead of the cloud provider. The Authorization
134+
// header is still built from `OPENAI_API_KEY`; local services typically
135+
// accept any non-empty bearer token. Closes #1051.
136+
let url = self
137+
.openai_base_url
138+
.as_deref()
139+
.map(|base| format!("{}/v1/audio/speech", base.trim_end_matches('/')))
140+
.unwrap_or_else(|| "https://api.openai.com/v1/audio/speech".to_string());
141+
103142
let client = reqwest::Client::new();
104143
let response = client
105-
.post("https://api.openai.com/v1/audio/speech")
144+
.post(&url)
106145
.header("Authorization", format!("Bearer {}", api_key))
107146
.header("Content-Type", "application/json")
108147
.json(&body)
@@ -161,7 +200,17 @@ impl TtsEngine {
161200
std::env::var("ELEVENLABS_API_KEY").map_err(|_| "ELEVENLABS_API_KEY not set")?;
162201

163202
let voice_id = voice_override.unwrap_or(&self.config.elevenlabs.voice_id);
164-
let url = format!("https://api.elevenlabs.io/v1/text-to-speech/{}", voice_id);
203+
// `tts_elevenlabs_base_url` (config.media.tts_elevenlabs_base_url)
204+
// overrides the hardcoded provider URL when set, allowing the same
205+
// ElevenLabs JSON wire format to be routed through a proxy or
206+
// self-hosted ElevenLabs-compatible gateway. The `xi-api-key` header
207+
// still comes from `ELEVENLABS_API_KEY`. Closes #1051.
208+
let base = self
209+
.elevenlabs_base_url
210+
.as_deref()
211+
.map(|b| b.trim_end_matches('/').to_string())
212+
.unwrap_or_else(|| "https://api.elevenlabs.io".to_string());
213+
let url = format!("{}/v1/text-to-speech/{}", base, voice_id);
165214

166215
let body = serde_json::json!({
167216
"text": text,
@@ -306,4 +355,88 @@ mod tests {
306355
fn test_max_audio_constant() {
307356
assert_eq!(MAX_AUDIO_RESPONSE_BYTES, 10 * 1024 * 1024);
308357
}
358+
359+
#[test]
360+
fn test_with_base_urls_sets_overrides() {
361+
let engine = TtsEngine::new(default_config()).with_base_urls(
362+
Some("http://127.0.0.1:8000".to_string()),
363+
Some("http://127.0.0.1:9000".to_string()),
364+
);
365+
assert_eq!(
366+
engine.openai_base_url.as_deref(),
367+
Some("http://127.0.0.1:8000")
368+
);
369+
assert_eq!(
370+
engine.elevenlabs_base_url.as_deref(),
371+
Some("http://127.0.0.1:9000")
372+
);
373+
}
374+
375+
/// Closes #1051: when the OpenAI TTS base URL is overridden, the URL
376+
/// building logic must append `/v1/audio/speech` and strip any trailing
377+
/// slash. When unset, the hardcoded provider URL is used.
378+
#[test]
379+
fn test_tts_openai_base_url_override_logic() {
380+
// Helper mirroring the URL construction in `synthesize_openai`.
381+
fn build(base: Option<&str>) -> String {
382+
base.map(|b| format!("{}/v1/audio/speech", b.trim_end_matches('/')))
383+
.unwrap_or_else(|| "https://api.openai.com/v1/audio/speech".to_string())
384+
}
385+
386+
// Default: hardcoded URL preserved (backward compatibility).
387+
assert_eq!(build(None), "https://api.openai.com/v1/audio/speech");
388+
389+
// Override applied.
390+
assert_eq!(
391+
build(Some("http://127.0.0.1:8000")),
392+
"http://127.0.0.1:8000/v1/audio/speech"
393+
);
394+
395+
// Trailing slash on the user-supplied base is stripped.
396+
assert_eq!(
397+
build(Some("http://127.0.0.1:8000/")),
398+
"http://127.0.0.1:8000/v1/audio/speech"
399+
);
400+
assert_eq!(
401+
build(Some("https://tts.example.com/")),
402+
"https://tts.example.com/v1/audio/speech"
403+
);
404+
}
405+
406+
/// Closes #1051: when the ElevenLabs TTS base URL is overridden, the URL
407+
/// building logic must append `/v1/text-to-speech/{voice_id}` and strip
408+
/// any trailing slash. When unset, the hardcoded provider URL is used.
409+
#[test]
410+
fn test_tts_elevenlabs_base_url_override_logic() {
411+
fn build(base: Option<&str>, voice_id: &str) -> String {
412+
let b = base
413+
.map(|b| b.trim_end_matches('/').to_string())
414+
.unwrap_or_else(|| "https://api.elevenlabs.io".to_string());
415+
format!("{}/v1/text-to-speech/{}", b, voice_id)
416+
}
417+
418+
let voice = "21m00Tcm4TlvDq8ikWAM";
419+
420+
// Default: hardcoded URL preserved.
421+
assert_eq!(
422+
build(None, voice),
423+
format!("https://api.elevenlabs.io/v1/text-to-speech/{voice}")
424+
);
425+
426+
// Override applied.
427+
assert_eq!(
428+
build(Some("http://127.0.0.1:9000"), voice),
429+
format!("http://127.0.0.1:9000/v1/text-to-speech/{voice}")
430+
);
431+
432+
// Trailing slash stripped.
433+
assert_eq!(
434+
build(Some("http://127.0.0.1:9000/"), voice),
435+
format!("http://127.0.0.1:9000/v1/text-to-speech/{voice}")
436+
);
437+
assert_eq!(
438+
build(Some("https://eleven.example.com/"), voice),
439+
format!("https://eleven.example.com/v1/text-to-speech/{voice}")
440+
);
441+
}
309442
}

0 commit comments

Comments
 (0)