Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cmd/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ func runGateway() {

pgStores, traceCollector, snapshotWorker := setupStoresAndTracing(cfg, dataDir, msgBus)

if ttsTool != nil && pgStores.SystemConfigs != nil {
ttsTool.SetSystemConfigStore(pgStores.SystemConfigs)
}

// Recover from crashes: flip ghost 'summoning' rows to 'summon_failed'.
// Summon goroutines don't survive process restart; stale DB rows would trap the UI.
if pgStores.Agents != nil {
Expand Down
51 changes: 36 additions & 15 deletions internal/tools/tts.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,22 @@ import (
// Implements Tool + ContextualTool interfaces.
// Per-call channel is read from ctx for thread-safety.
type TtsTool struct {
	// mu is held when manager is read in Execute and when systemConfigs is
	// assigned by SetSystemConfigStore.
	mu sync.RWMutex
	// manager is the TTS manager supplied to NewTtsTool.
	manager *tts.Manager
	// vaultIntc is assigned by SetVaultInterceptor (without taking mu).
	vaultIntc *VaultInterceptor
	// systemConfigs, when non-nil, is consulted by resolveVoiceAndModel as the
	// last fallback for voice/model via the keys tts.<provider>.voice and
	// tts.<provider>.model.
	systemConfigs store.SystemConfigStore
}

// SetVaultInterceptor stores v as the tool's vault interceptor.
// The assignment is performed directly, without acquiring t.mu.
func (t *TtsTool) SetVaultInterceptor(v *VaultInterceptor) {
	t.vaultIntc = v
}

// SetSystemConfigStore installs s as the system_configs store used as the
// last-resort source of voice/model values, so that settings saved from the
// dashboard /tts page also apply to LLM-invoked tts tool calls.
// The write is performed while holding t.mu exclusively.
func (t *TtsTool) SetSystemConfigStore(s store.SystemConfigStore) {
	t.mu.Lock()
	t.systemConfigs = s
	t.mu.Unlock()
}

// NewTtsTool creates a TTS tool backed by the given manager.
func NewTtsTool(mgr *tts.Manager) *TtsTool {
return &TtsTool{manager: mgr}
Expand Down Expand Up @@ -95,15 +104,10 @@ type agentAudioConfig struct {
TTSParams map[string]any `json:"tts_params,omitempty"`
}

// resolveVoiceAndModel computes the effective voice + model IDs for the
// request using the documented precedence order:
//
// args > agent (store.AgentAudioFromCtx OtherConfig) > tenant (BuiltinToolSettings) > empty.
//
// Empty return values signal "use provider default" downstream — they are not
// errors. Missing agent snapshot emits slog.Warn so operators can spot
// dispatch-layer regressions; missing tenant settings are quiet (common).
func (t *TtsTool) resolveVoiceAndModel(ctx context.Context, argVoice, argModel string) (voice, model string) {
// resolveVoiceAndModel precedence: args > agent OtherConfig > tenant builtin
// settings > system_configs[tts.<provider>.voice/model] (dashboard /tts page).
// Empty result = use provider default.
func (t *TtsTool) resolveVoiceAndModel(ctx context.Context, providerName, argVoice, argModel string) (voice, model string) {
voice, model = argVoice, argModel

// Pull agent-level config from the dispatcher-injected snapshot.
Expand Down Expand Up @@ -147,6 +151,20 @@ func (t *TtsTool) resolveVoiceAndModel(ctx context.Context, argVoice, argModel s
model = tenantCfg.DefaultModel
}
}

// Final fallback: dashboard /tts settings.
if (voice == "" || model == "") && t.systemConfigs != nil && providerName != "" {
if voice == "" {
if v, err := t.systemConfigs.Get(ctx, "tts."+providerName+".voice"); err == nil && v != "" {
voice = v
}
}
if model == "" {
if m, err := t.systemConfigs.Get(ctx, "tts."+providerName+".model"); err == nil && m != "" {
model = m
}
}
}
return voice, model
}

Expand Down Expand Up @@ -213,9 +231,6 @@ func (t *TtsTool) Execute(ctx context.Context, args map[string]any) *Result {
argModel, _ := args["model"].(string)
providerName, _ := args["provider"].(string)

// Resolve voice/model via args > agent (ctx snapshot) > tenant > default.
voice, model := t.resolveVoiceAndModel(ctx, argVoice, argModel)

// Read generic agent TTS params once; adapt PER-ATTEMPT below (Finding #1 CRITICAL).
// Storing generic keys here so each fallback provider gets its own adapted copy.
genericAgentParams := t.resolveAgentGenericTTSParams(ctx)
Expand All @@ -225,6 +240,12 @@ func (t *TtsTool) Execute(ctx context.Context, args map[string]any) *Result {
mgr := t.manager
t.mu.RUnlock()

effectiveProvider := providerName
if effectiveProvider == "" {
effectiveProvider = t.resolvePrimary(ctx, mgr)
}
voice, model := t.resolveVoiceAndModel(ctx, effectiveProvider, argVoice, argModel)

// Determine format based on channel (read from ctx — thread-safe)
channel := ToolChannelFromCtx(ctx)
opts := tts.Options{Voice: voice, Model: model}
Expand Down
10 changes: 5 additions & 5 deletions internal/tools/tts_agent_ctx_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func ctxWithAgentAudio(t *testing.T, voiceID, modelID string) context.Context {
func TestResolveVoiceAndModel_ArgsWinOverAgent(t *testing.T) {
tool := NewTtsTool(makeTTSManager("elevenlabs"))
ctx := ctxWithAgentAudio(t, "AGENT_V", "AGENT_M")
v, m := tool.resolveVoiceAndModel(ctx, "ARG_V", "ARG_M")
v, m := tool.resolveVoiceAndModel(ctx, "edge", "ARG_V", "ARG_M")
if v != "ARG_V" {
t.Errorf("voice: got %q, want ARG_V (args must win)", v)
}
Expand All @@ -50,7 +50,7 @@ func TestResolveVoiceAndModel_AgentWinsOverTenantWhenArgsEmpty(t *testing.T) {
ctx = WithBuiltinToolSettings(ctx, BuiltinToolSettings{
"tts": rawJSON(t, map[string]string{"default_voice_id": "TENANT_V", "default_model": "TENANT_M"}),
})
v, m := tool.resolveVoiceAndModel(ctx, "", "")
v, m := tool.resolveVoiceAndModel(ctx, "edge", "", "")
if v != "AGENT_V" {
t.Errorf("voice: got %q, want AGENT_V (agent > tenant)", v)
}
Expand All @@ -65,7 +65,7 @@ func TestResolveVoiceAndModel_TenantFallbackWhenAgentSilent(t *testing.T) {
ctx := WithBuiltinToolSettings(context.Background(), BuiltinToolSettings{
"tts": rawJSON(t, map[string]string{"default_voice_id": "TENANT_V", "default_model": "TENANT_M"}),
})
v, m := tool.resolveVoiceAndModel(ctx, "", "")
v, m := tool.resolveVoiceAndModel(ctx, "edge", "", "")
if v != "TENANT_V" {
t.Errorf("voice: got %q, want TENANT_V", v)
}
Expand All @@ -76,7 +76,7 @@ func TestResolveVoiceAndModel_TenantFallbackWhenAgentSilent(t *testing.T) {

func TestResolveVoiceAndModel_EmptyAllMeansDefault(t *testing.T) {
tool := NewTtsTool(makeTTSManager("elevenlabs"))
v, m := tool.resolveVoiceAndModel(context.Background(), "", "")
v, m := tool.resolveVoiceAndModel(context.Background(), "edge", "", "")
if v != "" {
t.Errorf("voice: got %q, want empty (no sources → provider default)", v)
}
Expand All @@ -92,7 +92,7 @@ func TestResolveVoiceAndModel_PartialAgentConfig(t *testing.T) {
ctx = WithBuiltinToolSettings(ctx, BuiltinToolSettings{
"tts": rawJSON(t, map[string]string{"default_model": "TENANT_M"}),
})
v, m := tool.resolveVoiceAndModel(ctx, "", "")
v, m := tool.resolveVoiceAndModel(ctx, "edge", "", "")
if v != "AGENT_V" {
t.Errorf("voice: got %q, want AGENT_V", v)
}
Expand Down
77 changes: 77 additions & 0 deletions internal/tools/tts_systemconfigs_fallback_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package tools

import (
"context"
"testing"
)

// fakeSystemConfigStore is a map-backed, in-memory stand-in intended to
// satisfy store.SystemConfigStore in the tts fallback tests.
type fakeSystemConfigStore struct {
	// data holds the key/value pairs; it may be nil until the first Set.
	data map[string]string
}

// Get returns the value stored under key, or "" when the key is absent.
// The returned error is always nil.
func (f *fakeSystemConfigStore) Get(_ context.Context, key string) (string, error) {
	value := f.data[key]
	return value, nil
}

// Set stores value under key, allocating the backing map on first use.
// The returned error is always nil.
func (f *fakeSystemConfigStore) Set(_ context.Context, key, value string) error {
	if f.data == nil {
		f.data = make(map[string]string)
	}
	f.data[key] = value
	return nil
}

// Delete removes key from the store. Deleting an absent key, or deleting
// from a store whose map was never allocated, is a no-op.
func (f *fakeSystemConfigStore) Delete(_ context.Context, key string) error {
	delete(f.data, key)
	return nil
}

// List returns the backing map itself (not a copy), which is nil if nothing
// has been stored. The returned error is always nil.
func (f *fakeSystemConfigStore) List(_ context.Context) (map[string]string, error) {
	return f.data, nil
}

func TestResolveVoiceAndModel_SystemConfigsFallback(t *testing.T) {
tool := NewTtsTool(nil)
sc := &fakeSystemConfigStore{data: map[string]string{
"tts.edge.voice": "vi-VN-HoaiMyNeural",
"tts.edge.model": "edge-tts-1",
}}
tool.SetSystemConfigStore(sc)

v, m := tool.resolveVoiceAndModel(context.Background(), "edge", "", "")
if v != "vi-VN-HoaiMyNeural" {
t.Errorf("voice fallback failed: got %q, want vi-VN-HoaiMyNeural", v)
}
if m != "edge-tts-1" {
t.Errorf("model fallback failed: got %q, want edge-tts-1", m)
}
}

// TestResolveVoiceAndModel_ArgWinsOverSystemConfigs checks that explicit
// voice and model arguments are returned unchanged even when the wired
// system_configs store holds values for the same provider. Both outputs are
// asserted so that a regression on either the voice or the model path fails.
func TestResolveVoiceAndModel_ArgWinsOverSystemConfigs(t *testing.T) {
	tool := NewTtsTool(nil)
	tool.SetSystemConfigStore(&fakeSystemConfigStore{data: map[string]string{
		"tts.edge.voice": "vi-VN-HoaiMyNeural",
		"tts.edge.model": "edge-tts-1",
	}})

	v, m := tool.resolveVoiceAndModel(context.Background(), "edge", "en-US-AriaNeural", "arg-model")
	if v != "en-US-AriaNeural" {
		t.Errorf("arg voice must win over system_configs: got %q", v)
	}
	if m != "arg-model" {
		t.Errorf("arg model must win over system_configs: got %q", m)
	}
}

// TestResolveVoiceAndModel_NoStoreNoFallback checks that when no
// system_configs store has been wired and no other source supplies values,
// both voice and model come back empty.
func TestResolveVoiceAndModel_NoStoreNoFallback(t *testing.T) {
	tool := NewTtsTool(nil)

	gotVoice, gotModel := tool.resolveVoiceAndModel(context.Background(), "edge", "", "")
	if gotVoice == "" && gotModel == "" {
		return
	}
	t.Errorf("expected empty fallback when no system_configs wired, got voice=%q model=%q", gotVoice, gotModel)
}

// TestResolveVoiceAndModel_EmptyProviderSkipsFallback checks that an empty
// provider name disables the system_configs lookup entirely. The store is
// seeded with the keys an unguarded lookup would build ("tts..voice" and
// "tts..model"), so the test fails for either output if the guard is removed.
func TestResolveVoiceAndModel_EmptyProviderSkipsFallback(t *testing.T) {
	tool := NewTtsTool(nil)
	tool.SetSystemConfigStore(&fakeSystemConfigStore{data: map[string]string{
		"tts..voice": "should-not-match",
		"tts..model": "should-not-match",
	}})
	v, m := tool.resolveVoiceAndModel(context.Background(), "", "", "")
	if v != "" || m != "" {
		t.Errorf("empty provider must skip lookup, got voice=%q model=%q", v, m)
	}
}
Loading