diff --git a/cmd/gateway.go b/cmd/gateway.go index 0ebb2a899c..f71be49abe 100644 --- a/cmd/gateway.go +++ b/cmd/gateway.go @@ -144,6 +144,10 @@ func runGateway() { pgStores, traceCollector, snapshotWorker := setupStoresAndTracing(cfg, dataDir, msgBus) + if ttsTool != nil && pgStores.SystemConfigs != nil { + ttsTool.SetSystemConfigStore(pgStores.SystemConfigs) + } + // Recover from crashes: flip ghost 'summoning' rows to 'summon_failed'. // Summon goroutines don't survive process restart; stale DB rows would trap the UI. if pgStores.Agents != nil { diff --git a/internal/tools/tts.go b/internal/tools/tts.go index 7202a443b2..7cab309981 100644 --- a/internal/tools/tts.go +++ b/internal/tools/tts.go @@ -27,13 +27,22 @@ import ( // Implements Tool + ContextualTool interfaces. // Per-call channel is read from ctx for thread-safety. type TtsTool struct { - mu sync.RWMutex - manager *tts.Manager - vaultIntc *VaultInterceptor + mu sync.RWMutex + manager *tts.Manager + vaultIntc *VaultInterceptor + systemConfigs store.SystemConfigStore } func (t *TtsTool) SetVaultInterceptor(v *VaultInterceptor) { t.vaultIntc = v } +// SetSystemConfigStore wires system_configs as the final voice/model fallback +// so the dashboard /tts page also affects LLM-invoked tts tool calls. +func (t *TtsTool) SetSystemConfigStore(s store.SystemConfigStore) { + t.mu.Lock() + defer t.mu.Unlock() + t.systemConfigs = s +} + // NewTtsTool creates a TTS tool backed by the given manager. func NewTtsTool(mgr *tts.Manager) *TtsTool { return &TtsTool{manager: mgr} @@ -95,15 +104,10 @@ type agentAudioConfig struct { TTSParams map[string]any `json:"tts_params,omitempty"` } -// resolveVoiceAndModel computes the effective voice + model IDs for the -// request using the documented precedence order: -// -// args > agent (store.AgentAudioFromCtx OtherConfig) > tenant (BuiltinToolSettings) > empty. -// -// Empty return values signal "use provider default" downstream — they are not -// errors. 
Missing agent snapshot emits slog.Warn so operators can spot -// dispatch-layer regressions; missing tenant settings are quiet (common). -func (t *TtsTool) resolveVoiceAndModel(ctx context.Context, argVoice, argModel string) (voice, model string) { +// resolveVoiceAndModel precedence: args > agent OtherConfig > tenant builtin +// settings > system_configs[tts.{provider}.voice/model] (dashboard /tts page). +// Empty result = use provider default. +func (t *TtsTool) resolveVoiceAndModel(ctx context.Context, providerName, argVoice, argModel string) (voice, model string) { voice, model = argVoice, argModel // Pull agent-level config from the dispatcher-injected snapshot. @@ -147,6 +151,20 @@ func (t *TtsTool) resolveVoiceAndModel(ctx context.Context, argVoice, argModel s model = tenantCfg.DefaultModel } } + + // Final fallback: dashboard /tts settings. Lookup errors are ignored, leaving the value empty. NOTE(review): t.systemConfigs appears to be read here without t.mu although SetSystemConfigStore writes it under the lock; confirm wiring completes before any Execute call. + if (voice == "" || model == "") && t.systemConfigs != nil && providerName != "" { + if voice == "" { + if v, err := t.systemConfigs.Get(ctx, "tts."+providerName+".voice"); err == nil && v != "" { + voice = v + } + } + if model == "" { + if m, err := t.systemConfigs.Get(ctx, "tts."+providerName+".model"); err == nil && m != "" { + model = m + } + } + } return voice, model } @@ -213,9 +231,6 @@ func (t *TtsTool) Execute(ctx context.Context, args map[string]any) *Result { argModel, _ := args["model"].(string) providerName, _ := args["provider"].(string) - // Resolve voice/model via args > agent (ctx snapshot) > tenant > default. - voice, model := t.resolveVoiceAndModel(ctx, argVoice, argModel) - // Read generic agent TTS params once; adapt PER-ATTEMPT below (Finding #1 CRITICAL). // Storing generic keys here so each fallback provider gets its own adapted copy. 
genericAgentParams := t.resolveAgentGenericTTSParams(ctx) @@ -225,6 +240,12 @@ func (t *TtsTool) Execute(ctx context.Context, args map[string]any) *Result { mgr := t.manager t.mu.RUnlock() + effectiveProvider := providerName + if effectiveProvider == "" { + effectiveProvider = t.resolvePrimary(ctx, mgr) + } + voice, model := t.resolveVoiceAndModel(ctx, effectiveProvider, argVoice, argModel) + // Determine format based on channel (read from ctx — thread-safe) channel := ToolChannelFromCtx(ctx) opts := tts.Options{Voice: voice, Model: model} diff --git a/internal/tools/tts_agent_ctx_test.go b/internal/tools/tts_agent_ctx_test.go index 1a0c61f5ec..9e8bc5dba4 100644 --- a/internal/tools/tts_agent_ctx_test.go +++ b/internal/tools/tts_agent_ctx_test.go @@ -35,7 +35,7 @@ func ctxWithAgentAudio(t *testing.T, voiceID, modelID string) context.Context { func TestResolveVoiceAndModel_ArgsWinOverAgent(t *testing.T) { tool := NewTtsTool(makeTTSManager("elevenlabs")) ctx := ctxWithAgentAudio(t, "AGENT_V", "AGENT_M") - v, m := tool.resolveVoiceAndModel(ctx, "ARG_V", "ARG_M") + v, m := tool.resolveVoiceAndModel(ctx, "edge", "ARG_V", "ARG_M") if v != "ARG_V" { t.Errorf("voice: got %q, want ARG_V (args must win)", v) } @@ -50,7 +50,7 @@ func TestResolveVoiceAndModel_AgentWinsOverTenantWhenArgsEmpty(t *testing.T) { ctx = WithBuiltinToolSettings(ctx, BuiltinToolSettings{ "tts": rawJSON(t, map[string]string{"default_voice_id": "TENANT_V", "default_model": "TENANT_M"}), }) - v, m := tool.resolveVoiceAndModel(ctx, "", "") + v, m := tool.resolveVoiceAndModel(ctx, "edge", "", "") if v != "AGENT_V" { t.Errorf("voice: got %q, want AGENT_V (agent > tenant)", v) } @@ -65,7 +65,7 @@ func TestResolveVoiceAndModel_TenantFallbackWhenAgentSilent(t *testing.T) { ctx := WithBuiltinToolSettings(context.Background(), BuiltinToolSettings{ "tts": rawJSON(t, map[string]string{"default_voice_id": "TENANT_V", "default_model": "TENANT_M"}), }) - v, m := tool.resolveVoiceAndModel(ctx, "", "") + v, m := 
tool.resolveVoiceAndModel(ctx, "edge", "", "") if v != "TENANT_V" { t.Errorf("voice: got %q, want TENANT_V", v) } @@ -76,7 +76,7 @@ func TestResolveVoiceAndModel_TenantFallbackWhenAgentSilent(t *testing.T) { func TestResolveVoiceAndModel_EmptyAllMeansDefault(t *testing.T) { tool := NewTtsTool(makeTTSManager("elevenlabs")) - v, m := tool.resolveVoiceAndModel(context.Background(), "", "") + v, m := tool.resolveVoiceAndModel(context.Background(), "edge", "", "") if v != "" { t.Errorf("voice: got %q, want empty (no sources → provider default)", v) } @@ -92,7 +92,7 @@ func TestResolveVoiceAndModel_PartialAgentConfig(t *testing.T) { ctx = WithBuiltinToolSettings(ctx, BuiltinToolSettings{ "tts": rawJSON(t, map[string]string{"default_model": "TENANT_M"}), }) - v, m := tool.resolveVoiceAndModel(ctx, "", "") + v, m := tool.resolveVoiceAndModel(ctx, "edge", "", "") if v != "AGENT_V" { t.Errorf("voice: got %q, want AGENT_V", v) } diff --git a/internal/tools/tts_systemconfigs_fallback_test.go b/internal/tools/tts_systemconfigs_fallback_test.go new file mode 100644 index 0000000000..58484fe055 --- /dev/null +++ b/internal/tools/tts_systemconfigs_fallback_test.go @@ -0,0 +1,77 @@ +package tools + +import ( + "context" + "testing" +) + +// fakeSystemConfigStore satisfies store.SystemConfigStore for the tts fallback test. 
+type fakeSystemConfigStore struct { + data map[string]string +} + +func (f *fakeSystemConfigStore) Get(_ context.Context, key string) (string, error) { + return f.data[key], nil +} +func (f *fakeSystemConfigStore) Set(_ context.Context, key, value string) error { + if f.data == nil { + f.data = map[string]string{} + } + f.data[key] = value + return nil +} +func (f *fakeSystemConfigStore) Delete(_ context.Context, key string) error { + delete(f.data, key) + return nil +} +func (f *fakeSystemConfigStore) List(_ context.Context) (map[string]string, error) { + return f.data, nil +} + +func TestResolveVoiceAndModel_SystemConfigsFallback(t *testing.T) { + tool := NewTtsTool(nil) + sc := &fakeSystemConfigStore{data: map[string]string{ + "tts.edge.voice": "vi-VN-HoaiMyNeural", + "tts.edge.model": "edge-tts-1", + }} + tool.SetSystemConfigStore(sc) + + v, m := tool.resolveVoiceAndModel(context.Background(), "edge", "", "") + if v != "vi-VN-HoaiMyNeural" { + t.Errorf("voice fallback failed: got %q, want vi-VN-HoaiMyNeural", v) + } + if m != "edge-tts-1" { + t.Errorf("model fallback failed: got %q, want edge-tts-1", m) + } +} + +func TestResolveVoiceAndModel_ArgWinsOverSystemConfigs(t *testing.T) { + tool := NewTtsTool(nil) + tool.SetSystemConfigStore(&fakeSystemConfigStore{data: map[string]string{ + "tts.edge.voice": "vi-VN-HoaiMyNeural", + }}) + + v, _ := tool.resolveVoiceAndModel(context.Background(), "edge", "en-US-AriaNeural", "") + if v != "en-US-AriaNeural" { + t.Errorf("arg voice must win over system_configs: got %q", v) + } +} + +func TestResolveVoiceAndModel_NoStoreNoFallback(t *testing.T) { + tool := NewTtsTool(nil) + v, m := tool.resolveVoiceAndModel(context.Background(), "edge", "", "") + if v != "" || m != "" { + t.Errorf("expected empty fallback when no system_configs wired, got voice=%q model=%q", v, m) + } +} + +func TestResolveVoiceAndModel_EmptyProviderSkipsFallback(t *testing.T) { + tool := NewTtsTool(nil) + 
tool.SetSystemConfigStore(&fakeSystemConfigStore{data: map[string]string{ + "tts..voice": "should-not-match", + }}) + v, m := tool.resolveVoiceAndModel(context.Background(), "", "", "") + if v != "" || m != "" { + t.Errorf("empty provider must skip lookup, got voice=%q model=%q", v, m) + } +}