Skip to content

Commit 284c9cb

Browse files
committed
feat(tts): per-tenant TTS config and per-agent voice override
- Add per-tenant TTS configuration endpoints (GET/POST /v1/tts/config) with RoleAdmin auth instead of master scope
- Implement TenantTTSResolver for channels to use tenant-specific TTS providers and auto mode settings
- Add per-agent voice override in channel TTS auto-apply:
  - Extend OutboundMessage with AgentID and AgentOtherConfig
  - Inject AgentAudioSnapshot in dispatch.go from outbound message
  - MaybeApply reads tts_voice_id/tts_model_id from agent context
- Fix events.go to use RunContext.TenantID directly (H3)
- Fix dispatch.go error notification to use sendCtx (H1)
- Update UI to use new /v1/tts/config endpoints
1 parent 0101495 commit 284c9cb

16 files changed

Lines changed: 656 additions & 53 deletions

cmd/gateway_consumer_normal.go

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ func processNormalMessage(
233233
blockReply := deps.ChannelMgr != nil && deps.ChannelMgr.ResolveBlockReply(msg.Channel, deps.Cfg.Gateway.BlockReply)
234234
toolStatus := deps.Cfg.Gateway.ToolStatus == nil || *deps.Cfg.Gateway.ToolStatus // default true
235235
if deps.ChannelMgr != nil {
236-
deps.ChannelMgr.RegisterRun(runID, msg.Channel, chatIDForRun, messageID, outMeta, enableStream, blockReply, toolStatus)
236+
deps.ChannelMgr.RegisterRun(runID, msg.Channel, chatIDForRun, messageID, outMeta, msg.TenantID, enableStream, blockReply, toolStatus)
237237
}
238238

239239
// Group-aware system prompt: help the LLM adapt tone and behavior for group chats.
@@ -396,7 +396,7 @@ func processNormalMessage(
396396
})
397397

398398
// Handle result asynchronously to not block the flush callback.
399-
go func(agentKey, channel, chatID, session, rID, peerKind, inboundContent string, meta map[string]string, blockReplyEnabled bool, ptd *tools.PendingTeamDispatch) {
399+
go func(agentKey, channel, chatID, session, rID, peerKind, inboundContent string, meta map[string]string, blockReplyEnabled bool, ptd *tools.PendingTeamDispatch, tenantID, agentUUID uuid.UUID, agentOtherConfig []byte) {
400400
outcome := <-outCh
401401

402402
// Release team create lock — tasks already visible in DB, other goroutines can list.
@@ -426,6 +426,8 @@ func processNormalMessage(
426426
ChatID: chatID,
427427
Content: "",
428428
Metadata: meta,
429+
TenantID: tenantID,
430+
AgentID: agentUUID,
429431
})
430432
return
431433
}
@@ -444,6 +446,8 @@ func processNormalMessage(
444446
ChatID: chatID,
445447
Content: errContent,
446448
Metadata: meta,
449+
TenantID: tenantID,
450+
AgentID: agentUUID,
447451
})
448452
return
449453
}
@@ -461,6 +465,8 @@ func processNormalMessage(
461465
ChatID: chatID,
462466
Content: "",
463467
Metadata: meta,
468+
TenantID: tenantID,
469+
AgentID: agentUUID,
464470
})
465471
return
466472
}
@@ -476,6 +482,8 @@ func processNormalMessage(
476482
ChatID: chatID,
477483
Content: "",
478484
Metadata: meta,
485+
TenantID: tenantID,
486+
AgentID: agentUUID,
479487
})
480488
return
481489
}
@@ -491,10 +499,13 @@ func processNormalMessage(
491499

492500
// Publish response back to the channel
493501
outMsg := bus.OutboundMessage{
494-
Channel: channel,
495-
ChatID: chatID,
496-
Content: replyContent,
497-
Metadata: meta,
502+
Channel: channel,
503+
ChatID: chatID,
504+
Content: replyContent,
505+
Metadata: meta,
506+
TenantID: tenantID,
507+
AgentID: agentUUID,
508+
AgentOtherConfig: agentOtherConfig,
498509
}
499510

500511
appendMediaToOutbound(&outMsg, outcome.Result.Media)
@@ -505,5 +516,5 @@ func processNormalMessage(
505516
if deps.TeamStore != nil && channel != tools.ChannelSystem && channel != tools.ChannelTeammate && channel != tools.ChannelDashboard {
506517
go autoSetFollowup(ctx, deps.TeamStore, deps.AgentStore, agentKey, channel, chatID, replyContent)
507518
}
508-
}(agentID, msg.Channel, msg.ChatID, sessionKey, runID, peerKind, msg.Content, outMeta, blockReply, ptd)
519+
}(agentID, msg.Channel, msg.ChatID, sessionKey, runID, peerKind, msg.Content, outMeta, blockReply, ptd, msg.TenantID, agentLoop.UUID(), agentLoop.OtherConfig())
509520
}

cmd/gateway_http_wiring.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,10 +254,21 @@ func (d *gatewayDeps) wireHTTPHandlersOnServer(
254254
if rl := d.server.RateLimiter(); rl != nil && rl.Enabled() {
255255
ttsH.SetRateLimiter(rl.Allow)
256256
}
257+
// Wire stores for per-tenant TTS config lookup at synthesis time.
258+
if d.pgStores.SystemConfigs != nil && d.pgStores.ConfigSecrets != nil {
259+
ttsH.SetStores(d.pgStores.SystemConfigs, d.pgStores.ConfigSecrets)
260+
// Wire tenant resolver for channels TTS auto-apply
261+
d.audioMgr.SetTenantResolver(httpapi.NewTenantTTSResolver(d.pgStores.SystemConfigs, d.pgStores.ConfigSecrets))
262+
}
257263
d.server.SetTTSHandler(ttsH)
258264
d.ttsHandler = ttsH // store for hot-reload
259265
}
260266

267+
// Per-tenant TTS config endpoint — allows tenant admins to configure TTS.
268+
if d.pgStores.SystemConfigs != nil && d.pgStores.ConfigSecrets != nil {
269+
d.server.SetTTSConfigHandler(httpapi.NewTTSConfigHandler(d.pgStores.SystemConfigs, d.pgStores.ConfigSecrets))
270+
}
271+
261272
// Seed + apply builtin tool disables
262273
if d.pgStores.BuiltinTools != nil {
263274
seedBuiltinTools(context.Background(), d.pgStores.BuiltinTools)

internal/agent/loop_tracing.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ func (l *Loop) ID() string { return l.id }
3232
// See docs/agent-identity-conventions.md.
3333
func (l *Loop) UUID() uuid.UUID { return l.agentUUID }
3434

35+
// OtherConfig returns the agent's other_config JSONB (extensibility bag).
36+
// Used for per-agent TTS voice override (tts_voice_id, tts_model_id).
37+
func (l *Loop) OtherConfig() json.RawMessage { return l.agentOtherConfig }
38+
3539
// Model returns the model identifier for this agent loop.
3640
func (l *Loop) Model() string { return l.model }
3741

internal/agent/router_cache_canonicalize_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package agent
22

33
import (
44
"context"
5+
"encoding/json"
56
"strings"
67
"sync/atomic"
78
"testing"
@@ -21,6 +22,8 @@ type stubAgent struct {
2122
}
2223

2324
func (s *stubAgent) ID() string { return s.id }
25+
func (s *stubAgent) UUID() uuid.UUID { return uuid.Nil }
26+
func (s *stubAgent) OtherConfig() json.RawMessage { return nil }
2427
func (s *stubAgent) Run(context.Context, RunRequest) (*RunResult, error) { return nil, nil }
2528
func (s *stubAgent) IsRunning() bool { return s.running }
2629
func (s *stubAgent) Model() string { return "test-model" }

internal/agent/types.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ package agent
22

33
import (
44
"context"
5+
"encoding/json"
6+
7+
"github.com/google/uuid"
58

69
"github.com/nextlevelbuilder/goclaw/internal/providers"
710
)
@@ -10,6 +13,8 @@ import (
1013
// Implemented by *Loop; extracted as an interface for testability and composability.
1114
type Agent interface {
1215
ID() string
16+
UUID() uuid.UUID
17+
OtherConfig() json.RawMessage
1318
Run(ctx context.Context, req RunRequest) (*RunResult, error)
1419
IsRunning() bool
1520
Model() string
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
package audio
2+
3+
import (
4+
"context"
5+
"os"
6+
"path/filepath"
7+
"time"
8+
)
9+
10+
// AutoApplyResult is the outcome of running TTS auto-apply over a reply.
type AutoApplyResult struct {
	// Text is the reply content after [[tts]] directives have been removed.
	Text string
	// AudioPath locates the generated audio file; it stays empty when no TTS
	// was applied.
	AudioPath string
	// AudioMime is the MIME type of the generated audio (e.g. "audio/ogg").
	AudioMime string
}
19+
20+
// AutoApplyToText checks if TTS should be auto-applied to the message content.
21+
// Returns modified text (directives stripped) and audio path if TTS was applied.
22+
// channel: "telegram", "discord", etc. - used for format selection (opus for telegram).
23+
// isVoiceInbound: true if user sent voice message (for "inbound" auto mode).
24+
// workspace: directory to save generated audio files.
25+
func (m *Manager) AutoApplyToText(
26+
ctx context.Context,
27+
content string,
28+
channel string,
29+
isVoiceInbound bool,
30+
workspace string,
31+
) (*AutoApplyResult, error) {
32+
if m == nil || content == "" {
33+
return &AutoApplyResult{Text: content}, nil
34+
}
35+
36+
// Check if TTS should be applied (respects auto mode: off/always/inbound/tagged)
37+
result, ok := m.MaybeApply(ctx, content, channel, isVoiceInbound, "final")
38+
if !ok || result == nil {
39+
return &AutoApplyResult{Text: StripTTSDirectives(content)}, nil
40+
}
41+
42+
// Write audio to workspace/tts/ directory
43+
ttsDir := workspace
44+
if ttsDir == "" {
45+
ttsDir = os.TempDir()
46+
}
47+
ttsDir = filepath.Join(ttsDir, "tts")
48+
if err := os.MkdirAll(ttsDir, 0755); err != nil {
49+
return &AutoApplyResult{Text: StripTTSDirectives(content)}, err
50+
}
51+
52+
audioPath := filepath.Join(ttsDir, "auto-"+time.Now().Format("20060102-150405")+"."+result.Extension)
53+
if err := os.WriteFile(audioPath, result.Audio, 0644); err != nil {
54+
return &AutoApplyResult{Text: StripTTSDirectives(content)}, err
55+
}
56+
57+
return &AutoApplyResult{
58+
Text: StripTTSDirectives(content),
59+
AudioPath: audioPath,
60+
AudioMime: result.MimeType,
61+
}, nil
62+
}

internal/audio/manager.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ func channelFromCtx(ctx context.Context) string {
2323
return v
2424
}
2525

26+
// TenantTTSResolver resolves per-tenant TTS provider and config.
27+
// Returns (provider, providerName, autoMode, error). If error is non-nil,
28+
// caller should fall back to global manager config.
29+
type TenantTTSResolver func(ctx context.Context) (TTSProvider, string, AutoMode, error)
30+
2631
// Manager orchestrates audio providers across TTS, STT, Music, and SFX
2732
// operations. Each op has its own provider map + primary/fallback chain.
2833
//
@@ -43,6 +48,8 @@ type Manager struct {
4348
mode Mode
4449
maxLength int // max text length before truncation (default 1500)
4550
timeoutMs int // provider timeout (default 30000)
51+
52+
tenantResolver TenantTTSResolver // per-tenant TTS config resolver (nil = use global)
4653
}
4754

4855
// ManagerConfig configures the audio manager. Preserved from legacy TTS
@@ -126,6 +133,23 @@ func (m *Manager) GetProvider(name string) (TTSProvider, bool) {
126133
// PrimaryProvider returns the primary TTS provider name.
127134
func (m *Manager) PrimaryProvider() string { return m.primary }
128135

136+
// SetTenantResolver sets the per-tenant TTS config resolver.
137+
// Channels use this to resolve tenant-specific TTS providers.
138+
func (m *Manager) SetTenantResolver(r TenantTTSResolver) { m.tenantResolver = r }
139+
140+
// ResolveTenantProvider attempts to get tenant-specific TTS provider.
141+
// Returns (provider, name, autoMode, ok). If ok=false, caller uses global config.
142+
func (m *Manager) ResolveTenantProvider(ctx context.Context) (TTSProvider, string, AutoMode, bool) {
143+
if m.tenantResolver == nil {
144+
return nil, "", "", false
145+
}
146+
p, name, auto, err := m.tenantResolver(ctx)
147+
if err != nil {
148+
return nil, "", "", false
149+
}
150+
return p, name, auto, true
151+
}
152+
129153
// AutoMode returns the current auto-apply mode.
130154
func (m *Manager) AutoMode() AutoMode { return m.auto }
131155

internal/audio/manager_auto.go

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@ package audio
22

33
import (
44
"context"
5+
"encoding/json"
56
"log/slog"
67
"strings"
8+
9+
"github.com/nextlevelbuilder/goclaw/internal/store"
710
)
811

912
// MaybeApply inspects auto-mode and conditionally applies TTS to a reply.
@@ -16,7 +19,15 @@ import (
1619
// - isVoiceInbound: whether the user's inbound message was voice
1720
// - kind: "tool", "block", or "final"
1821
func (m *Manager) MaybeApply(ctx context.Context, text, channel string, isVoiceInbound bool, kind string) (*SynthResult, bool) {
19-
if m.auto == AutoOff {
22+
// Try tenant-specific TTS config first
23+
tenantProvider, _, tenantAuto, hasTenant := m.ResolveTenantProvider(ctx)
24+
25+
auto := m.auto
26+
if hasTenant && tenantAuto != "" {
27+
auto = tenantAuto
28+
}
29+
30+
if auto == AutoOff {
2031
return nil, false
2132
}
2233

@@ -25,7 +36,7 @@ func (m *Manager) MaybeApply(ctx context.Context, text, channel string, isVoiceI
2536
return nil, false
2637
}
2738

28-
switch m.auto {
39+
switch auto {
2940
case AutoInbound:
3041
if !isVoiceInbound {
3142
return nil, false
@@ -42,7 +53,7 @@ func (m *Manager) MaybeApply(ctx context.Context, text, channel string, isVoiceI
4253

4354
// Content validation (matches legacy TTS behavior).
4455
cleanText := stripMarkdown(text)
45-
cleanText = stripTtsDirectives(cleanText)
56+
cleanText = StripTTSDirectives(cleanText)
4657
cleanText = strings.TrimSpace(cleanText)
4758

4859
if len(cleanText) < 10 {
@@ -61,7 +72,32 @@ func (m *Manager) MaybeApply(ctx context.Context, text, channel string, isVoiceI
6172
opts.Format = "opus" // Telegram voice bubbles need opus
6273
}
6374

64-
result, err := m.SynthesizeWithFallback(ctx, cleanText, opts)
75+
// Apply per-agent voice/model override from context (set by dispatch.go from OutboundMessage)
76+
if snap, ok := store.AgentAudioFromCtx(ctx); ok && len(snap.OtherConfig) > 0 {
77+
var agentCfg struct {
78+
TTSVoiceID string `json:"tts_voice_id,omitempty"`
79+
TTSModelID string `json:"tts_model_id,omitempty"`
80+
}
81+
if err := json.Unmarshal(snap.OtherConfig, &agentCfg); err == nil {
82+
if agentCfg.TTSVoiceID != "" {
83+
opts.Voice = agentCfg.TTSVoiceID
84+
}
85+
if agentCfg.TTSModelID != "" {
86+
opts.Model = agentCfg.TTSModelID
87+
}
88+
}
89+
}
90+
91+
var result *SynthResult
92+
var err error
93+
94+
// Use tenant provider if available, otherwise fall back to global
95+
if hasTenant && tenantProvider != nil {
96+
result, err = tenantProvider.Synthesize(ctx, cleanText, opts)
97+
} else {
98+
result, err = m.SynthesizeWithFallback(ctx, cleanText, opts)
99+
}
100+
65101
if err != nil {
66102
slog.Warn("tts auto-apply failed", "error", err)
67103
return nil, false

internal/bus/types.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,14 @@ type InboundMessage struct {
3535

3636
// OutboundMessage represents a message to be sent to a channel.
3737
type OutboundMessage struct {
38-
Channel string `json:"channel"`
39-
ChatID string `json:"chat_id"`
40-
Content string `json:"content"`
41-
Media []MediaAttachment `json:"media,omitempty"` // optional media attachments
42-
Metadata map[string]string `json:"metadata,omitempty"` // channel-specific metadata
38+
Channel string `json:"channel"`
39+
ChatID string `json:"chat_id"`
40+
Content string `json:"content"`
41+
Media []MediaAttachment `json:"media,omitempty"` // optional media attachments
42+
Metadata map[string]string `json:"metadata,omitempty"` // channel-specific metadata
43+
TenantID uuid.UUID `json:"tenant_id,omitempty"` // tenant scope for per-tenant TTS
44+
AgentID uuid.UUID `json:"agent_id,omitempty"` // agent scope for per-agent TTS voice override
45+
AgentOtherConfig []byte `json:"agent_other_config,omitempty"` // agent's other_config for TTS voice/model
4346
}
4447

4548
// MediaAttachment represents a media file to be sent with a message.

0 commit comments

Comments
 (0)