Skip to content

Commit 676b93b

Browse files
Align managed vendor validation with generated core shapes
1 parent 968e1f0 commit 676b93b

6 files changed

Lines changed: 40 additions & 11 deletions

File tree

docs/concepts/vendors.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ Used with `agent.with_tts()`. Each TTS vendor produces audio at a specific sampl
5454
| `RimeTTS` | Rime | `key`, `speaker`, `model_id` ||
5555
| `FishAudioTTS` | Fish Audio | `key`, `reference_id`, `backend` ||
5656
| `GroqTTS` | Groq | `key` ||
57-
| `MiniMaxTTS` | MiniMax | `key` ||
57+
| `MiniMaxTTS` | MiniMax | `model` for supported Agora-managed models; `key`, `group_id`, `model`, `voice_id`, `url` for BYOK ||
5858
| `DeepgramTTS` | Deepgram | `api_key`, `model` | Configurable |
5959
| `SarvamTTS` | Sarvam | `api_key` ||
6060

@@ -80,7 +80,7 @@ Use `agent.with_interaction_language()` for Agora `asr.language`; it defaults to
8080
| Class | Provider | Required Parameters |
8181
|---|---|---|
8282
| `SpeechmaticsSTT` | Speechmatics | `api_key`, `language` |
83-
| `DeepgramSTT` | Deepgram | — (all optional) |
83+
| `DeepgramSTT` | Deepgram | `model` for Agora-managed `nova-2`/`nova-3`; `api_key` for BYOK |
8484
| `MicrosoftSTT` | Microsoft Azure | `key`, `region`, `language` |
8585
| `OpenAISTT` | OpenAI | `api_key` |
8686
| `GoogleSTT` | Google Cloud | `project_id`, `location`, `adc_credentials_string`, `language` |

docs/reference/vendors.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,11 +190,10 @@ The SDK also includes named helpers for the remaining Agora-supported LLM provid
190190
| `voice` | `str` | Yes || Voice: `alloy`, `echo`, `fable`, `onyx`, `nova`, `shimmer` |
191191
| `model` | `str` | BYOK only | `None` | Model: `tts-1` or `tts-1-hd` |
192192
| `base_url` | `str` | BYOK only | `None` | OpenAI TTS endpoint URL |
193-
| `response_format` | `str` | No | `None` | Audio format (e.g., `pcm`) |
194193
| `speed` | `float` | No | `None` | Speech speed multiplier |
195194
| `skip_patterns` | `List[int]` | No | `None` | Skip patterns |
196195

197-
`api_key`, `model`, and `base_url` are required together for BYOK. Without `api_key`, AgentKit uses the Agora-managed `tts-1` path. Fixed sample rate: 24000 Hz.
196+
`api_key`, `model`, and `base_url` are required together for BYOK. Without `api_key`, AgentKit uses the Agora-managed path: `model` must be omitted or set to `tts-1`, and `base_url` must be omitted. Fixed sample rate: 24000 Hz.
198197

199198
### `CartesiaTTS`
200199

@@ -323,14 +322,16 @@ Use `agent.with_interaction_language()` for Agora `asr.language`; it defaults to
323322

324323
| Parameter | Type | Required | Default | Description |
325324
|---|---|---|---|---|
326-
| `api_key` | `str` | No | `None` | Deepgram API key |
325+
| `api_key` | `str` | BYOK only | `None` | Deepgram API key. Optional only for Agora-managed `nova-2` and `nova-3`. |
327326
| `model` | `str` | Managed only | `None` | Model (e.g., `nova-2`). Required when `api_key` is omitted, in which case it must be `nova-2` or `nova-3`. |
328327
| `language` | `str` | No | `None` | Language code (e.g., `en-US`) |
329328
| `interaction_language` | `str` | No | `None` | Agora `asr.language` override |
330329
| `smart_format` | `bool` | No | `None` | Enable smart formatting |
331330
| `punctuation` | `bool` | No | `None` | Enable punctuation |
332331
| `additional_params` | `Dict[str, Any]` | No | `None` | Additional parameters |
333332

333+
For `nova-2` and `nova-3`, omit `api_key` to use Agora-managed credentials. For all other Deepgram models, or when `model` is omitted, AgentKit requires `api_key`.
334+
334335
### `MicrosoftSTT`
335336

336337
| Parameter | Type | Required | Default | Description |

src/agora_agent/agentkit/vendors/stt.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from typing import Any, Dict, Optional
22

3-
from pydantic import BaseModel, ConfigDict, Field
3+
from pydantic import BaseModel, ConfigDict, Field, model_validator
44
from typing_extensions import Literal
55

66
from .base import BaseSTT
@@ -41,6 +41,7 @@
4141
]
4242

4343
_INTERACTION_LANGUAGES = set(InteractionLanguage.__args__)
44+
_DEEPGRAM_MANAGED_MODELS = {"nova-2", "nova-3"}
4445

4546

4647
def _interaction_language(language: Optional[str], interaction_language: Optional[InteractionLanguage]) -> Optional[InteractionLanguage]:
@@ -97,6 +98,12 @@ class DeepgramSTTOptions(BaseModel):
9798
punctuation: Optional[bool] = Field(default=None, description="Enable punctuation")
9899
additional_params: Optional[Dict[str, Any]] = Field(default=None)
99100

101+
@model_validator(mode="after")
102+
def _validate_managed_model(self) -> "DeepgramSTTOptions":
103+
if self.api_key is None and (self.model is None or self.model.strip().lower() not in _DEEPGRAM_MANAGED_MODELS):
104+
raise ValueError("DeepgramSTT requires api_key unless using a supported Agora-managed model")
105+
return self
106+
100107
class DeepgramSTT(BaseSTT):
101108
def __init__(self, **kwargs: Any):
102109
self.options = DeepgramSTTOptions(**kwargs)

src/agora_agent/agentkit/vendors/tts.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from pydantic import BaseModel, ConfigDict, Field, model_validator
44

55
from .base import BaseTTS, CartesiaSampleRate, ElevenLabsSampleRate, GoogleTTSSampleRate, MicrosoftSampleRate
6+
from ..presets import MiniMaxPresetModels, OpenAITtsPresetModels
67

78
class ElevenLabsTTSOptions(BaseModel):
89
model_config = ConfigDict(extra="forbid")
@@ -100,7 +101,6 @@ class OpenAITTSOptions(BaseModel):
100101
voice: str = Field(..., description="Voice name (alloy, echo, fable, onyx, nova, shimmer)")
101102
model: Optional[str] = Field(default=None, description="Model name (tts-1, tts-1-hd)")
102103
base_url: Optional[str] = Field(default=None, description="Endpoint URL")
103-
response_format: Optional[str] = Field(default=None, description="Audio format (e.g., pcm)")
104104
instructions: Optional[str] = Field(default=None, description="Custom voice instructions")
105105
speed: Optional[float] = Field(default=None, description="Speech speed multiplier")
106106
skip_patterns: Optional[List[int]] = Field(default=None)
@@ -118,8 +118,11 @@ def _validate_byok_params(self) -> "OpenAITTSOptions":
118118
]
119119
if missing:
120120
raise ValueError(f"OpenAITTS requires {', '.join(missing)} when api_key is set")
121-
elif self.base_url is not None:
122-
raise ValueError("OpenAITTS base_url is only valid when api_key is set")
121+
else:
122+
if self.model is not None and self.model.strip().lower() not in OpenAITtsPresetModels:
123+
raise ValueError("OpenAITTS requires api_key unless using the Agora-managed tts-1 model")
124+
if self.base_url is not None:
125+
raise ValueError("OpenAITTS base_url is only valid when api_key is set")
123126
return self
124127

125128
class OpenAITTS(BaseTTS):
@@ -141,8 +144,6 @@ def to_config(self) -> Dict[str, Any]:
141144
elif self.options.model is not None:
142145
params["model"] = self.options.model
143146

144-
if self.options.response_format is not None:
145-
params["response_format"] = self.options.response_format
146147
if self.options.instructions is not None:
147148
params["instructions"] = self.options.instructions
148149
if self.options.speed is not None:
@@ -423,6 +424,8 @@ def _validate_byok_params(self) -> "MiniMaxTTSOptions":
423424
]
424425
if missing:
425426
raise ValueError(f"MiniMaxTTS requires {', '.join(missing)} when key is set")
427+
elif self.model.strip().lower() not in MiniMaxPresetModels:
428+
raise ValueError("MiniMaxTTS requires key unless using a supported Agora-managed model")
426429
return self
427430

428431
class MiniMaxTTS(BaseTTS):

tests/custom/test_stt_language.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,14 @@ def test_default_interaction_language_is_sent_without_stt() -> None:
8181

8282

8383
def test_stt_vendor_params_match_documented_shapes() -> None:
84+
assert DeepgramSTT(model="nova-3", language="en-US").to_config()["params"] == {
85+
"model": "nova-3",
86+
"language": "en-US",
87+
}
88+
89+
with pytest.raises(Exception, match="api_key"):
90+
DeepgramSTT(model="enhanced")
91+
8492
assert DeepgramSTT(api_key="dg-key", language="en").to_config()["params"] == {
8593
"key": "dg-key",
8694
"language": "en",

tests/custom/test_tts_vendors.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import pytest
2+
13
from agora_agent import AmazonTTS, CartesiaTTS, ElevenLabsTTS, FishAudioTTS, GoogleTTS, HumeAITTS, MiniMaxTTS, MurfTTS, OpenAITTS, RimeTTS, SarvamTTS
24

35

@@ -97,3 +99,11 @@ def test_tts_vendor_params_match_generated_core_shapes() -> None:
9799
assert MurfTTS(key="murf-key").to_config()["params"] == {
98100
"api_key": "murf-key",
99101
}
102+
103+
104+
def test_tts_managed_mode_validation_matches_core_shapes() -> None:
105+
with pytest.raises(Exception, match="OpenAITTS requires api_key"):
106+
OpenAITTS(voice="coral", model="tts-1-hd")
107+
108+
with pytest.raises(Exception, match="MiniMaxTTS requires key"):
109+
MiniMaxTTS(model="speech-02-turbo")

0 commit comments

Comments
 (0)