Skip to content

Commit 1d51b55

Browse files
Merge pull request #22 from AgoraIO-Conversational-AI/hotfix/v1.3.2
Hotfix/v1.3.2
2 parents 6d341ca + 4b3316d commit 1d51b55

6 files changed

Lines changed: 222 additions & 26 deletions

File tree

README.md

Lines changed: 136 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
[![fern shield](https://img.shields.io/badge/%F0%9F%8C%BF-Built%20with%20Fern-brightgreen)](https://buildwithfern.com?utm_source=github&utm_medium=github&utm_campaign=readme&utm_source=https%3A%2F%2Fgithub.com%2FAgoraIO-Conversational-AI%2Fagent-server-sdk-python)
44
[![pypi](https://img.shields.io/pypi/v/agora-agent-server-sdk)](https://pypi.python.org/pypi/agora-agent-server-sdk)
55

6-
The Agora Conversational AI SDK provides convenient access to the Agora Conversational AI APIs, enabling you to build voice-powered AI agents with support for both cascading flows (ASR -> LLM -> TTS) and multimodal flows (MLLM) for real-time audio processing.
6+
The Agora Agent Server SDK for Python lets you build real-time voice agents on Agora Conversational AI with a high-level `Agent` / `AgentSession` API and a generated low-level REST client.
77

88
## Requirements
99

@@ -17,55 +17,170 @@ pip install agora-agent-server-sdk
1717

1818
## Quick Start
1919

20-
Minimal builder-based example using supported preset-backed models with no vendor API keys:
20+
The recommended onboarding path is a server-side builder flow: define the agent once, configure preset-backed providers in the builder, and let AgentKit infer the reseller `preset` values when the session starts.
2121

2222
```python
23+
import os
24+
import time
25+
2326
from agora_agent import Agora, Area
24-
from agora_agent.agentkit import Agent, DeepgramSTT, OpenAI, OpenAITTS
27+
from agora_agent.agentkit import (
28+
Agent,
29+
DataChannel,
30+
DeepgramSTT,
31+
MiniMaxTTS,
32+
OpenAI,
33+
expires_in_hours,
34+
)
35+
36+
AGENT_PROMPT = (
37+
"You are a concise, technically credible voice assistant. "
38+
"Keep replies short unless the user asks for detail."
39+
)
40+
41+
GREETING = "Hi there! I am your Agora voice assistant. How can I help?"
42+
43+
44+
def start_conversation() -> str:
45+
app_id = os.environ["AGORA_APP_ID"]
46+
app_certificate = os.environ["AGORA_APP_CERTIFICATE"]
2547

26-
def main() -> None:
2748
client = Agora(
2849
area=Area.US,
29-
app_id="your-app-id",
30-
app_certificate="your-app-certificate",
50+
app_id=app_id,
51+
app_certificate=app_certificate,
3152
)
3253

3354
agent = Agent(
34-
instructions="You are a concise voice assistant.",
35-
greeting="Hello! How can I help you today?",
55+
name=f"conversation-{int(time.time())}",
56+
instructions=AGENT_PROMPT,
57+
greeting=GREETING,
58+
failure_message="Please wait a moment.",
59+
max_history=50,
60+
turn_detection={
61+
"config": {
62+
"speech_threshold": 0.5,
63+
"start_of_speech": {
64+
"mode": "vad",
65+
"vad_config": {
66+
"interrupt_duration_ms": 160,
67+
"prefix_padding_ms": 300,
68+
},
69+
},
70+
"end_of_speech": {
71+
"mode": "vad",
72+
"vad_config": {
73+
"silence_duration_ms": 480,
74+
},
75+
},
76+
},
77+
},
78+
advanced_features={
79+
"enable_rtm": True,
80+
"enable_tools": True,
81+
},
82+
parameters={
83+
"data_channel": DataChannel.RTM,
84+
"enable_error_message": True,
85+
},
3686
).with_stt(
37-
DeepgramSTT(model="nova-3")
87+
DeepgramSTT(
88+
model="nova-3",
89+
language="en",
90+
)
3891
).with_llm(
39-
OpenAI(model="gpt-5-mini")
92+
OpenAI(
93+
model="gpt-4o-mini",
94+
greeting_message=GREETING,
95+
failure_message="Please wait a moment.",
96+
max_history=15,
97+
params={
98+
"max_tokens": 1024,
99+
"temperature": 0.7,
100+
"top_p": 0.95,
101+
},
102+
)
40103
).with_tts(
41-
OpenAITTS(voice="alloy")
104+
MiniMaxTTS(
105+
model="speech_2_6_turbo",
106+
voice_id="English_captivating_female1",
107+
)
42108
)
43109

44110
session = agent.create_session(
45111
client,
46-
channel="support-room-123",
47-
agent_uid="1",
48-
remote_uids=["100"],
112+
channel=f"demo-channel-{int(time.time())}",
113+
agent_uid="123456",
114+
remote_uids=["*"],
115+
idle_timeout=30,
116+
expires_in=expires_in_hours(1),
117+
debug=False,
49118
)
50119

51-
agent_id = session.start()
52-
print(agent_id)
53-
54-
55-
if __name__ == "__main__":
56-
main()
120+
return session.start()
57121
```
58122

59123
### Why no token or vendor key in the example?
60124

61-
The SDK-managed path is the recommended path. `Agora` generates the required ConvoAI REST auth and RTC join tokens automatically, and AgentKit infers the matching supported presets from the vendor configs when you omit vendor API keys.
125+
`Agora` generates the required ConvoAI REST auth and RTC join tokens automatically when you provide `app_id` and `app_certificate`. AgentKit then inspects the builder-provided vendor configs and infers the matching supported `preset` values for reseller-backed models, so you do not pass vendor API keys in this flow.
126+
127+
### BYOK version of the same builder flow
128+
129+
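Use the same `Agent` builder shape, but provide credentials explicitly when you want vendor-managed billing and routing instead of Agora-managed presets. The snippet below reuses the imports and the `AGENT_PROMPT` / `GREETING` constants from the Quick Start example.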
130+
131+
```python
132+
agent = Agent(
133+
instructions=AGENT_PROMPT,
134+
greeting=GREETING,
135+
).with_stt(
136+
DeepgramSTT(
137+
api_key=os.environ["DEEPGRAM_API_KEY"],
138+
model="nova-3",
139+
language="en",
140+
)
141+
).with_llm(
142+
OpenAI(
143+
api_key=os.environ["OPENAI_API_KEY"],
144+
model="gpt-4o-mini",
145+
max_tokens=1024,
146+
temperature=0.7,
147+
top_p=0.95,
148+
)
149+
).with_tts(
150+
MiniMaxTTS(
151+
key=os.environ["MINIMAX_API_KEY"],
152+
group_id=os.environ["MINIMAX_GROUP_ID"],
153+
model="speech_2_6_turbo",
154+
voice_id="English_captivating_female1",
155+
url="wss://api-uw.minimax.io/ws/v1/t2a_v2",
156+
)
157+
)
158+
```
62159

63160
## BYOK
64161

65162
If you want to bring your own vendor credentials instead of using Agora-managed presets, use the BYOK guide:
66163

67164
- [BYOK Guide](./docs/guides/byok.md)
68165

166+
## MLLM (Realtime / Multimodal)
167+
168+
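Use `with_mllm()` for OpenAI Realtime, Gemini Live, or Vertex AI. No STT, LLM, or TTS vendor is needed when MLLM mode is enabled. The example below reads the API key from the environment, so it also requires `import os`.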
169+
170+
```python
171+
from agora_agent.agentkit import Agent, OpenAIRealtime
172+
173+
agent = Agent(name="realtime-assistant").with_mllm(
174+
OpenAIRealtime(
175+
api_key=os.environ["OPENAI_API_KEY"],
176+
model="gpt-4o-realtime-preview",
177+
greeting_message="Hello! Ready to chat.",
178+
)
179+
)
180+
```
181+
182+
See the [MLLM Flow guide](./docs/guides/mllm-flow.md) for full examples with Gemini Live and Vertex AI.
183+
69184
## Documentation
70185

71186
- [Overview](./docs/index.md)

src/agora_agent/agentkit/agent.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,10 @@ def to_properties(
536536
mllm_config = dict(self._mllm)
537537
if self._greeting:
538538
mllm_config.setdefault("greeting_message", self._greeting)
539+
if self._failure_message:
540+
mllm_config.setdefault("failure_message", self._failure_message)
541+
if self._max_history is not None:
542+
mllm_config.setdefault("max_history", self._max_history)
539543
base_kwargs["mllm"] = mllm_config
540544
return StartAgentsRequestProperties(**base_kwargs)
541545

src/agora_agent/agentkit/vendors/mllm.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ class OpenAIRealtimeOptions(BaseModel):
1616
output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities")
1717
messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages")
1818
params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters")
19+
predefined_tools: Optional[List[str]] = Field(default=None, description="Predefined tools")
20+
failure_message: Optional[str] = Field(default=None, description="Message played on failure")
21+
max_history: Optional[int] = Field(default=None, description="Maximum conversation history length")
1922

2023
class OpenAIRealtime(BaseMLLM):
2124
def __init__(self, **kwargs: Any):
@@ -45,6 +48,12 @@ def to_config(self) -> Dict[str, Any]:
4548
config["output_modalities"] = self.options.output_modalities
4649
if self.options.messages is not None:
4750
config["messages"] = self.options.messages
51+
if self.options.predefined_tools is not None:
52+
config["predefined_tools"] = self.options.predefined_tools
53+
if self.options.failure_message is not None:
54+
config["failure_message"] = self.options.failure_message
55+
if self.options.max_history is not None:
56+
config["max_history"] = self.options.max_history
4857

4958
return config
5059

@@ -53,6 +62,7 @@ class VertexAIOptions(BaseModel):
5362
model_config = ConfigDict(extra="forbid")
5463

5564
model: str = Field(..., description="Model name")
65+
url: Optional[str] = Field(default=None, description="WebSocket URL")
5666
project_id: str = Field(..., description="Google Cloud project ID")
5767
location: str = Field(..., description="Google Cloud location/region")
5868
adc_credentials_string: str = Field(..., description="Application Default Credentials JSON string")
@@ -63,6 +73,9 @@ class VertexAIOptions(BaseModel):
6373
output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities")
6474
messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages")
6575
additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters")
76+
predefined_tools: Optional[List[str]] = Field(default=None, description="Predefined tools")
77+
failure_message: Optional[str] = Field(default=None, description="Message played on failure")
78+
max_history: Optional[int] = Field(default=None, description="Maximum conversation history length")
6679

6780
class VertexAI(BaseMLLM):
6881
def __init__(self, **kwargs: Any):
@@ -89,6 +102,8 @@ def to_config(self) -> Dict[str, Any]:
89102
"params": params,
90103
}
91104

105+
if self.options.url is not None:
106+
config["url"] = self.options.url
92107
if self.options.greeting_message is not None:
93108
config["greeting_message"] = self.options.greeting_message
94109
if self.options.input_modalities is not None:
@@ -97,6 +112,12 @@ def to_config(self) -> Dict[str, Any]:
97112
config["output_modalities"] = self.options.output_modalities
98113
if self.options.messages is not None:
99114
config["messages"] = self.options.messages
115+
if self.options.predefined_tools is not None:
116+
config["predefined_tools"] = self.options.predefined_tools
117+
if self.options.failure_message is not None:
118+
config["failure_message"] = self.options.failure_message
119+
if self.options.max_history is not None:
120+
config["max_history"] = self.options.max_history
100121

101122
return config
102123

@@ -106,13 +127,17 @@ class GeminiLiveOptions(BaseModel):
106127

107128
api_key: str = Field(..., description="Google API key")
108129
model: str = Field(..., description="Gemini Live model name")
130+
url: Optional[str] = Field(default=None, description="WebSocket URL")
109131
instructions: Optional[str] = Field(default=None, description="System instructions")
110132
voice: Optional[str] = Field(default=None, description="Voice name")
111133
greeting_message: Optional[str] = Field(default=None, description="Agent greeting message")
112134
input_modalities: Optional[List[str]] = Field(default=None, description="Input modalities")
113135
output_modalities: Optional[List[str]] = Field(default=None, description="Output modalities")
114136
messages: Optional[List[Dict[str, Any]]] = Field(default=None, description="Conversation messages")
115137
additional_params: Optional[Dict[str, Any]] = Field(default=None, description="Additional parameters")
138+
predefined_tools: Optional[List[str]] = Field(default=None, description="Predefined tools")
139+
failure_message: Optional[str] = Field(default=None, description="Message played on failure")
140+
max_history: Optional[int] = Field(default=None, description="Maximum conversation history length")
116141

117142
class GeminiLive(BaseMLLM):
118143
def __init__(self, **kwargs: Any):
@@ -135,6 +160,8 @@ def to_config(self) -> Dict[str, Any]:
135160
"params": params,
136161
}
137162

163+
if self.options.url is not None:
164+
config["url"] = self.options.url
138165
if self.options.greeting_message is not None:
139166
config["greeting_message"] = self.options.greeting_message
140167
if self.options.input_modalities is not None:
@@ -143,5 +170,11 @@ def to_config(self) -> Dict[str, Any]:
143170
config["output_modalities"] = self.options.output_modalities
144171
if self.options.messages is not None:
145172
config["messages"] = self.options.messages
173+
if self.options.predefined_tools is not None:
174+
config["predefined_tools"] = self.options.predefined_tools
175+
if self.options.failure_message is not None:
176+
config["failure_message"] = self.options.failure_message
177+
if self.options.max_history is not None:
178+
config["max_history"] = self.options.max_history
146179

147180
return config

tests/agentkit/test_agent.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ def test_to_properties_generates_token_and_respects_mllm_vendor_precedence():
108108
agent = Agent(greeting="top hello", failure_message="top fail", max_history=9).with_mllm(
109109
OpenAIRealtime(
110110
api_key="key",
111+
url="wss://openai.example.com/realtime",
111112
greeting_message="vendor hello",
112113
)
113114
).with_advanced_features({"enable_mllm": True})
@@ -123,6 +124,7 @@ def test_to_properties_generates_token_and_respects_mllm_vendor_precedence():
123124
)
124125

125126
assert props["mllm"]["greeting_message"] == "vendor hello"
126-
assert "failure_message" not in props["mllm"]
127-
assert "max_history" not in props["mllm"]
127+
assert props["mllm"]["failure_message"] == "top fail"
128+
assert props["mllm"]["max_history"] == 9
129+
assert props["mllm"]["url"] == "wss://openai.example.com/realtime"
128130
assert isinstance(props["token"], str) and props["token"]

tests/agentkit/test_agentkit_parity.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,9 +137,13 @@ def test_gemini_live_matches_low_level_shape(self):
137137
config = GeminiLive(
138138
api_key="google-key",
139139
model="gemini-live-2.5-flash",
140+
url="wss://generativelanguage.googleapis.com/ws",
140141
instructions="You are concise.",
141142
voice="Aoede",
142143
greeting_message="Hello",
144+
predefined_tools=["_publish_message"],
145+
failure_message="Please try again.",
146+
max_history=8,
143147
additional_params={"temperature": 0.2},
144148
messages=[{"role": "user", "content": "Hi"}],
145149
).to_config()
@@ -150,6 +154,7 @@ def test_gemini_live_matches_low_level_shape(self):
150154
"vendor": "gemini",
151155
"style": "openai",
152156
"api_key": "google-key",
157+
"url": "wss://generativelanguage.googleapis.com/ws",
153158
"params": {
154159
"temperature": 0.2,
155160
"model": "gemini-live-2.5-flash",
@@ -158,6 +163,9 @@ def test_gemini_live_matches_low_level_shape(self):
158163
},
159164
"messages": [{"role": "user", "content": "Hi"}],
160165
"greeting_message": "Hello",
166+
"predefined_tools": ["_publish_message"],
167+
"failure_message": "Please try again.",
168+
"max_history": 8,
161169
},
162170
)
163171

0 commit comments

Comments
 (0)