|
1 | | -# Agora Agent Server SDK for Python |
| 1 | +# Agoraio Python Library |
2 | 2 |
|
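3 | 3 | [![fern shield](https://img.shields.io/badge/%F0%9F%8C%BF-Built%20with%20Fern-brightgreen)](https://buildwithfern.com?utm_source=github&utm_medium=github&utm_campaign=readme&utm_source=https%3A%2F%2Fgithub.com%2FAgoraIO-Conversational-AI%2Fagent-server-sdk-python) |
4 | 4 | [![pypi](https://img.shields.io/pypi/v/agora-agent-server-sdk)](https://pypi.python.org/pypi/agora-agent-server-sdk) |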
5 | 5 |
|
6 | | -The Agora Conversational AI SDK provides convenient access to the Agora Conversational AI APIs, |
7 | | -enabling you to build voice-powered AI agents with support for both cascading flows (ASR -> LLM -> TTS) |
| 6 | +The Agora Conversational AI SDK provides convenient access to the Agora Conversational AI APIs, |
| 7 | +enabling you to build voice-powered AI agents with support for both cascading flows (ASR -> LLM -> TTS) |
8 | 8 | and multimodal flows (MLLM) for real-time audio processing. |
9 | 9 |
|
| 10 | + |
10 | 11 | ## Table of Contents |
11 | 12 |
|
12 | 13 | - [Installation](#installation) |
13 | 14 | - [Quick Start](#quick-start) |
14 | 15 | - [Documentation](#documentation) |
15 | 16 | - [Reference](#reference) |
16 | 17 | - [MLLM Flow (Multimodal)](#mllm-flow-multimodal) |
| 18 | +- [MLLM Flow (Multimodal)](#mllm-flow-multimodal) |
17 | 19 | - [Usage](#usage) |
18 | 20 | - [Async Client](#async-client) |
19 | 21 | - [Exception Handling](#exception-handling) |
@@ -212,6 +214,71 @@ client.agents.start( |
212 | 214 | ) |
213 | 215 | ``` |
214 | 216 |
|
| 217 | +## MLLM Flow (Multimodal) |
| 218 | + |
| 219 | +For real-time audio processing using OpenAI's Realtime API or Google Gemini Live, use the MLLM (Multimodal Large Language Model) flow instead of the cascading ASR -> LLM -> TTS flow. See the [MLLM Overview](https://docs.agora.io/en/conversational-ai/models/mllm/overview) for more details. |
| 220 | + |
| 221 | +```python |
| 222 | +from agora_agent import Agora |
| 223 | +from agora_agent.agents import ( |
| 224 | + StartAgentsRequestProperties, |
| 225 | + StartAgentsRequestPropertiesAdvancedFeatures, |
| 226 | + StartAgentsRequestPropertiesMllm, |
| 227 | + StartAgentsRequestPropertiesMllmVendor, |
| 228 | + StartAgentsRequestPropertiesTts, |
| 229 | + StartAgentsRequestPropertiesTtsVendor, |
| 230 | + StartAgentsRequestPropertiesLlm, |
| 231 | + StartAgentsRequestPropertiesTurnDetection, |
| 232 | + StartAgentsRequestPropertiesTurnDetectionType, |
| 233 | +) |
| 234 | + |
| 235 | +client = Agora( |
| 236 | + customer_id="YOUR_CUSTOMER_ID", |
| 237 | + customer_secret="YOUR_CUSTOMER_SECRET", |
| 238 | +) |
| 239 | + |
| 240 | +client.agents.start( |
| 241 | + appid="your_app_id", |
| 242 | + name="mllm_agent", |
| 243 | + properties=StartAgentsRequestProperties( |
| 244 | + channel="channel_name", |
| 245 | + token="your_token", |
| 246 | + agent_rtc_uid="1001", |
| 247 | + remote_rtc_uids=["1002"], |
| 248 | + idle_timeout=120, |
| 249 | + advanced_features=StartAgentsRequestPropertiesAdvancedFeatures( |
| 250 | + enable_mllm=True, |
| 251 | + ), |
| 252 | + mllm=StartAgentsRequestPropertiesMllm( |
| 253 | + url="wss://api.openai.com/v1/realtime", |
| 254 | + api_key="<your_openai_api_key>", |
| 255 | + vendor=StartAgentsRequestPropertiesMllmVendor.OPENAI, |
| 256 | + params={ |
| 257 | + "model": "gpt-4o-realtime-preview", |
| 258 | + "voice": "alloy", |
| 259 | + }, |
| 260 | + input_modalities=["audio"], |
| 261 | + output_modalities=["text", "audio"], |
| 262 | + greeting_message="Hello! I'm ready to chat in real-time.", |
| 263 | + ), |
| 264 | + turn_detection=StartAgentsRequestPropertiesTurnDetection( |
| 265 | + type=StartAgentsRequestPropertiesTurnDetectionType.SERVER_VAD, |
| 266 | + threshold=0.5, |
| 267 | + silence_duration_ms=500, |
| 268 | + ), |
| 269 | + # TTS and LLM are still required but not used when MLLM is enabled |
| 270 | + tts=StartAgentsRequestPropertiesTts( |
| 271 | + vendor=StartAgentsRequestPropertiesTtsVendor.MICROSOFT, |
| 272 | + params={}, |
| 273 | + ), |
| 274 | + llm=StartAgentsRequestPropertiesLlm( |
| 275 | + url="https://api.openai.com/v1/chat/completions", |
| 276 | + ), |
| 277 | + ), |
| 278 | +) |
| 279 | +``` |
| 280 | + |
| 281 | + |
215 | 282 | ## Usage |
216 | 283 |
|
217 | 284 | Instantiate and use the client with the following: |
|
0 commit comments