Skip to content

Commit e0b224e

Browse files
SDK regeneration
Unable to analyze changes with AI, incrementing PATCH version.
1 parent c510c70 commit e0b224e

32 files changed

Lines changed: 1009 additions & 164 deletions

README.md

Lines changed: 80 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ and multimodal flows (MLLM) for real-time audio processing.
1414
- [Installation](#installation)
1515
- [Reference](#reference)
1616
- [Mllm Flow Multimodal](#mllm-flow-multimodal)
17+
- [Mllm Flow Multimodal](#mllm-flow-multimodal)
1718
- [Usage](#usage)
1819
- [Async Client](#async-client)
1920
- [Exception Handling](#exception-handling)
@@ -104,18 +105,82 @@ client.agents.start(
104105
```
105106

106107

108+
## MLLM Flow (Multimodal)
109+
110+
For real-time audio processing using OpenAI's Realtime API or Google Gemini Live, use the MLLM (Multimodal Large Language Model) flow instead of the cascading ASR -> LLM -> TTS flow. See the [MLLM Overview](https://docs.agora.io/en/conversational-ai/models/mllm/overview) for more details.
111+
112+
```python
113+
from agoraio import Agora
114+
from agoraio.agents import (
115+
StartAgentsRequestProperties,
116+
StartAgentsRequestPropertiesAdvancedFeatures,
117+
StartAgentsRequestPropertiesMllm,
118+
StartAgentsRequestPropertiesMllmVendor,
119+
StartAgentsRequestPropertiesTts,
120+
StartAgentsRequestPropertiesTtsVendor,
121+
StartAgentsRequestPropertiesLlm,
122+
StartAgentsRequestPropertiesTurnDetection,
123+
StartAgentsRequestPropertiesTurnDetectionType,
124+
)
125+
126+
client = Agora(
127+
username="YOUR_APP_ID",
128+
password="YOUR_APP_CERTIFICATE",
129+
)
130+
131+
client.agents.start(
132+
appid="your_app_id",
133+
name="mllm_agent",
134+
properties=StartAgentsRequestProperties(
135+
channel="channel_name",
136+
token="your_token",
137+
agent_rtc_uid="1001",
138+
remote_rtc_uids=["1002"],
139+
idle_timeout=120,
140+
advanced_features=StartAgentsRequestPropertiesAdvancedFeatures(
141+
enable_mllm=True,
142+
),
143+
mllm=StartAgentsRequestPropertiesMllm(
144+
url="wss://api.openai.com/v1/realtime",
145+
api_key="<your_openai_api_key>",
146+
vendor=StartAgentsRequestPropertiesMllmVendor.OPENAI,
147+
params={
148+
"model": "gpt-4o-realtime-preview",
149+
"voice": "alloy",
150+
},
151+
input_modalities=["audio"],
152+
output_modalities=["text", "audio"],
153+
greeting_message="Hello! I'm ready to chat in real-time.",
154+
),
155+
turn_detection=StartAgentsRequestPropertiesTurnDetection(
156+
type=StartAgentsRequestPropertiesTurnDetectionType.SERVER_VAD,
157+
threshold=0.5,
158+
silence_duration_ms=500,
159+
),
160+
# TTS and LLM are still required but not used when MLLM is enabled
161+
tts=StartAgentsRequestPropertiesTts(
162+
vendor=StartAgentsRequestPropertiesTtsVendor.MICROSOFT,
163+
params={},
164+
),
165+
llm=StartAgentsRequestPropertiesLlm(
166+
url="https://api.openai.com/v1/chat/completions",
167+
),
168+
),
169+
)
170+
```
171+
172+
107173
## Usage
108174

109175
Instantiate and use the client with the following:
110176

111177
```python
112-
from agoraio import Agora
178+
from agoraio import Agora, MicrosoftTtsParams, Tts_Microsoft
113179
from agoraio.agents import (
114180
StartAgentsRequestProperties,
115181
StartAgentsRequestPropertiesAdvancedFeatures,
116182
StartAgentsRequestPropertiesAsr,
117183
StartAgentsRequestPropertiesLlm,
118-
StartAgentsRequestPropertiesTts,
119184
)
120185

121186
client = Agora(
@@ -137,13 +202,12 @@ client.agents.start(
137202
asr=StartAgentsRequestPropertiesAsr(
138203
language="en-US",
139204
),
140-
tts=StartAgentsRequestPropertiesTts(
141-
vendor="microsoft",
142-
params={
143-
"key": "<your_tts_api_key>",
144-
"region": "eastus",
145-
"voice_name": "en-US-AndrewMultilingualNeural",
146-
},
205+
tts=Tts_Microsoft(
206+
params=MicrosoftTtsParams(
207+
key="key",
208+
region="region",
209+
voice_name="voice_name",
210+
),
147211
),
148212
llm=StartAgentsRequestPropertiesLlm(
149213
url="https://api.openai.com/v1/chat/completions",
@@ -167,13 +231,12 @@ The SDK also exports an `async` client so that you can make non-blocking calls t
167231
```python
168232
import asyncio
169233

170-
from agoraio import AsyncAgora
234+
from agoraio import AsyncAgora, MicrosoftTtsParams, Tts_Microsoft
171235
from agoraio.agents import (
172236
StartAgentsRequestProperties,
173237
StartAgentsRequestPropertiesAdvancedFeatures,
174238
StartAgentsRequestPropertiesAsr,
175239
StartAgentsRequestPropertiesLlm,
176-
StartAgentsRequestPropertiesTts,
177240
)
178241

179242
client = AsyncAgora(
@@ -198,13 +261,12 @@ async def main() -> None:
198261
asr=StartAgentsRequestPropertiesAsr(
199262
language="en-US",
200263
),
201-
tts=StartAgentsRequestPropertiesTts(
202-
vendor="microsoft",
203-
params={
204-
"key": "<your_tts_api_key>",
205-
"region": "eastus",
206-
"voice_name": "en-US-AndrewMultilingualNeural",
207-
},
264+
tts=Tts_Microsoft(
265+
params=MicrosoftTtsParams(
266+
key="key",
267+
region="region",
268+
voice_name="voice_name",
269+
),
208270
),
209271
llm=StartAgentsRequestPropertiesLlm(
210272
url="https://api.openai.com/v1/chat/completions",

changelog.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## 0.0.10 - 2026-01-13
2+
* SDK regeneration
3+
* Unable to analyze changes with AI, incrementing PATCH version.
4+
15
## 0.0.9 - 2026-01-09
26
* SDK regeneration
37
* Unable to analyze changes with AI, incrementing PATCH version.

poetry.lock

Lines changed: 48 additions & 43 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name = "agoraio-sdk"
33

44
[tool.poetry]
55
name = "agoraio-sdk"
6-
version = "0.0.9"
6+
version = "0.0.10"
77
description = ""
88
readme = "README.md"
99
authors = []

0 commit comments

Comments
 (0)