Skip to content

Commit 3ba60cd

Browse files
Release 0.0.14
1 parent 8fc16f2 commit 3ba60cd

8 files changed

Lines changed: 83 additions & 6 deletions

.fern/metadata.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"cliVersion": "1.9.2",
2+
"cliVersion": "3.35.2",
33
"generatorName": "fernapi/fern-python-sdk",
44
"generatorVersion": "4.37.0",
55
"generatorConfig": {

README.md

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ and multimodal flows (MLLM) for real-time audio processing.
1414
- [Installation](#installation)
1515
- [Reference](#reference)
1616
- [Mllm Flow Multimodal](#mllm-flow-multimodal)
17+
- [MLLM Flow (Multimodal)](#mllm-flow-multimodal)
1718
- [Usage](#usage)
1819
- [Async Client](#async-client)
1920
- [Exception Handling](#exception-handling)
@@ -104,6 +105,71 @@ client.agents.start(
104105
```
105106

106107

108+
## MLLM Flow (Multimodal)
109+
110+
For real-time audio processing using OpenAI's Realtime API or Google Gemini Live, use the MLLM (Multimodal Large Language Model) flow instead of the cascading ASR -> LLM -> TTS flow. See the [MLLM Overview](https://docs.agora.io/en/conversational-ai/models/mllm/overview) for more details.
111+
112+
```python
113+
from agoraio import Agora
114+
from agoraio.agents import (
115+
StartAgentsRequestProperties,
116+
StartAgentsRequestPropertiesAdvancedFeatures,
117+
StartAgentsRequestPropertiesMllm,
118+
StartAgentsRequestPropertiesMllmVendor,
119+
StartAgentsRequestPropertiesTts,
120+
StartAgentsRequestPropertiesTtsVendor,
121+
StartAgentsRequestPropertiesLlm,
122+
StartAgentsRequestPropertiesTurnDetection,
123+
StartAgentsRequestPropertiesTurnDetectionType,
124+
)
125+
126+
client = Agora(
127+
username="YOUR_APP_ID",
128+
password="YOUR_APP_CERTIFICATE",
129+
)
130+
131+
client.agents.start(
132+
appid="your_app_id",
133+
name="mllm_agent",
134+
properties=StartAgentsRequestProperties(
135+
channel="channel_name",
136+
token="your_token",
137+
agent_rtc_uid="1001",
138+
remote_rtc_uids=["1002"],
139+
idle_timeout=120,
140+
advanced_features=StartAgentsRequestPropertiesAdvancedFeatures(
141+
enable_mllm=True,
142+
),
143+
mllm=StartAgentsRequestPropertiesMllm(
144+
url="wss://api.openai.com/v1/realtime",
145+
api_key="<your_openai_api_key>",
146+
vendor=StartAgentsRequestPropertiesMllmVendor.OPENAI,
147+
params={
148+
"model": "gpt-4o-realtime-preview",
149+
"voice": "alloy",
150+
},
151+
input_modalities=["audio"],
152+
output_modalities=["text", "audio"],
153+
greeting_message="Hello! I'm ready to chat in real-time.",
154+
),
155+
turn_detection=StartAgentsRequestPropertiesTurnDetection(
156+
type=StartAgentsRequestPropertiesTurnDetectionType.SERVER_VAD,
157+
threshold=0.5,
158+
silence_duration_ms=500,
159+
),
160+
# TTS and LLM are still required but not used when MLLM is enabled
161+
tts=StartAgentsRequestPropertiesTts(
162+
vendor=StartAgentsRequestPropertiesTtsVendor.MICROSOFT,
163+
params={},
164+
),
165+
llm=StartAgentsRequestPropertiesLlm(
166+
url="https://api.openai.com/v1/chat/completions",
167+
),
168+
),
169+
)
170+
```
171+
172+
107173
## Usage
108174

109175
Instantiate and use the client with the following:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name = "agoraio-sdk"
33

44
[tool.poetry]
55
name = "agoraio-sdk"
6-
version = "0.1.1"
6+
version = "0.0.14"
77
description = ""
88
readme = "README.md"
99
authors = []

src/agoraio/agents/types/get_history_agents_response.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from ...core.pydantic_utilities import IS_PYDANTIC_V2
77
from ...core.unchecked_base_model import UncheckedBaseModel
88
from .get_history_agents_response_contents_item import GetHistoryAgentsResponseContentsItem
9+
from .get_history_agents_response_status import GetHistoryAgentsResponseStatus
910

1011

1112
class GetHistoryAgentsResponse(UncheckedBaseModel):
@@ -19,7 +20,7 @@ class GetHistoryAgentsResponse(UncheckedBaseModel):
1920
Agent creation timestamp.
2021
"""
2122

22-
status: typing.Optional[typing.Literal["RUNNING"]] = pydantic.Field(default=None)
23+
status: typing.Optional[GetHistoryAgentsResponseStatus] = pydantic.Field(default=None)
2324
"""
2425
Agent status. Only supports querying the running agent.
2526
"""
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# This file was auto-generated by Fern from our API Definition.
2+
3+
import typing
4+
5+
GetHistoryAgentsResponseStatus = typing.Union[typing.Literal["RUNNING"], typing.Any]

src/agoraio/agents/types/start_agents_request_properties_mllm.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pydantic
66
from ...core.pydantic_utilities import IS_PYDANTIC_V2
77
from ...core.unchecked_base_model import UncheckedBaseModel
8+
from .start_agents_request_properties_mllm_style import StartAgentsRequestPropertiesMllmStyle
89
from .start_agents_request_properties_mllm_vendor import StartAgentsRequestPropertiesMllmVendor
910

1011

@@ -58,7 +59,7 @@ class StartAgentsRequestPropertiesMllm(UncheckedBaseModel):
5859
- `vertexai`: Use this for Google Gemini Live
5960
"""
6061

61-
style: typing.Optional[typing.Literal["openai"]] = pydantic.Field(default=None)
62+
style: typing.Optional[StartAgentsRequestPropertiesMllmStyle] = pydantic.Field(default=None)
6263
"""
6364
The request style for MLLM completion:
6465
- `openai`: For OpenAI Realtime API format
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# This file was auto-generated by Fern from our API Definition.
2+
3+
import typing
4+
5+
StartAgentsRequestPropertiesMllmStyle = typing.Union[typing.Literal["openai"], typing.Any]

src/agoraio/core/client_wrapper.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,9 @@ def __init__(
2424

2525
def get_headers(self) -> typing.Dict[str, str]:
2626
headers: typing.Dict[str, str] = {
27-
"User-Agent": "agoraio-sdk/0.1.1",
2827
"X-Fern-Language": "Python",
2928
"X-Fern-SDK-Name": "agoraio-sdk",
30-
"X-Fern-SDK-Version": "0.1.1",
29+
"X-Fern-SDK-Version": "0.0.14",
3130
**(self.get_custom_headers() or {}),
3231
}
3332
headers["Authorization"] = httpx.BasicAuth(self._get_username(), self._get_password())._auth_header

0 commit comments

Comments
 (0)