From 60f844d5ebb5413acae6eff7eeac12180926958d Mon Sep 17 00:00:00 2001
From: "fern-api[bot]" <115122769+fern-api[bot]@users.noreply.github.com>
Date: Wed, 11 Mar 2026 14:01:53 +0000
Subject: [PATCH 1/4] SDK regeneration

---
 README.md                                     | 73 ++++++++++++++++++-
 pyproject.toml                                |  2 +-
 .../types/start_agents_request_properties.py  |  6 +-
 .../start_agents_request_properties_mllm.py   |  2 +-
 .../start_agents_request_properties_sal.py    |  2 +-
 src/agora_agent/core/client_wrapper.py        |  4 +-
 6 files changed, 78 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 3b661dd..44310b3 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,13 @@
-# Agora Agent Server SDK for Python
+# Agoraio Python Library
 
 [![fern shield](https://img.shields.io/badge/%F0%9F%8C%BF-Built%20with%20Fern-brightgreen)](https://buildwithfern.com?utm_source=github&utm_medium=github&utm_campaign=readme&utm_source=https%3A%2F%2Fgithub.com%2FAgoraIO-Conversational-AI%2Fagent-server-sdk-python)
 [![pypi](https://img.shields.io/pypi/v/agora-agent-server-sdk)](https://pypi.python.org/pypi/agora-agent-server-sdk)
 
-The Agora Conversational AI SDK provides convenient access to the Agora Conversational AI APIs,
-enabling you to build voice-powered AI agents with support for both cascading flows (ASR -> LLM -> TTS)
+The Agora Conversational AI SDK provides convenient access to the Agora Conversational AI APIs, 
+enabling you to build voice-powered AI agents with support for both cascading flows (ASR -> LLM -> TTS) 
 and multimodal flows (MLLM) for real-time audio processing.
 
+
 ## Table of Contents
 
 - [Installation](#installation)
@@ -14,6 +15,7 @@ and multimodal flows (MLLM) for real-time audio processing.
 - [Documentation](#documentation)
 - [Reference](#reference)
 - [Mllm Flow Multimodal](#mllm-flow-multimodal)
+- [Mllm Flow Multimodal](#mllm-flow-multimodal)
 - [Usage](#usage)
 - [Async Client](#async-client)
 - [Exception Handling](#exception-handling)
@@ -212,6 +214,71 @@ client.agents.start(
 )
 ```
 
+## MLLM Flow (Multimodal)
+
+For real-time audio processing using OpenAI's Realtime API or Google Gemini Live, use the MLLM (Multimodal Large Language Model) flow instead of the cascading ASR -> LLM -> TTS flow. See the [MLLM Overview](https://docs.agora.io/en/conversational-ai/models/mllm/overview) for more details.
+
+```python
+from agora_agent import Agora
+from agora_agent.agents import (
+    StartAgentsRequestProperties,
+    StartAgentsRequestPropertiesAdvancedFeatures,
+    StartAgentsRequestPropertiesMllm,
+    StartAgentsRequestPropertiesMllmVendor,
+    StartAgentsRequestPropertiesTts,
+    StartAgentsRequestPropertiesTtsVendor,
+    StartAgentsRequestPropertiesLlm,
+    StartAgentsRequestPropertiesTurnDetection,
+    StartAgentsRequestPropertiesTurnDetectionType,
+)
+
+client = Agora(
+    customer_id="YOUR_CUSTOMER_ID",
+    customer_secret="YOUR_CUSTOMER_SECRET",
+)
+
+client.agents.start(
+    appid="your_app_id",
+    name="mllm_agent",
+    properties=StartAgentsRequestProperties(
+        channel="channel_name",
+        token="your_token",
+        agent_rtc_uid="1001",
+        remote_rtc_uids=["1002"],
+        idle_timeout=120,
+        advanced_features=StartAgentsRequestPropertiesAdvancedFeatures(
+            enable_mllm=True,
+        ),
+        mllm=StartAgentsRequestPropertiesMllm(
+            url="wss://api.openai.com/v1/realtime",
+            api_key="<your_openai_api_key>",
+            vendor=StartAgentsRequestPropertiesMllmVendor.OPENAI,
+            params={
+                "model": "gpt-4o-realtime-preview",
+                "voice": "alloy",
+            },
+            input_modalities=["audio"],
+            output_modalities=["text", "audio"],
+            greeting_message="Hello! I'm ready to chat in real-time.",
+        ),
+        turn_detection=StartAgentsRequestPropertiesTurnDetection(
+            type=StartAgentsRequestPropertiesTurnDetectionType.SERVER_VAD,
+            threshold=0.5,
+            silence_duration_ms=500,
+        ),
+        # TTS and LLM are not used when MLLM is enabled; both fields are optional in StartAgentsRequestProperties
+        tts=StartAgentsRequestPropertiesTts(
+            vendor=StartAgentsRequestPropertiesTtsVendor.MICROSOFT,
+            params={},
+        ),
+        llm=StartAgentsRequestPropertiesLlm(
+            url="https://api.openai.com/v1/chat/completions",
+        ),
+    ),
+)
+```
+
+
 ## Usage
 
 Instantiate and use the client with the following:
diff --git a/pyproject.toml b/pyproject.toml
index 8f51a0e..8b2dcf0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,7 +3,7 @@ name = "agora-agent-server-sdk"
 
 [tool.poetry]
 name = "agora-agent-server-sdk"
-version = "1.1.0"
+version = "1.1.1"
 description = ""
 readme = "README.md"
 authors = []
diff --git a/src/agora_agent/agents/types/start_agents_request_properties.py b/src/agora_agent/agents/types/start_agents_request_properties.py
index 9f8b762..538a2b0 100644
--- a/src/agora_agent/agents/types/start_agents_request_properties.py
+++ b/src/agora_agent/agents/types/start_agents_request_properties.py
@@ -71,19 +71,19 @@ class StartAgentsRequestProperties(UncheckedBaseModel):
     Automatic Speech Recognition (ASR) configuration.
     """
 
-    tts: Tts = pydantic.Field()
+    tts: typing.Optional[Tts] = pydantic.Field(default=None)
     """
     Text-to-speech (TTS) module configuration.
     """
 
-    llm: StartAgentsRequestPropertiesLlm = pydantic.Field()
+    llm: typing.Optional[StartAgentsRequestPropertiesLlm] = pydantic.Field(default=None)
     """
     Large language model (LLM) configuration.
     """
 
     mllm: typing.Optional[StartAgentsRequestPropertiesMllm] = pydantic.Field(default=None)
     """
-    Multimodal Large Language Model (MLLM) configuration for real-time audio and text processing.
+    Multimodal Large Language Model (MLLM) configuration for real-time audio and text processing. MLLM is an exclusive alternative to the standard `asr` + `llm` + `tts` pipeline.
     """
 
     avatar: typing.Optional[StartAgentsRequestPropertiesAvatar] = pydantic.Field(default=None)
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_mllm.py b/src/agora_agent/agents/types/start_agents_request_properties_mllm.py
index 881c155..c0b9f61 100644
--- a/src/agora_agent/agents/types/start_agents_request_properties_mllm.py
+++ b/src/agora_agent/agents/types/start_agents_request_properties_mllm.py
@@ -10,7 +10,7 @@
 
 class StartAgentsRequestPropertiesMllm(UncheckedBaseModel):
     """
-    Multimodal Large Language Model (MLLM) configuration for real-time audio and text processing.
+    Multimodal Large Language Model (MLLM) configuration for real-time audio and text processing. MLLM is an exclusive alternative to the standard `asr` + `llm` + `tts` pipeline.
     """
 
     url: typing.Optional[str] = pydantic.Field(default=None)
diff --git a/src/agora_agent/agents/types/start_agents_request_properties_sal.py b/src/agora_agent/agents/types/start_agents_request_properties_sal.py
index c39b299..1d8b0b5 100644
--- a/src/agora_agent/agents/types/start_agents_request_properties_sal.py
+++ b/src/agora_agent/agents/types/start_agents_request_properties_sal.py
@@ -29,7 +29,7 @@ class StartAgentsRequestPropertiesSal(UncheckedBaseModel):
     > - For a registered voiceprint, ensure that:
     >   - Size: A single voiceprint file must not exceed 2 MB.
     >   - Duration: 10 to 15 seconds, with at least 8 seconds of effective audio without silent segments.
-    >   - Format: 16kHz sampling rate, 16-bit depth, mono PCM audio file. The file name extension must be ".pcm".      
+    >   - Format: 16kHz sampling rate, 16-bit depth, mono PCM audio file. The file name extension must be ".pcm".
     """
 
     if IS_PYDANTIC_V2:
diff --git a/src/agora_agent/core/client_wrapper.py b/src/agora_agent/core/client_wrapper.py
index 9bd9ac7..d28bbbf 100644
--- a/src/agora_agent/core/client_wrapper.py
+++ b/src/agora_agent/core/client_wrapper.py
@@ -26,10 +26,10 @@ def __init__(
 
     def get_headers(self) -> typing.Dict[str, str]:
         headers: typing.Dict[str, str] = {
-            "User-Agent": "agora-agent-server-sdk/1.1.0",
+            "User-Agent": "agora-agent-server-sdk/1.1.1",
             "X-Fern-Language": "Python",
             "X-Fern-SDK-Name": "agora-agent-server-sdk",
-            "X-Fern-SDK-Version": "1.1.0",
+            "X-Fern-SDK-Version": "1.1.1",
             **(self.get_custom_headers() or {}),
         }
         headers["Authorization"] = httpx.BasicAuth(self._get_username(), self._get_password())._auth_header

From ea2dd59e481a6b6e7dfb973f55c62d692d4df302 Mon Sep 17 00:00:00 2001
From: Hermes <digitallysavvy@users.noreply.github.com>
Date: Wed, 11 Mar 2026 10:24:17 -0400
Subject: [PATCH 2/4] Update project title in README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index eccf502..e01e338 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Agoraio Python Library
+# Agora Agent Server SDK for Python
 
 [![fern shield](https://img.shields.io/badge/%F0%9F%8C%BF-Built%20with%20Fern-brightgreen)](https://buildwithfern.com?utm_source=github&utm_medium=github&utm_campaign=readme&utm_source=https%3A%2F%2Fgithub.com%2FAgoraIO-Conversational-AI%2Fagent-server-sdk-python)
 [![pypi](https://img.shields.io/pypi/v/agent-server-sdk-python)](https://pypi.python.org/pypi/agent-server-sdk-python)

From 848bcaa75f6dfe32706121f11a8a4613f99eda93 Mon Sep 17 00:00:00 2001
From: Hermes <digitallysavvy@users.noreply.github.com>
Date: Wed, 11 Mar 2026 10:27:02 -0400
Subject: [PATCH 3/4] Rename project to 'agent-server-sdk-python'

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4e3c1f3..1747f0f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "agent-server-sdk-python"
 
 [tool.poetry]
-name = "agora-agent-server-sdk"
+name = "agent-server-sdk-python"
 version = "1.1.1"
 description = ""
 readme = "README.md"

From 5fdac4e29edec995b608101c91615ff8ec789451 Mon Sep 17 00:00:00 2001
From: Hermes <digitallysavvy@users.noreply.github.com>
Date: Wed, 11 Mar 2026 10:40:51 -0400
Subject: [PATCH 4/4] Rename project to agora-agent-server-sdk

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 1747f0f..8b2dcf0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,8 +1,8 @@
 [project]
-name = "agent-server-sdk-python"
+name = "agora-agent-server-sdk"
 
 [tool.poetry]
-name = "agent-server-sdk-python"
+name = "agora-agent-server-sdk"
 version = "1.1.1"
 description = ""
 readme = "README.md"