---
sidebar_position: 1
title: Cascading Flow (ASR → LLM → TTS)
description: Build a voice agent using Speech-to-Text, a text LLM, and Text-to-Speech.
---
The cascading flow is the most common pattern for building voice agents. Audio from a user is transcribed by an STT (ASR) vendor, the transcript is sent to an LLM for a response, and the response is rendered as audio by a TTS vendor.
User audio → STT → LLM → TTS → Agent audio
import time

from agora_agent import Agent, Agora, Area, DeepgramSTT, ElevenLabsTTS, OpenAI

# Synchronous client for the US area; replace the placeholder credentials.
client = Agora(area=Area.US, app_id='your-app-id', app_certificate='your-app-certificate')

# Attach the vendors one step at a time: LLM first, then TTS, then STT.
agent = Agent(client=client)
agent = agent.with_llm(
    OpenAI(
        api_key='your-openai-key',
        base_url='https://api.openai.com/v1/chat/completions',
        model='gpt-4o-mini',
        system_messages=[{'role': 'system', 'content': 'You are a friendly customer support agent.'}],
    )
)
agent = agent.with_tts(
    ElevenLabsTTS(
        key='your-elevenlabs-key',
        model_id='eleven_flash_v2_5',
        voice_id='your-voice-id',
        base_url='wss://api.elevenlabs.io/v1',
        sample_rate=24000,
    )
)
agent = agent.with_stt(
    DeepgramSTT(api_key='your-deepgram-key', language='en-US', model='nova-2')
)

# Channel and conversation names are suffixed with the current Unix time.
channel_name = f"demo-channel-{int(time.time())}"
conversation_name = f"conversation-{int(time.time())}"
session = agent.create_session(
    channel=channel_name,
    agent_uid='1',
    remote_uids=['100'],
    name=conversation_name,
)

# Start the session, then have the agent say a greeting.
agent_id = session.start()
session.say('Welcome! How can I assist you today?')
# ... agent listens and responds automatically ...
session.stop()

import asyncio
from agora_agent import Agent, AsyncAgora, Area, DeepgramSTT, ElevenLabsTTS, OpenAI
import time
async def main():
    """Run one cascading (STT -> LLM -> TTS) voice-agent session.

    Creates an ``AsyncAgora`` client, attaches the OpenAI LLM, ElevenLabs TTS
    and Deepgram STT vendors to an ``Agent``, starts a session, has the agent
    say a greeting, and stops the session.

    The session is stopped in a ``finally`` block so that an exception raised
    after ``start()`` does not skip ``stop()``.
    """
    client = AsyncAgora(
        area=Area.US,
        app_id='your-app-id',
        app_certificate='your-app-certificate',
    )

    agent = (
        Agent(client=client)
        .with_llm(OpenAI(
            api_key='your-openai-key',
            base_url='https://api.openai.com/v1/chat/completions',
            model='gpt-4o-mini',
            system_messages=[{'role': 'system', 'content': 'You are a friendly customer support agent.'}],
        ))
        .with_tts(ElevenLabsTTS(
            key='your-elevenlabs-key',
            model_id='eleven_flash_v2_5',
            voice_id='your-voice-id',
            base_url='wss://api.elevenlabs.io/v1',
            sample_rate=24000,
        ))
        .with_stt(DeepgramSTT(api_key='your-deepgram-key', language='en-US', model='nova-2'))
    )

    # Channel and conversation names are suffixed with the current Unix time.
    session = agent.create_session(
        channel=f"demo-channel-{int(time.time())}",
        agent_uid='1',
        remote_uids=['100'],
        name=f"conversation-{int(time.time())}",
    )

    # start() stays outside the try: if it fails there is nothing to stop.
    # agent_id is not used below; it is kept to show that start() returns a value.
    agent_id = await session.start()
    try:
        await session.say('Welcome! How can I assist you today?')
        # ... agent listens and responds automatically ...
    finally:
        # Always stop the started session, even if the code above raised.
        await session.stop()
asyncio.run(main())

This combination keeps everything within the Azure ecosystem:
import time

from agora_agent import Agent, Agora, Area, AzureOpenAI, MicrosoftTTS, MicrosoftSTT

# Synchronous client for the EU area; replace the placeholder credentials.
client = Agora(area=Area.EU, app_id='your-app-id', app_certificate='your-app-certificate')

# Attach the Azure/Microsoft vendors one step at a time: LLM, then TTS, then STT.
agent = Agent(client=client)
agent = agent.with_llm(
    AzureOpenAI(
        api_key='your-azure-key',
        endpoint='https://your-resource.openai.azure.com',
        deployment_name='gpt-4o-mini',
        system_messages=[{'role': 'system', 'content': 'You are a helpful assistant for enterprise customers.'}],
    )
)
# TTS and STT below are given the same speech key and region.
agent = agent.with_tts(
    MicrosoftTTS(
        key='your-azure-speech-key',
        region='eastus',
        voice_name='en-US-JennyNeural',
        sample_rate=24000,
    )
)
agent = agent.with_stt(
    MicrosoftSTT(
        key='your-azure-speech-key',
        region='eastus',
        language='en-US',
    )
)

# Channel and conversation names are suffixed with the current Unix time.
channel_name = f"demo-channel-{int(time.time())}"
conversation_name = f"conversation-{int(time.time())}"
session = agent.create_session(
    channel=channel_name,
    agent_uid='1',
    remote_uids=['100'],
    name=conversation_name,
)

# Start the session, then have the agent say a greeting.
agent_id = session.start()
session.say('Hello! I am your enterprise assistant.')
session.stop()

All LLM vendors support optional parameters for tuning response generation:
from agora_agent import OpenAI
llm = OpenAI(
api_key='your-openai-key',
base_url='https://api.openai.com/v1/chat/completions',
model='gpt-4o-mini',
temperature=0.7,
top_p=0.9,
max_tokens=1024,
)Configure greetings on the LLM vendor so message ownership stays with the LLM configuration:
from agora_agent import Agent, Agora, Area, OpenAI

client = Agora(
    area=Area.US,
    app_id='your-app-id',
    app_certificate='your-app-certificate',
)

# The greeting is configured on the LLM vendor, alongside its system messages.
base_agent = Agent(client=client)
llm_with_greeting = OpenAI(
    api_key='your-openai-key',
    base_url='https://api.openai.com/v1/chat/completions',
    model='gpt-4o-mini',
    system_messages=[{'role': 'system', 'content': 'You are a helpful assistant.'}],
    greeting_message='Hi there! What can I do for you?',
)
agent = base_agent.with_llm(llm_with_greeting)