@@ -26,17 +26,9 @@ def __init__(self) -> None:
2626 You eagerly assist users with their questions by providing information from your extensive knowledge.
2727 Your responses are concise, to the point, and without any complex formatting or punctuation including emojis, asterisks, or other symbols.
2828 You are curious, friendly, and have a sense of humor.""" ,
29- # Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
30- # See all available models at https://docs.livekit.io/agents/models/stt/
31- stt = inference .STT (model = "deepgram/nova-3" , language = "multi" ),
3229 # A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
3330 # See all available models at https://docs.livekit.io/agents/models/llm/
3431 llm = inference .LLM (model = "openai/gpt-5.2-chat-latest" ),
35- # Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
36- # See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
37- tts = inference .TTS (
38- model = "cartesia/sonic-3" , voice = "9626c31c-bec5-4cca-baa8-f8ba9e84c8bc"
39- ),
4032 )
4133
4234 # To add tools, use the @function_tool decorator.
@@ -75,7 +67,16 @@ async def my_agent(ctx: JobContext):
7567 "room" : ctx .room .name ,
7668 }
7769
70+ # Set up a voice AI pipeline using OpenAI, Cartesia, Deepgram, and the LiveKit turn detector
7871 session = AgentSession (
72+ # Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
73+ # See all available models at https://docs.livekit.io/agents/models/stt/
74+ stt = inference .STT (model = "deepgram/nova-3" , language = "multi" ),
75+ # Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
76+ # See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
77+ tts = inference .TTS (
78+ model = "cartesia/sonic-3" , voice = "9626c31c-bec5-4cca-baa8-f8ba9e84c8bc"
79+ ),
7980 # VAD and turn detection are used to determine when the user is speaking and when the agent should respond
8081 # See more at https://docs.livekit.io/agents/build/turns
8182 turn_detection = MultilingualModel (),
@@ -85,13 +86,13 @@ async def my_agent(ctx: JobContext):
8586 preemptive_generation = True ,
8687 )
8788
88- # To use a realtime model instead of a voice pipeline, override the LLM in Assistant
89- # with an OpenAI Realtime model .
89+ # To use a realtime model instead of a voice pipeline, replace the LLM on Assistant
90+ # with a RealtimeModel and remove the STT/TTS from this session.
9091 # (Note: This is for the OpenAI Realtime API. For other providers, see https://docs.livekit.io/agents/models/realtime/)
9192 # 1. Install livekit-agents[openai]
9293 # 2. Set OPENAI_API_KEY in .env.local
9394 # 3. Add `from livekit.plugins import openai` to the top of this file
94- # 4. In Assistant, replace the llm/stt/tts arguments with:
95+ # 4. In Assistant, replace the llm argument with:
9596 # llm=openai.realtime.RealtimeModel(voice="marin")
9697
9798 # # Add a virtual avatar to the session, if desired
0 commit comments