Fix up: move STT/TTS config from Assistant to AgentSession and bump the judge LLM to gpt-5.2

bcherry · bcherry · commit 41bf8430011b · 2026-04-29T21:30:17.000-07:00
diff --git a/src/agent.py b/src/agent.py
@@ -26,17 +26,9 @@ def __init__(self) -> None:
             You eagerly assist users with their questions by providing information from your extensive knowledge.
             Your responses are concise, to the point, and without any complex formatting or punctuation including emojis, asterisks, or other symbols.
             You are curious, friendly, and have a sense of humor.""",
-            # Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
-            # See all available models at https://docs.livekit.io/agents/models/stt/
-            stt=inference.STT(model="deepgram/nova-3", language="multi"),
             # A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
             # See all available models at https://docs.livekit.io/agents/models/llm/
             llm=inference.LLM(model="openai/gpt-5.2-chat-latest"),
-            # Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
-            # See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
-            tts=inference.TTS(
-                model="cartesia/sonic-3", voice="9626c31c-bec5-4cca-baa8-f8ba9e84c8bc"
-            ),
         )
 
     # To add tools, use the @function_tool decorator.
@@ -75,7 +67,16 @@ async def my_agent(ctx: JobContext):
         "room": ctx.room.name,
     }
 
+    # Set up a voice AI pipeline using OpenAI, Cartesia, Deepgram, and the LiveKit turn detector
     session = AgentSession(
+        # Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
+        # See all available models at https://docs.livekit.io/agents/models/stt/
+        stt=inference.STT(model="deepgram/nova-3", language="multi"),
+        # Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
+        # See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
+        tts=inference.TTS(
+            model="cartesia/sonic-3", voice="9626c31c-bec5-4cca-baa8-f8ba9e84c8bc"
+        ),
         # VAD and turn detection are used to determine when the user is speaking and when the agent should respond
         # See more at https://docs.livekit.io/agents/build/turns
         turn_detection=MultilingualModel(),
@@ -85,13 +86,13 @@ async def my_agent(ctx: JobContext):
         preemptive_generation=True,
     )
 
-    # To use a realtime model instead of a voice pipeline, override the LLM in Assistant
-    # with an OpenAI Realtime model.
+    # To use a realtime model instead of a voice pipeline, replace the LLM on Assistant
+    # with a RealtimeModel and remove the STT/TTS from this session.
     # (Note: This is for the OpenAI Realtime API. For other providers, see https://docs.livekit.io/agents/models/realtime/)
     # 1. Install livekit-agents[openai]
     # 2. Set OPENAI_API_KEY in .env.local
     # 3. Add `from livekit.plugins import openai` to the top of this file
-    # 4. In Assistant, replace the llm/stt/tts arguments with:
+    # 4. In Assistant, replace the llm argument with:
     #     llm=openai.realtime.RealtimeModel(voice="marin")
 
     # # Add a virtual avatar to the session, if desired
diff --git a/tests/test_agent.py b/tests/test_agent.py
@@ -5,7 +5,9 @@
 
 
 def _judge_llm() -> llm.LLM:
-    return inference.LLM(model="openai/gpt-5.1")
+    # The judge that evaluates the agent's responses can be a different LLM from the one the agent itself uses.
+    # This lets you use reasoning capabilities or larger models than would be practical for realtime chat.
+    return inference.LLM(model="openai/gpt-5.2")
 
 
 @pytest.mark.asyncio