Skip to content

Commit 41bf843

Browse files
committed
Fix up
1 parent bdcd506 commit 41bf843

2 files changed

Lines changed: 15 additions & 12 deletions

File tree

src/agent.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,9 @@ def __init__(self) -> None:
2626
You eagerly assist users with their questions by providing information from your extensive knowledge.
2727
Your responses are concise, to the point, and without any complex formatting or punctuation including emojis, asterisks, or other symbols.
2828
You are curious, friendly, and have a sense of humor.""",
29-
# Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
30-
# See all available models at https://docs.livekit.io/agents/models/stt/
31-
stt=inference.STT(model="deepgram/nova-3", language="multi"),
3229
# A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
3330
# See all available models at https://docs.livekit.io/agents/models/llm/
3431
llm=inference.LLM(model="openai/gpt-5.2-chat-latest"),
35-
# Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
36-
# See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
37-
tts=inference.TTS(
38-
model="cartesia/sonic-3", voice="9626c31c-bec5-4cca-baa8-f8ba9e84c8bc"
39-
),
4032
)
4133

4234
# To add tools, use the @function_tool decorator.
@@ -75,7 +67,16 @@ async def my_agent(ctx: JobContext):
7567
"room": ctx.room.name,
7668
}
7769

70+
# Set up a voice AI pipeline using OpenAI, Cartesia, Deepgram, and the LiveKit turn detector
7871
session = AgentSession(
72+
# Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
73+
# See all available models at https://docs.livekit.io/agents/models/stt/
74+
stt=inference.STT(model="deepgram/nova-3", language="multi"),
75+
# Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
76+
# See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
77+
tts=inference.TTS(
78+
model="cartesia/sonic-3", voice="9626c31c-bec5-4cca-baa8-f8ba9e84c8bc"
79+
),
7980
# VAD and turn detection are used to determine when the user is speaking and when the agent should respond
8081
# See more at https://docs.livekit.io/agents/build/turns
8182
turn_detection=MultilingualModel(),
@@ -85,13 +86,13 @@ async def my_agent(ctx: JobContext):
8586
preemptive_generation=True,
8687
)
8788

88-
# To use a realtime model instead of a voice pipeline, override the LLM in Assistant
89-
# with an OpenAI Realtime model.
89+
# To use a realtime model instead of a voice pipeline, replace the LLM on Assistant
90+
# with a RealtimeModel and remove the STT/TTS from this session.
9091
# (Note: This is for the OpenAI Realtime API. For other providers, see https://docs.livekit.io/agents/models/realtime/)
9192
# 1. Install livekit-agents[openai]
9293
# 2. Set OPENAI_API_KEY in .env.local
9394
# 3. Add `from livekit.plugins import openai` to the top of this file
94-
# 4. In Assistant, replace the llm/stt/tts arguments with:
95+
# 4. In Assistant, replace the llm argument with:
9596
# llm=openai.realtime.RealtimeModel(voice="marin")
9697

9798
# # Add a virtual avatar to the session, if desired

tests/test_agent.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55

66

77
def _judge_llm() -> llm.LLM:
8-
return inference.LLM(model="openai/gpt-5.1")
8+
# The LLM that evaluates the agent's responses can be different from the one used in the agent itself.
9+
# This allows you to use reasoning capabilities, or larger models, that would not be practical for realtime chat.
10+
return inference.LLM(model="openai/gpt-5.2")
911

1012

1113
@pytest.mark.asyncio

0 commit comments

Comments
 (0)