Move the LLM instance onto Assistant so it can be shared cleanly with tests (#71)

bcherry · web-flow · commit 8b81be645c0e · 2026-05-07T12:46:41.000-07:00
* Attach models to agent subclass

* Fix up

* comment

* ruff

* 4.1-mini

* remove comment
diff --git a/src/agent.py b/src/agent.py
@@ -19,12 +19,21 @@
 
 load_dotenv(".env.local")
 
-AGENT_MODEL = "openai/gpt-5.2-chat-latest"
-
 
 class Assistant(Agent):
     def __init__(self) -> None:
         super().__init__(
+            # A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
+            # See all available models at https://docs.livekit.io/agents/models/llm/
+            llm=inference.LLM(model="openai/gpt-5.2-chat-latest"),
+            # To use a realtime model instead of a voice pipeline, replace the LLM
+            # with a RealtimeModel and remove the STT/TTS from the AgentSession
+            # (Note: This is for the OpenAI Realtime API. For other providers, see https://docs.livekit.io/agents/models/realtime/)
+            # 1. Install livekit-agents[openai]
+            # 2. Set OPENAI_API_KEY in .env.local
+            # 3. Add `from livekit.plugins import openai` to the top of this file
+            # 4. Replace the llm argument with:
+            #     llm=openai.realtime.RealtimeModel(voice="marin"),
             instructions=textwrap.dedent(
                 """\
                 You are a friendly, reliable voice assistant that answers questions, explains topics, and completes tasks with available tools.
@@ -103,9 +112,6 @@ async def my_agent(ctx: JobContext):
         # Speech-to-text (STT) is your agent's ears, turning the user's speech into text that the LLM can understand
         # See all available models at https://docs.livekit.io/agents/models/stt/
         stt=inference.STT(model="deepgram/nova-3", language="multi"),
-        # A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
-        # See all available models at https://docs.livekit.io/agents/models/llm/
-        llm=inference.LLM(model=AGENT_MODEL),
         # Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
         # See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
         tts=inference.TTS(
@@ -120,16 +126,6 @@ async def my_agent(ctx: JobContext):
         preemptive_generation=True,
     )
 
-    # To use a realtime model instead of a voice pipeline, use the following session setup instead.
-    # (Note: This is for the OpenAI Realtime API. For other providers, see https://docs.livekit.io/agents/models/realtime/))
-    # 1. Install livekit-agents[openai]
-    # 2. Set OPENAI_API_KEY in .env.local
-    # 3. Add `from livekit.plugins import openai` to the top of this file
-    # 4. Use the following session setup instead of the version above
-    # session = AgentSession(
-    #     llm=openai.realtime.RealtimeModel(voice="marin")
-    # )
-
     # Start the session, which initializes the voice pipeline and warms up the models
     await session.start(
         agent=Assistant(),
diff --git a/tests/test_agent.py b/tests/test_agent.py
@@ -3,25 +3,19 @@
 import pytest
 from livekit.agents import AgentSession, inference, llm
 
-from agent import AGENT_MODEL, Assistant
-
-
-def _agent_llm() -> llm.LLM:
-    return inference.LLM(model=AGENT_MODEL)
+from agent import Assistant
 
 
 def _judge_llm() -> llm.LLM:
-    # The judge LLM can be a cheaper model since it only evaluates agent responses
     return inference.LLM(model="openai/gpt-4.1-mini")
 
 
 @pytest.mark.asyncio
 async def test_offers_assistance() -> None:
     """Evaluation of the agent's friendly nature."""
     async with (
-        _agent_llm() as agent_llm,
         _judge_llm() as judge_llm,
-        AgentSession(llm=agent_llm) as session,
+        AgentSession() as session,
     ):
         await session.start(Assistant())
 
@@ -54,9 +48,8 @@ async def test_offers_assistance() -> None:
 async def test_grounding() -> None:
     """Evaluation of the agent's ability to refuse to answer when it doesn't know something."""
     async with (
-        _agent_llm() as agent_llm,
         _judge_llm() as judge_llm,
-        AgentSession(llm=agent_llm) as session,
+        AgentSession() as session,
     ):
         await session.start(Assistant())
 
@@ -99,9 +92,8 @@ async def test_grounding() -> None:
 async def test_refuses_harmful_request() -> None:
     """Evaluation of the agent's ability to refuse inappropriate or harmful requests."""
     async with (
-        _agent_llm() as agent_llm,
         _judge_llm() as judge_llm,
-        AgentSession(llm=agent_llm) as session,
+        AgentSession() as session,
     ):
         await session.start(Assistant())