fix: remove obsolete LLM agent protocol from evals (#1359)

cristipufu · web-flow · commit 7012ab490f8a · 2026-02-22T16:29:03.000+02:00
diff --git a/src/uipath/_cli/_evals/_runtime.py b/src/uipath/_cli/_evals/_runtime.py
@@ -8,10 +8,8 @@
     Awaitable,
     Iterable,
     Iterator,
-    Protocol,
     Sequence,
     Tuple,
-    runtime_checkable,
 )
 
 import coverage
@@ -100,25 +98,6 @@
 logger = logging.getLogger(__name__)
 
 
-@runtime_checkable
-class LLMAgentRuntimeProtocol(Protocol):
-    """Protocol for runtimes that can provide agent model information.
-
-    Runtimes that implement this protocol can be queried for
-    the agent's configured LLM model, enabling features like 'same-as-agent'
-    model resolution for evaluators.
-    """
-
-    def get_agent_model(self) -> str | None:
-        """Return the agent's configured LLM model name.
-
-        Returns:
-            The model name from agent settings (e.g., 'gpt-4o-2024-11-20'),
-            or None if no model is configured.
-        """
-        ...
-
-
 class ExecutionSpanExporter(SpanExporter):
     """Custom exporter that stores spans grouped by execution ids."""
 
diff --git a/src/uipath/_cli/cli_eval.py b/src/uipath/_cli/cli_eval.py
@@ -10,7 +10,6 @@
 from uipath.runtime import (
     UiPathRuntimeContext,
     UiPathRuntimeFactoryRegistry,
-    UiPathRuntimeProtocol,
     UiPathRuntimeSchema,
 )
 
@@ -19,7 +18,6 @@
 from uipath._cli._evals._models._evaluation_set import EvaluationSet
 from uipath._cli._evals._progress_reporter import StudioWebProgressReporter
 from uipath._cli._evals._runtime import (
-    LLMAgentRuntimeProtocol,
     UiPathEvalContext,
 )
 from uipath._cli._evals._telemetry import EvalTelemetrySubscriber
@@ -70,34 +68,7 @@ def setup_reporting_prereq(no_report: bool) -> bool:
     return True
 
 
-def _find_agent_model_in_runtime(runtime: UiPathRuntimeProtocol) -> str | None:
-    """Recursively search for get_agent_model in runtime and its delegates.
-
-    Runtimes may be wrapped (e.g., ResumableRuntime wraps TelemetryWrapper
-    which wraps the base runtime). This method traverses the wrapper chain
-    to find a runtime that implements LLMAgentRuntimeProtocol.
-
-    Args:
-        runtime: The runtime to check (may be a wrapper)
-
-    Returns:
-        The model name if found, None otherwise.
-    """
-    # Check if this runtime implements the protocol
-    if isinstance(runtime, LLMAgentRuntimeProtocol):
-        return runtime.get_agent_model()
-
-    # Check for delegate property (used by UiPathResumableRuntime, TelemetryRuntimeWrapper)
-    delegate = getattr(runtime, "delegate", None) or getattr(runtime, "_delegate", None)
-    if delegate is not None:
-        return _find_agent_model_in_runtime(delegate)
-
-    return None
-
-
-async def _get_agent_model(
-    runtime: UiPathRuntimeProtocol, schema: UiPathRuntimeSchema
-) -> str | None:
+async def _get_agent_model(schema: UiPathRuntimeSchema) -> str | None:
     """Get agent model from the runtime schema metadata.
 
     The model is read from schema.metadata["settings"]["model"] which is
@@ -113,12 +84,7 @@ async def _get_agent_model(
             if model:
                 logger.debug(f"Got agent model from schema.metadata: {model}")
                 return model
-
-        # Fallback to protocol-based approach for backwards compatibility
-        model = _find_agent_model_in_runtime(runtime)
-        if model:
-            logger.debug(f"Got agent model from runtime protocol: {model}")
-        return model
+        return None
     except Exception:
         return None
 
@@ -395,7 +361,7 @@ async def execute_eval():
                     eval_context.evaluators = await EvalHelpers.load_evaluators(
                         resolved_eval_set_path,
                         eval_context.evaluation_set,
-                        await _get_agent_model(runtime, eval_context.runtime_schema),
+                        await _get_agent_model(eval_context.runtime_schema),
                     )
 
                     # Runtime is not required anymore.
diff --git a/tests/cli/eval/test_eval_runtime_metadata.py b/tests/cli/eval/test_eval_runtime_metadata.py
@@ -1,11 +1,8 @@
 """Tests for UiPathEvalRuntime metadata loading functionality.
 
 This module tests:
-- _ensure_metadata_loaded() - single runtime creation for both schema and agent model
 - _get_agent_model() - cached agent model retrieval
 - get_schema() - cached schema retrieval
-- _find_agent_model_in_runtime() - recursive delegate traversal
-- LLMAgentRuntimeProtocol - protocol implementation detection
 """
 
 import uuid
@@ -26,12 +23,10 @@
 from uipath.runtime.schema import UiPathRuntimeSchema
 
 from uipath._cli._evals._runtime import (
-    LLMAgentRuntimeProtocol,
     UiPathEvalContext,
     UiPathEvalRuntime,
 )
 from uipath._cli.cli_eval import (
-    _find_agent_model_in_runtime,
     _get_agent_model,
 )
 from uipath._events._event_bus import EventBus
@@ -80,16 +75,6 @@ async def dispose(self) -> None:
         pass
 
 
-class AgentModelRuntime(BaseTestRuntime):
-    """Test runtime that implements LLMAgentRuntimeProtocol."""
-
-    def __init__(self, model: str | None = "gpt-4o-2024-11-20"):
-        self._model = model
-
-    def get_agent_model(self) -> str | None:
-        return self._model
-
-
 class WrapperRuntime(BaseTestRuntime):
     """Test runtime that wraps another runtime (like UiPathResumableRuntime)."""
 
@@ -136,123 +121,38 @@ async def dispose(self) -> None:
         pass
 
 
-class TestLLMAgentRuntimeProtocol:
-    """Tests for LLMAgentRuntimeProtocol detection."""
-
-    def test_protocol_detects_implementing_class(self):
-        """Test that protocol correctly identifies implementing classes."""
-        runtime = AgentModelRuntime("gpt-4")
-        assert isinstance(runtime, LLMAgentRuntimeProtocol)
-
-    def test_protocol_rejects_non_implementing_class(self):
-        """Test that protocol correctly rejects non-implementing classes."""
-        runtime = BaseTestRuntime()
-        assert not isinstance(runtime, LLMAgentRuntimeProtocol)
-
-    def test_protocol_rejects_wrapper_without_method(self):
-        """Test that wrapper without get_agent_model is not detected."""
-        inner = AgentModelRuntime("gpt-4")
-        wrapper = WrapperRuntime(inner)
-        assert not isinstance(wrapper, LLMAgentRuntimeProtocol)
-
-
-class TestFindAgentModelInRuntime:
-    """Tests for _find_agent_model_in_runtime recursive search."""
-
-    def test_finds_model_in_direct_runtime(self):
-        """Test finding agent model directly on runtime."""
-        runtime = AgentModelRuntime("gpt-4o")
-        result = _find_agent_model_in_runtime(runtime)
-        assert result == "gpt-4o"
-
-    def test_finds_model_in_wrapped_runtime(self):
-        """Test finding agent model through wrapper's delegate."""
-        inner = AgentModelRuntime("claude-3")
-        wrapper = WrapperRuntime(inner)
-        result = _find_agent_model_in_runtime(wrapper)
-        assert result == "claude-3"
-
-    def test_finds_model_in_deeply_wrapped_runtime(self):
-        """Test finding agent model through multiple wrapper layers."""
-        inner = AgentModelRuntime("gpt-4-turbo")
-        wrapper1 = WrapperRuntime(inner)
-        wrapper2 = WrapperRuntime(wrapper1)
-        result = _find_agent_model_in_runtime(wrapper2)
-        assert result == "gpt-4-turbo"
-
-    def test_finds_model_via_private_delegate(self):
-        """Test finding agent model through _delegate attribute."""
-        inner = AgentModelRuntime("gemini-pro")
-        wrapper = PrivateDelegateRuntime(inner)
-        result = _find_agent_model_in_runtime(wrapper)
-        assert result == "gemini-pro"
-
-    def test_returns_none_when_no_model(self):
-        """Test returns None when no runtime implements the protocol."""
-        runtime = BaseTestRuntime()
-        result = _find_agent_model_in_runtime(runtime)
-        assert result is None
-
-    def test_returns_none_for_none_model(self):
-        """Test returns None when runtime returns None for model."""
-        runtime = AgentModelRuntime(None)
-        result = _find_agent_model_in_runtime(runtime)
-        assert result is None
-
-
 class TestGetAgentModel:
     """Tests for _get_agent_model function."""
 
     @pytest.mark.asyncio
     async def test_returns_agent_model(self):
         """Test that _get_agent_model returns the correct model from schema."""
-        runtime = AgentModelRuntime("gpt-4o-2024-11-20")
         schema = MockRuntimeSchema()
         schema.metadata = {"settings": {"model": "gpt-4o-2024-11-20"}}
 
-        model = await _get_agent_model(runtime, schema)
+        model = await _get_agent_model(schema)
         assert model == "gpt-4o-2024-11-20"
 
     @pytest.mark.asyncio
     async def test_returns_none_when_no_model(self):
         """Test that _get_agent_model returns None when runtime has no model."""
-        runtime = BaseTestRuntime()
         schema = MockRuntimeSchema()
 
-        model = await _get_agent_model(runtime, schema)
+        model = await _get_agent_model(schema)
         assert model is None
 
     @pytest.mark.asyncio
     async def test_returns_model_consistently(self):
         """Test that _get_agent_model returns consistent results."""
-        runtime = AgentModelRuntime("consistent-model")
         schema = MockRuntimeSchema()
         schema.metadata = {"settings": {"model": "consistent-model"}}
 
         # Multiple calls should return the same value
-        model1 = await _get_agent_model(runtime, schema)
-        model2 = await _get_agent_model(runtime, schema)
+        model1 = await _get_agent_model(schema)
+        model2 = await _get_agent_model(schema)
 
         assert model1 == model2 == "consistent-model"
 
-    @pytest.mark.asyncio
-    async def test_handles_exception_gracefully(self, monkeypatch):
-        """Test that _get_agent_model returns None when _find_agent_model_in_runtime raises exception."""
-        runtime = BaseTestRuntime()
-        schema = MockRuntimeSchema()
-
-        # Mock _find_agent_model_in_runtime to raise an exception
-        def mock_find_agent_model_error(r):
-            raise RuntimeError("Unexpected error during model lookup")
-
-        monkeypatch.setattr(
-            "uipath._cli.cli_eval._find_agent_model_in_runtime",
-            mock_find_agent_model_error,
-        )
-
-        model = await _get_agent_model(runtime, schema)
-        assert model is None
-
 
 class TestGetSchema:
     """Tests for get_schema method."""
@@ -314,25 +214,3 @@ async def create_runtime():
         # Should be the same object
         assert schema1 is schema2
         assert schema1.file_path == schema2.file_path == "test.py"
-
-
-class TestWrappedRuntimeModelResolution:
-    """Tests for model resolution through realistic wrapper chains."""
-
-    def test_resolves_model_through_resumable_telemetry_chain(self):
-        """Test model resolution through ResumableRuntime -> TelemetryWrapper -> BaseRuntime chain.
-
-        This mimics the real wrapper chain:
-        UiPathResumableRuntime -> TelemetryRuntimeWrapper -> AgentsLangGraphRuntime
-        """
-        # Base runtime with model
-        base_runtime = AgentModelRuntime("gpt-4o-from-agent-json")
-
-        # Simulate TelemetryRuntimeWrapper
-        telemetry_wrapper = WrapperRuntime(base_runtime)
-
-        # Simulate UiPathResumableRuntime
-        resumable_runtime = WrapperRuntime(telemetry_wrapper)
-
-        model = _find_agent_model_in_runtime(resumable_runtime)
-        assert model == "gpt-4o-from-agent-json"