Skip to content

Commit dab185a

Browse files
jsondai and copybara-github
authored and committed
feat: GenAI Client(evals) - BREAKING_CHANGE: The agent engine resource name is now passed as a separate agent parameter to create_evaluation_run methods, rather than being part of the AgentInfo object. This parameter is now required if agent_info is provided
PiperOrigin-RevId: 886965034
1 parent 6a03f78 commit dab185a

File tree

5 files changed

+37
-41
lines changed

5 files changed

+37
-41
lines changed

tests/unit/vertexai/genai/replays/test_create_evaluation_run.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
from google.genai import types as genai_types
2020
import pandas as pd
2121
import pytest
22+
from unittest import mock
23+
import uuid
2224

2325
GCS_DEST = "gs://lakeyk-limited-bucket/eval_run_output"
2426
GENERAL_QUALITY_METRIC = types.EvaluationRunMetric(
@@ -79,8 +81,8 @@
7981
)
8082
]
8183
)
84+
AGENT_RESOURCE_NAME = "projects/123/locations/us-central1/reasoningEngines/456"
8285
AGENT_INFO = types.evals.AgentInfo(
83-
agent_resource_name=("projects/123/locations/us-central1/reasoningEngines/456"),
8486
name="agent-1",
8587
agents={
8688
"agent-1": types.evals.AgentConfig(
@@ -124,6 +126,7 @@ def test_create_eval_run_data_source_evaluation_set(client):
124126
BLEU_COMPUTATION_BASED_METRIC,
125127
],
126128
agent_info=AGENT_INFO,
129+
agent=AGENT_RESOURCE_NAME,
127130
labels={"label1": "value1"},
128131
)
129132
assert isinstance(evaluation_run, types.EvaluationRun)
@@ -148,7 +151,7 @@ def test_create_eval_run_data_source_evaluation_set(client):
148151
] == types.EvaluationRunInferenceConfig(
149152
agent_configs=AGENT_INFO.agents,
150153
agent_run_config=types.AgentRunConfig(
151-
agent_engine=AGENT_INFO.agent_resource_name,
154+
agent_engine=AGENT_RESOURCE_NAME,
152155
user_simulator_config={"max_turn": 5},
153156
),
154157
)
@@ -219,6 +222,7 @@ def test_create_eval_run_with_user_simulator_config(client):
219222
dest=GCS_DEST,
220223
metrics=[GENERAL_QUALITY_METRIC],
221224
agent_info=AGENT_INFO,
225+
agent=AGENT_RESOURCE_NAME,
222226
user_simulator_config=types.evals.UserSimulatorConfig(
223227
max_turn=5,
224228
),
@@ -243,7 +247,7 @@ def test_create_eval_run_with_user_simulator_config(client):
243247
] == types.EvaluationRunInferenceConfig(
244248
agent_configs=AGENT_INFO.agents,
245249
agent_run_config=types.AgentRunConfig(
246-
agent_engine=AGENT_INFO.agent_resource_name,
250+
agent_engine=AGENT_RESOURCE_NAME,
247251
user_simulator_config=types.evals.UserSimulatorConfig(max_turn=5),
248252
),
249253
)
@@ -290,8 +294,14 @@ def test_create_eval_run_with_inference_configs(client):
290294
assert evaluation_run.error is None
291295

292296

293-
def test_create_eval_run_with_metric_resource_name(client):
297+
@mock.patch("uuid.uuid4")
298+
def test_create_eval_run_with_metric_resource_name(mock_uuid4, client):
294299
"""Tests create_evaluation_run with metric_resource_name."""
300+
mock_uuid4.side_effect = [
301+
uuid.UUID("d392c573-9e81-4a30-b984-8a6aa4656369"),
302+
uuid.UUID("49128576-accd-459e-aace-41391e163b3c"),
303+
uuid.UUID("9bcc726e-d2cf-448c-967b-f49480d8c1c2"),
304+
]
295305
client._api_client._http_options.api_version = "v1beta1"
296306
client._api_client._http_options.base_url = (
297307
"https://us-central1-staging-aiplatform.sandbox.googleapis.com/"
@@ -733,6 +743,7 @@ async def test_create_eval_run_async_with_user_simulator_config(client):
733743
dest=GCS_DEST,
734744
metrics=[GENERAL_QUALITY_METRIC],
735745
agent_info=AGENT_INFO,
746+
agent=AGENT_RESOURCE_NAME,
736747
user_simulator_config=types.evals.UserSimulatorConfig(
737748
max_turn=5,
738749
),
@@ -757,7 +768,7 @@ async def test_create_eval_run_async_with_user_simulator_config(client):
757768
] == types.EvaluationRunInferenceConfig(
758769
agent_configs=AGENT_INFO.agents,
759770
agent_run_config=types.AgentRunConfig(
760-
agent_engine=AGENT_INFO.agent_resource_name,
771+
agent_engine=AGENT_RESOURCE_NAME,
761772
user_simulator_config=types.evals.UserSimulatorConfig(max_turn=5),
762773
),
763774
)

tests/unit/vertexai/genai/test_evals.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3503,16 +3503,11 @@ def my_search_tool(query: str) -> str:
35033503

35043504
agent_info = vertexai_genai_types.evals.AgentInfo.load_from_agent(
35053505
agent=mock_agent,
3506-
agent_resource_name="projects/123/locations/abc/reasoningEngines/456",
35073506
)
35083507

35093508
assert agent_info.name == "mock_agent"
35103509
assert agent_info.agents["mock_agent"].instruction == "mock instruction"
35113510
assert agent_info.agents["mock_agent"].description == "mock description"
3512-
assert (
3513-
agent_info.agent_resource_name
3514-
== "projects/123/locations/abc/reasoningEngines/456"
3515-
)
35163511
assert len(agent_info.agents["mock_agent"].tools) == 1
35173512
assert isinstance(agent_info.agents["mock_agent"].tools[0], genai_types.Tool)
35183513
assert agent_info.agents["mock_agent"].tools[0].function_declarations == [

vertexai/_genai/_evals_common.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -451,14 +451,14 @@ def _resolve_inference_configs(
451451

452452
def _add_evaluation_run_labels(
453453
labels: Optional[dict[str, str]] = None,
454-
parsed_agent_info: Optional[types.evals.AgentInfo] = None,
454+
agent: Optional[str] = None,
455455
) -> Optional[dict[str, str]]:
456456
"""Adds labels to the evaluation run."""
457-
if parsed_agent_info and parsed_agent_info.agent_resource_name:
457+
if agent:
458458
labels = labels or {}
459-
labels["vertex-ai-evaluation-agent-engine-id"] = (
460-
parsed_agent_info.agent_resource_name.split("reasoningEngines/")[-1]
461-
)
459+
labels["vertex-ai-evaluation-agent-engine-id"] = agent.split(
460+
"reasoningEngines/"
461+
)[-1]
462462
return labels
463463

464464

vertexai/_genai/evals.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2101,6 +2101,7 @@ def create_evaluation_run(
21012101
name: Optional[str] = None,
21022102
display_name: Optional[str] = None,
21032103
agent_info: Optional[evals_types.AgentInfoOrDict] = None,
2104+
agent: Optional[str] = None,
21042105
user_simulator_config: Optional[evals_types.UserSimulatorConfigOrDict] = None,
21052106
inference_configs: Optional[
21062107
dict[str, types.EvaluationRunInferenceConfigOrDict]
@@ -2118,6 +2119,10 @@ def create_evaluation_run(
21182119
display_name: The display name of the evaluation run.
21192120
agent_info: The agent info to evaluate. Mutually exclusive with
21202121
`inference_configs`.
2122+
agent: The agent engine resource name in str type, with format
2123+
`projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.
2124+
If provided, runs inference with the deployed agent to get agent responses
2125+
for evaluation. This is required if `agent_info` is provided.
21212126
user_simulator_config: The user simulator configuration for agent evaluation.
21222127
If `agent_info` is provided without `inference_configs`, this config is used
21232128
to automatically construct the inference configuration. If not specified,
@@ -2158,7 +2163,7 @@ def create_evaluation_run(
21582163
candidate_name: types.EvaluationRunInferenceConfig(
21592164
agent_configs=parsed_agent_info.agents,
21602165
agent_run_config=types.AgentRunConfig(
2161-
agent_engine=parsed_agent_info.agent_resource_name,
2166+
agent_engine=agent,
21622167
user_simulator_config=parsed_user_simulator_config,
21632168
),
21642169
)
@@ -2181,9 +2186,7 @@ def create_evaluation_run(
21812186
resolved_inference_configs = _evals_common._resolve_inference_configs(
21822187
self._api_client, resolved_dataset, inference_configs, parsed_agent_info
21832188
)
2184-
resolved_labels = _evals_common._add_evaluation_run_labels(
2185-
labels, parsed_agent_info
2186-
)
2189+
resolved_labels = _evals_common._add_evaluation_run_labels(labels, agent)
21872190
resolved_name = name or f"evaluation_run_{uuid.uuid4()}"
21882191
return self._create_evaluation_run(
21892192
name=resolved_name,
@@ -3307,6 +3310,7 @@ async def create_evaluation_run(
33073310
name: Optional[str] = None,
33083311
display_name: Optional[str] = None,
33093312
agent_info: Optional[evals_types.AgentInfo] = None,
3313+
agent: Optional[str] = None,
33103314
user_simulator_config: Optional[evals_types.UserSimulatorConfigOrDict] = None,
33113315
inference_configs: Optional[
33123316
dict[str, types.EvaluationRunInferenceConfigOrDict]
@@ -3324,6 +3328,10 @@ async def create_evaluation_run(
33243328
display_name: The display name of the evaluation run.
33253329
agent_info: The agent info to evaluate. Mutually exclusive with
33263330
`inference_configs`.
3331+
agent: The agent engine resource name in str type, with format
3332+
`projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.
3333+
If provided, runs inference with the deployed agent to get agent responses
3334+
for evaluation. This is required if `agent_info` is provided.
33273335
user_simulator_config: The user simulator configuration for agent evaluation.
33283336
If `agent_info` is provided without `inference_configs`, this config is used
33293337
to automatically construct the inference configuration. If not specified,
@@ -3364,7 +3372,7 @@ async def create_evaluation_run(
33643372
candidate_name: types.EvaluationRunInferenceConfig(
33653373
agent_configs=parsed_agent_info.agents,
33663374
agent_run_config=types.AgentRunConfig(
3367-
agent_engine=parsed_agent_info.agent_resource_name,
3375+
agent_engine=agent,
33683376
user_simulator_config=parsed_user_simulator_config,
33693377
),
33703378
)
@@ -3387,9 +3395,7 @@ async def create_evaluation_run(
33873395
resolved_inference_configs = _evals_common._resolve_inference_configs(
33883396
self._api_client, resolved_dataset, inference_configs, parsed_agent_info
33893397
)
3390-
resolved_labels = _evals_common._add_evaluation_run_labels(
3391-
labels, parsed_agent_info
3392-
)
3398+
resolved_labels = _evals_common._add_evaluation_run_labels(labels, agent)
33933399
resolved_name = name or f"evaluation_run_{uuid.uuid4()}"
33943400

33953401
result = await self._create_evaluation_run(

vertexai/_genai/types/evals.py

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -388,11 +388,6 @@ class AgentDataDict(TypedDict, total=False):
388388
class AgentInfo(_common.BaseModel):
389389
"""The agent info of an agent system, used for agent evaluation."""
390390

391-
agent_resource_name: Optional[str] = Field(
392-
default=None,
393-
description="""The agent engine used to run agent. Agent engine resource name in str type, with format
394-
`projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.""",
395-
)
396391
name: Optional[str] = Field(
397392
default=None, description="""Agent candidate name, used as an identifier."""
398393
)
@@ -407,14 +402,11 @@ class AgentInfo(_common.BaseModel):
407402
)
408403

409404
@classmethod
410-
def load_from_agent(
411-
cls, agent: Any, agent_resource_name: Optional[str] = None
412-
) -> "AgentInfo":
405+
def load_from_agent(cls, agent: Any) -> "AgentInfo":
413406
"""Loads agent info from an ADK agent.
414407
415408
Args:
416409
agent: The root agent to get the agent info from, data type is google.adk.agents.LLMAgent type.
417-
agent_resource_name: Optional. The agent engine resource name for the deployed agent.
418410
419411
Returns:
420412
The agent info of the agent system.
@@ -423,18 +415,14 @@ def load_from_agent(
423415
```
424416
from vertexai._genai import types
425417
426-
agent_info = types.evals.AgentInfo.load_from_agent(
427-
agent=my_agent,
428-
agent_resource_name="projects/123/locations/us-central1/reasoningEngines/456"
429-
)
418+
agent_info = types.evals.AgentInfo.load_from_agent(agent=my_agent)
430419
```
431420
"""
432421
agent_name = getattr(agent, "name", None)
433422
if not agent_name:
434423
raise ValueError(f"Agent {agent} must have a name.")
435424
return cls( # pytype: disable=missing-parameter
436425
name=agent_name,
437-
agent_resource_name=agent_resource_name,
438426
agents=AgentData.get_agents_map(agent),
439427
root_agent_id=agent_name,
440428
)
@@ -443,10 +431,6 @@ def load_from_agent(
443431
class AgentInfoDict(TypedDict, total=False):
444432
"""The agent info of an agent system, used for agent evaluation."""
445433

446-
agent_resource_name: Optional[str]
447-
"""The agent engine used to run agent. Agent engine resource name in str type, with format
448-
`projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`."""
449-
450434
name: Optional[str]
451435
"""Agent candidate name, used as an identifier."""
452436

0 commit comments

Comments (0)