Skip to content

Commit dab185a

Browse files
jsondai and copybara-github
authored and committed
feat: GenAI Client(evals) - BREAKING_CHANGE: The agent engine resource name is now passed as a separate agent parameter to create_evaluation_run methods, rather than being part of the AgentInfo object. This parameter is now required if agent_info is provided
PiperOrigin-RevId: 886965034
1 parent 6a03f78 commit dab185a

File tree

5 files changed

+37
-41
lines changed

5 files changed

+37
-41
lines changed

tests/unit/vertexai/genai/replays/test_create_evaluation_run.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
from google.genai import types as genai_types
2020
import pandas as pd
2121
import pytest
22+
from unittest import mock
23+
import uuid
2224

2325
GCS_DEST = "gs://lakeyk-limited-bucket/eval_run_output"
2426
GENERAL_QUALITY_METRIC = types.EvaluationRunMetric(
@@ -79,8 +81,8 @@
7981
)
8082
]
8183
)
84+
AGENT_RESOURCE_NAME = "projects/123/locations/us-central1/reasoningEngines/456"
8285
AGENT_INFO = types.evals.AgentInfo(
83-
agent_resource_name=("projects/123/locations/us-central1/reasoningEngines/456"),
8486
name="agent-1",
8587
agents={
8688
"agent-1": types.evals.AgentConfig(
@@ -124,6 +126,7 @@ def test_create_eval_run_data_source_evaluation_set(client):
124126
BLEU_COMPUTATION_BASED_METRIC,
125127
],
126128
agent_info=AGENT_INFO,
129+
agent=AGENT_RESOURCE_NAME,
127130
labels={"label1": "value1"},
128131
)
129132
assert isinstance(evaluation_run, types.EvaluationRun)
@@ -148,7 +151,7 @@ def test_create_eval_run_data_source_evaluation_set(client):
148151
] == types.EvaluationRunInferenceConfig(
149152
agent_configs=AGENT_INFO.agents,
150153
agent_run_config=types.AgentRunConfig(
151-
agent_engine=AGENT_INFO.agent_resource_name,
154+
agent_engine=AGENT_RESOURCE_NAME,
152155
user_simulator_config={"max_turn": 5},
153156
),
154157
)
@@ -219,6 +222,7 @@ def test_create_eval_run_with_user_simulator_config(client):
219222
dest=GCS_DEST,
220223
metrics=[GENERAL_QUALITY_METRIC],
221224
agent_info=AGENT_INFO,
225+
agent=AGENT_RESOURCE_NAME,
222226
user_simulator_config=types.evals.UserSimulatorConfig(
223227
max_turn=5,
224228
),
@@ -243,7 +247,7 @@ def test_create_eval_run_with_user_simulator_config(client):
243247
] == types.EvaluationRunInferenceConfig(
244248
agent_configs=AGENT_INFO.agents,
245249
agent_run_config=types.AgentRunConfig(
246-
agent_engine=AGENT_INFO.agent_resource_name,
250+
agent_engine=AGENT_RESOURCE_NAME,
247251
user_simulator_config=types.evals.UserSimulatorConfig(max_turn=5),
248252
),
249253
)
@@ -290,8 +294,14 @@ def test_create_eval_run_with_inference_configs(client):
290294
assert evaluation_run.error is None
291295

292296

293-
def test_create_eval_run_with_metric_resource_name(client):
297+
@mock.patch("uuid.uuid4")
298+
def test_create_eval_run_with_metric_resource_name(mock_uuid4, client):
294299
"""Tests create_evaluation_run with metric_resource_name."""
300+
mock_uuid4.side_effect = [
301+
uuid.UUID("d392c573-9e81-4a30-b984-8a6aa4656369"),
302+
uuid.UUID("49128576-accd-459e-aace-41391e163b3c"),
303+
uuid.UUID("9bcc726e-d2cf-448c-967b-f49480d8c1c2"),
304+
]
295305
client._api_client._http_options.api_version = "v1beta1"
296306
client._api_client._http_options.base_url = (
297307
"https://us-central1-staging-aiplatform.sandbox.googleapis.com/"
@@ -733,6 +743,7 @@ async def test_create_eval_run_async_with_user_simulator_config(client):
733743
dest=GCS_DEST,
734744
metrics=[GENERAL_QUALITY_METRIC],
735745
agent_info=AGENT_INFO,
746+
agent=AGENT_RESOURCE_NAME,
736747
user_simulator_config=types.evals.UserSimulatorConfig(
737748
max_turn=5,
738749
),
@@ -757,7 +768,7 @@ async def test_create_eval_run_async_with_user_simulator_config(client):
757768
] == types.EvaluationRunInferenceConfig(
758769
agent_configs=AGENT_INFO.agents,
759770
agent_run_config=types.AgentRunConfig(
760-
agent_engine=AGENT_INFO.agent_resource_name,
771+
agent_engine=AGENT_RESOURCE_NAME,
761772
user_simulator_config=types.evals.UserSimulatorConfig(max_turn=5),
762773
),
763774
)

tests/unit/vertexai/genai/test_evals.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3503,16 +3503,11 @@ def my_search_tool(query: str) -> str:
35033503

35043504
agent_info = vertexai_genai_types.evals.AgentInfo.load_from_agent(
35053505
agent=mock_agent,
3506-
agent_resource_name="projects/123/locations/abc/reasoningEngines/456",
35073506
)
35083507

35093508
assert agent_info.name == "mock_agent"
35103509
assert agent_info.agents["mock_agent"].instruction == "mock instruction"
35113510
assert agent_info.agents["mock_agent"].description == "mock description"
3512-
assert (
3513-
agent_info.agent_resource_name
3514-
== "projects/123/locations/abc/reasoningEngines/456"
3515-
)
35163511
assert len(agent_info.agents["mock_agent"].tools) == 1
35173512
assert isinstance(agent_info.agents["mock_agent"].tools[0], genai_types.Tool)
35183513
assert agent_info.agents["mock_agent"].tools[0].function_declarations == [

vertexai/_genai/_evals_common.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -451,14 +451,14 @@ def _resolve_inference_configs(
451451

452452
def _add_evaluation_run_labels(
453453
labels: Optional[dict[str, str]] = None,
454-
parsed_agent_info: Optional[types.evals.AgentInfo] = None,
454+
agent: Optional[str] = None,
455455
) -> Optional[dict[str, str]]:
456456
"""Adds labels to the evaluation run."""
457-
if parsed_agent_info and parsed_agent_info.agent_resource_name:
457+
if agent:
458458
labels = labels or {}
459-
labels["vertex-ai-evaluation-agent-engine-id"] = (
460-
parsed_agent_info.agent_resource_name.split("reasoningEngines/")[-1]
461-
)
459+
labels["vertex-ai-evaluation-agent-engine-id"] = agent.split(
460+
"reasoningEngines/"
461+
)[-1]
462462
return labels
463463

464464

vertexai/_genai/evals.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2101,6 +2101,7 @@ def create_evaluation_run(
21012101
name: Optional[str] = None,
21022102
display_name: Optional[str] = None,
21032103
agent_info: Optional[evals_types.AgentInfoOrDict] = None,
2104+
agent: Optional[str] = None,
21042105
user_simulator_config: Optional[evals_types.UserSimulatorConfigOrDict] = None,
21052106
inference_configs: Optional[
21062107
dict[str, types.EvaluationRunInferenceConfigOrDict]
@@ -2118,6 +2119,10 @@ def create_evaluation_run(
21182119
display_name: The display name of the evaluation run.
21192120
agent_info: The agent info to evaluate. Mutually exclusive with
21202121
`inference_configs`.
2122+
agent: The agent engine resource name in str type, with format
2123+
`projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.
2124+
If provided, runs inference with the deployed agent to get agent responses
2125+
for evaluation. This is required if `agent_info` is provided.
21212126
user_simulator_config: The user simulator configuration for agent evaluation.
21222127
If `agent_info` is provided without `inference_configs`, this config is used
21232128
to automatically construct the inference configuration. If not specified,
@@ -2158,7 +2163,7 @@ def create_evaluation_run(
21582163
candidate_name: types.EvaluationRunInferenceConfig(
21592164
agent_configs=parsed_agent_info.agents,
21602165
agent_run_config=types.AgentRunConfig(
2161-
agent_engine=parsed_agent_info.agent_resource_name,
2166+
agent_engine=agent,
21622167
user_simulator_config=parsed_user_simulator_config,
21632168
),
21642169
)
@@ -2181,9 +2186,7 @@ def create_evaluation_run(
21812186
resolved_inference_configs = _evals_common._resolve_inference_configs(
21822187
self._api_client, resolved_dataset, inference_configs, parsed_agent_info
21832188
)
2184-
resolved_labels = _evals_common._add_evaluation_run_labels(
2185-
labels, parsed_agent_info
2186-
)
2189+
resolved_labels = _evals_common._add_evaluation_run_labels(labels, agent)
21872190
resolved_name = name or f"evaluation_run_{uuid.uuid4()}"
21882191
return self._create_evaluation_run(
21892192
name=resolved_name,
@@ -3307,6 +3310,7 @@ async def create_evaluation_run(
33073310
name: Optional[str] = None,
33083311
display_name: Optional[str] = None,
33093312
agent_info: Optional[evals_types.AgentInfo] = None,
3313+
agent: Optional[str] = None,
33103314
user_simulator_config: Optional[evals_types.UserSimulatorConfigOrDict] = None,
33113315
inference_configs: Optional[
33123316
dict[str, types.EvaluationRunInferenceConfigOrDict]
@@ -3324,6 +3328,10 @@ async def create_evaluation_run(
33243328
display_name: The display name of the evaluation run.
33253329
agent_info: The agent info to evaluate. Mutually exclusive with
33263330
`inference_configs`.
3331+
agent: The agent engine resource name in str type, with format
3332+
`projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.
3333+
If provided, runs inference with the deployed agent to get agent responses
3334+
for evaluation. This is required if `agent_info` is provided.
33273335
user_simulator_config: The user simulator configuration for agent evaluation.
33283336
If `agent_info` is provided without `inference_configs`, this config is used
33293337
to automatically construct the inference configuration. If not specified,
@@ -3364,7 +3372,7 @@ async def create_evaluation_run(
33643372
candidate_name: types.EvaluationRunInferenceConfig(
33653373
agent_configs=parsed_agent_info.agents,
33663374
agent_run_config=types.AgentRunConfig(
3367-
agent_engine=parsed_agent_info.agent_resource_name,
3375+
agent_engine=agent,
33683376
user_simulator_config=parsed_user_simulator_config,
33693377
),
33703378
)
@@ -3387,9 +3395,7 @@ async def create_evaluation_run(
33873395
resolved_inference_configs = _evals_common._resolve_inference_configs(
33883396
self._api_client, resolved_dataset, inference_configs, parsed_agent_info
33893397
)
3390-
resolved_labels = _evals_common._add_evaluation_run_labels(
3391-
labels, parsed_agent_info
3392-
)
3398+
resolved_labels = _evals_common._add_evaluation_run_labels(labels, agent)
33933399
resolved_name = name or f"evaluation_run_{uuid.uuid4()}"
33943400

33953401
result = await self._create_evaluation_run(

vertexai/_genai/types/evals.py

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -388,11 +388,6 @@ class AgentDataDict(TypedDict, total=False):
388388
class AgentInfo(_common.BaseModel):
389389
"""The agent info of an agent system, used for agent evaluation."""
390390

391-
agent_resource_name: Optional[str] = Field(
392-
default=None,
393-
description="""The agent engine used to run agent. Agent engine resource name in str type, with format
394-
`projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`.""",
395-
)
396391
name: Optional[str] = Field(
397392
default=None, description="""Agent candidate name, used as an identifier."""
398393
)
@@ -407,14 +402,11 @@ class AgentInfo(_common.BaseModel):
407402
)
408403

409404
@classmethod
410-
def load_from_agent(
411-
cls, agent: Any, agent_resource_name: Optional[str] = None
412-
) -> "AgentInfo":
405+
def load_from_agent(cls, agent: Any) -> "AgentInfo":
413406
"""Loads agent info from an ADK agent.
414407
415408
Args:
416409
agent: The root agent to get the agent info from, data type is google.adk.agents.LLMAgent type.
417-
agent_resource_name: Optional. The agent engine resource name for the deployed agent.
418410
419411
Returns:
420412
The agent info of the agent system.
@@ -423,18 +415,14 @@ def load_from_agent(
423415
```
424416
from vertexai._genai import types
425417
426-
agent_info = types.evals.AgentInfo.load_from_agent(
427-
agent=my_agent,
428-
agent_resource_name="projects/123/locations/us-central1/reasoningEngines/456"
429-
)
418+
agent_info = types.evals.AgentInfo.load_from_agent(agent=my_agent)
430419
```
431420
"""
432421
agent_name = getattr(agent, "name", None)
433422
if not agent_name:
434423
raise ValueError(f"Agent {agent} must have a name.")
435424
return cls( # pytype: disable=missing-parameter
436425
name=agent_name,
437-
agent_resource_name=agent_resource_name,
438426
agents=AgentData.get_agents_map(agent),
439427
root_agent_id=agent_name,
440428
)
@@ -443,10 +431,6 @@ def load_from_agent(
443431
class AgentInfoDict(TypedDict, total=False):
444432
"""The agent info of an agent system, used for agent evaluation."""
445433

446-
agent_resource_name: Optional[str]
447-
"""The agent engine used to run agent. Agent engine resource name in str type, with format
448-
`projects/{project}/locations/{location}/reasoningEngines/{reasoning_engine_id}`."""
449-
450434
name: Optional[str]
451435
"""Agent candidate name, used as an identifier."""
452436

0 commit comments

Comments (0)