Skip to content

Commit 2b0a98c

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals): Add validation for agent_data in EvaluationDataset in create_evaluation_run
PiperOrigin-RevId: 881253576
1 parent eacc86c commit 2b0a98c

File tree

4 files changed

+212
-0
lines changed

4 files changed

+212
-0
lines changed

tests/unit/vertexai/genai/test_evals.py

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3292,6 +3292,136 @@ def my_search_tool(query: str) -> str:
32923292
mock_from_callable.assert_called_once_with(callable=my_search_tool)
32933293

32943294

3295+
class TestValidateDatasetAgentData:
    """Unit tests for the _validate_dataset_agent_data function."""

    def test_valid_agent_data_in_df(self):
        """agent_data in a DataFrame may be a dict, a JSON string, or an AgentData instance."""
        dataset = vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=pd.DataFrame(
                [
                    {
                        "agent_data": {
                            "turns": [{"turn_index": 0, "turn_id": "1", "events": []}]
                        }
                    },
                    {
                        "agent_data": '{"turns": [{"turn_index": 0, "turn_id": "2", "events": []}]}'
                    },
                    {
                        "agent_data": vertexai_genai_types.evals.AgentData(
                            turns=[{"turn_index": 0, "turn_id": "3", "events": []}]
                        )
                    },
                ]
            )
        )
        # Should not raise for any of the three representations.
        _evals_utils._validate_dataset_agent_data(dataset)

    def test_valid_agent_data_in_eval_cases(self):
        """agent_data on EvalCase objects validates for dict, parsed-JSON, and AgentData forms."""
        dataset = vertexai_genai_types.EvaluationDataset(
            eval_cases=[
                vertexai_genai_types.EvalCase(
                    agent_data={
                        "turns": [{"turn_index": 0, "turn_id": "1", "events": []}]
                    }
                ),
                vertexai_genai_types.EvalCase(
                    agent_data=json.loads(
                        '{"turns": [{"turn_index": 0, "turn_id": "2", "events": []}]}'
                    )
                ),
                vertexai_genai_types.EvalCase(
                    agent_data=vertexai_genai_types.evals.AgentData(
                        turns=[{"turn_index": 0, "turn_id": "3", "events": []}]
                    )
                ),
            ]
        )
        # Should not raise for any of the eval cases.
        _evals_utils._validate_dataset_agent_data(dataset)

    def test_invalid_json_string_raises_error(self):
        """A malformed JSON string is rejected with a 'not valid JSON' error."""
        dataset = vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=pd.DataFrame([{"agent_data": '{"turns":'}])
        )
        with pytest.raises(ValueError, match="is not valid JSON"):
            _evals_utils._validate_dataset_agent_data(dataset)

    def test_invalid_dict_raises_error(self):
        """A dict whose fields don't match the AgentData schema is rejected."""
        dataset = vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=pd.DataFrame([{"agent_data": {"agents": 123}}])
        )
        with pytest.raises(ValueError, match="is inconsistent with AgentData type"):
            _evals_utils._validate_dataset_agent_data(dataset)

    def test_valid_agent_data_with_error_in_dict(self):
        """A dict carrying an 'error' key is passed through without validation."""
        dataset = vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=pd.DataFrame(
                [{"agent_data": {"error": "some error message"}}]
            )
        )
        _evals_utils._validate_dataset_agent_data(dataset)

    def test_valid_agent_data_with_error_in_string(self):
        """A JSON string carrying an 'error' key is passed through without validation."""
        dataset = vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=pd.DataFrame(
                [{"agent_data": '{"error": "some error message"}'}]
            )
        )
        _evals_utils._validate_dataset_agent_data(dataset)

    def test_invalid_agent_data_type_raises_error(self):
        """A value that is neither str, dict, nor AgentData is rejected."""
        dataset = vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=pd.DataFrame([{"agent_data": 123}])
        )
        with pytest.raises(ValueError, match="is inconsistent with AgentData type"):
            _evals_utils._validate_dataset_agent_data(dataset)

    def test_conflict_with_inference_configs_raises_error(self):
        """'agents' in the dataset and 'agent_configs' in inference_configs may not coexist."""
        dataset = vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=pd.DataFrame(
                [
                    {
                        "agent_data": {
                            "agents": {"agent1": {"agent_id": "agent1"}},
                            "turns": [],
                        }
                    }
                ]
            )
        )
        inference_configs = {
            "cand1": {"agent_configs": {"agent1": {"agent_id": "agent1"}}}
        }
        with pytest.raises(
            ValueError,
            match="Cannot provide 'agents' in the dataset's 'agent_data'",
        ):
            _evals_utils._validate_dataset_agent_data(dataset, inference_configs)

    def test_no_conflict_with_inference_configs(self):
        """agent_data without 'agents' is compatible with inference-side agent_configs."""
        dataset = vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=pd.DataFrame([{"agent_data": {"turns": []}}])
        )
        inference_configs = {"cand1": {"agent_configs": {"agent1": {"name": "agent1"}}}}
        _evals_utils._validate_dataset_agent_data(dataset, inference_configs)

    def test_no_conflict_if_inference_configs_has_no_agent_configs(self):
        """Dataset-side 'agents' are allowed when inference_configs has no agent_configs."""
        dataset = vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=pd.DataFrame(
                [
                    {
                        "agent_data": {
                            "agents": {"agent1": {"agent_id": "agent1"}},
                            "turns": [],
                        }
                    }
                ]
            )
        )
        inference_configs = {"cand1": {"model": "gemini-pro"}}
        _evals_utils._validate_dataset_agent_data(dataset, inference_configs)
32953425
class TestEvent:
32963426
"""Unit tests for the Event class."""
32973427

vertexai/_genai/_evals_constant.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,5 +69,8 @@
6969
SESSION_INPUT,
7070
CONTEXT,
7171
HISTORY,
72+
STARTING_PROMPT,
73+
CONVERSATION_PLAN,
74+
AGENT_DATA,
7275
}
7376
)

vertexai/_genai/_evals_utils.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import abc
1818
import logging
1919
import os
20+
import json
2021
from typing import Any, Optional, Union
2122

2223
from google.genai import types as genai_types
@@ -370,3 +371,77 @@ def _postprocess_user_scenarios_response(
370371
return types.EvaluationDataset(
371372
eval_cases=eval_cases, eval_dataset_df=eval_dataset_df
372373
)
374+
375+
376+
def _validate_dataset_agent_data(
    dataset: types.EvaluationDataset,
    inference_configs: Optional[dict[str, Any]] = None,
) -> None:
    """Validates agent_data in the EvaluationDataset.

    Each ``agent_data`` value (a JSON string, dict, or ``AgentData`` instance)
    must be consistent with the ``AgentData`` type, and 'agents' must not be
    defined in both the dataset's ``agent_data`` and the ``agent_configs`` of
    ``inference_configs``.

    Args:
        dataset: The evaluation dataset whose ``agent_data`` entries (either a
            DataFrame column or per-``EvalCase`` fields) are validated.
        inference_configs: Optional mapping of candidate name to inference
            config; each value may be a dict or an object with an
            ``agent_configs`` attribute.

    Raises:
        ValueError: If an ``agent_data`` value is invalid JSON, inconsistent
            with the ``AgentData`` type, of an unsupported type, or defines
            'agents' while ``inference_configs`` also provides
            ``agent_configs``.
    """
    has_inference_agent_configs = False
    if inference_configs:
        for cand_config in inference_configs.values():
            if isinstance(cand_config, dict):
                found = bool(cand_config.get("agent_configs"))
            else:
                found = bool(getattr(cand_config, "agent_configs", None))
            if found:
                has_inference_agent_configs = True
                break  # One candidate with agent_configs is enough.

    def _validate_single_agent_data(agent_data_val: Any, identifier: str) -> None:
        # Validates one agent_data value; raises ValueError on any problem.
        if not agent_data_val:
            return

        # Decode JSON strings first so they share the dict validation path.
        # This also wraps schema errors from parsed strings in ValueError
        # (previously only JSONDecodeError was caught in this branch) and
        # rejects strings that decode to non-dict values (e.g. "123").
        if isinstance(agent_data_val, str):
            try:
                agent_data_val = json.loads(agent_data_val)
            except json.JSONDecodeError as e:
                raise ValueError(
                    f"{identifier}: 'agent_data' is not valid JSON: {e}"
                ) from e

        if isinstance(agent_data_val, dict):
            # Entries recording an upstream error are accepted as-is.
            if "error" in agent_data_val:
                return
            try:
                agent_data_obj = types.evals.AgentData.model_validate(agent_data_val)
            except Exception as e:
                raise ValueError(
                    f"{identifier}: 'agent_data' "
                    f"is inconsistent with AgentData type: {e}"
                ) from e
        elif isinstance(agent_data_val, types.evals.AgentData):
            agent_data_obj = agent_data_val
        else:
            raise ValueError(
                f"{identifier}: 'agent_data' is inconsistent with AgentData type. "
                f"Got {type(agent_data_val)}"
            )

        if agent_data_obj.agents and has_inference_agent_configs:
            raise ValueError(
                f"{identifier}: Cannot provide 'agents' in the dataset's 'agent_data' "
                "and 'agent_configs' in inference_configs at the same time."
            )

    if (
        dataset.eval_dataset_df is not None
        and "agent_data" in dataset.eval_dataset_df.columns
    ):
        for idx, row in dataset.eval_dataset_df.iterrows():
            _validate_single_agent_data(row.get("agent_data"), f"Row {idx}")

    if dataset.eval_cases:
        for idx, eval_case in enumerate(dataset.eval_cases):
            if isinstance(eval_case, dict):
                agent_data = eval_case.get("agent_data", None)
            else:
                agent_data = getattr(eval_case, "agent_data", None)
            _validate_single_agent_data(agent_data, f"EvalCase {idx}")

vertexai/_genai/evals.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1830,6 +1830,8 @@ def create_evaluation_run(
18301830
if isinstance(agent_info, dict)
18311831
else (agent_info or evals_types.AgentInfo())
18321832
)
1833+
if isinstance(dataset, types.EvaluationDataset):
1834+
_evals_utils._validate_dataset_agent_data(dataset, inference_configs)
18331835
resolved_dataset = _evals_common._resolve_dataset(
18341836
self._api_client, dataset, dest, agent_info_pydantic
18351837
)
@@ -2758,6 +2760,8 @@ async def create_evaluation_run(
27582760
if isinstance(agent_info, dict)
27592761
else (agent_info or evals_types.AgentInfo())
27602762
)
2763+
if isinstance(dataset, types.EvaluationDataset):
2764+
_evals_utils._validate_dataset_agent_data(dataset, inference_configs)
27612765
resolved_dataset = _evals_common._resolve_dataset(
27622766
self._api_client, dataset, dest, agent_info_pydantic
27632767
)

0 commit comments

Comments
 (0)