Skip to content

Commit c6ec3bf

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals): Add user scenario generation function to eval components
PiperOrigin-RevId: 872110328
1 parent 204e5b4 commit c6ec3bf

File tree

7 files changed

+580
-6
lines changed

7 files changed

+580
-6
lines changed
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
# pylint: disable=protected-access,bad-continuation,missing-function-docstring
16+
17+
from tests.unit.vertexai.genai.replays import pytest_helper
18+
from vertexai import types
19+
import pytest
20+
21+
22+
def test_gen_user_scenarios(client):
    """Tests that generate_user_scenarios() correctly calls the API and parses the response."""
    # Build the agent topology: a single service agent with one flight-search tool.
    booking_agent = types.evals.AgentConfig(
        agent_id="booking-agent",
        agent_type="service_agent",
        description="An agent capable of booking flights and hotels.",
        instruction="You are a helpful travel assistant. Use tools to find flights.",
        tools=[
            {
                "function_declarations": [
                    {
                        "name": "search_flights",
                        "description": "Search for available flights.",
                    }
                ]
            }
        ],
    )
    scenario_config = types.evals.UserScenarioGenerationConfig(
        user_scenario_count=2,
        simulation_instruction=(
            "Generate scenarios where the user tries to book a flight but"
            " changes their mind about the destination."
        ),
        environment_data="Today is Monday. Flights to Paris are available.",
        model_name="gemini-2.5-flash",
    )

    eval_dataset = client.evals.generate_user_scenarios(
        agents={"booking-agent": booking_agent},
        user_scenario_generation_config=scenario_config,
        root_agent_id="booking-agent",
    )

    # The replay-recorded response contains exactly two generated scenarios;
    # spot-check the first one end to end.
    assert isinstance(eval_dataset, types.EvaluationDataset)
    assert len(eval_dataset.eval_cases) == 2
    first_scenario = eval_dataset.eval_cases[0].user_scenario
    assert (
        first_scenario.starting_prompt
        == "I want to find a flight from New York to London."
    )
    assert (
        first_scenario.conversation_plan
        == "Actually, I meant Paris, not London. Please search for flights to Paris."
    )
65+
66+
# Enable the pytest-asyncio plugin so the async replay test in this module runs.
pytest_plugins = ("pytest_asyncio",)
67+
68+
69+
@pytest.mark.asyncio
async def test_gen_user_scenarios_async(client):
    """Tests that generate_user_scenarios() async correctly calls the API and parses the response."""
    # Same single-agent setup as the sync test, exercised via client.aio.
    agent = types.evals.AgentConfig(
        agent_id="booking-agent",
        agent_type="service_agent",
        description="An agent capable of booking flights and hotels.",
        instruction="You are a helpful travel assistant. Use tools to find flights.",
        tools=[
            {
                "function_declarations": [
                    {
                        "name": "search_flights",
                        "description": "Search for available flights.",
                    }
                ]
            }
        ],
    )
    generation_config = types.evals.UserScenarioGenerationConfig(
        user_scenario_count=2,
        simulation_instruction=(
            "Generate scenarios where the user tries to book a flight but"
            " changes their mind about the destination."
        ),
        environment_data="Today is Monday. Flights to Paris are available.",
        model_name="gemini-2.5-flash",
    )

    eval_dataset = await client.aio.evals.generate_user_scenarios(
        agents={"booking-agent": agent},
        user_scenario_generation_config=generation_config,
        root_agent_id="booking-agent",
    )

    # The replay-recorded response contains two scenarios; this test verifies
    # the second one (the sync test covers the first).
    assert isinstance(eval_dataset, types.EvaluationDataset)
    assert len(eval_dataset.eval_cases) == 2
    second_scenario = eval_dataset.eval_cases[1].user_scenario
    assert (
        second_scenario.starting_prompt
        == "Find me a flight from Boston to Rome for next month."
    )
    assert (
        second_scenario.conversation_plan
        == "Wait, change of plans. I need to go to Milan instead, and it needs to be a round trip, returning two weeks after departure."
    )
112+
113+
114+
# Bind the tests in this module to the evals.generate_user_scenarios method so
# the replay harness can record/replay the corresponding API interactions.
pytestmark = pytest_helper.setup(
    file=__file__,
    globals_for_file=globals(),
    test_method="evals.generate_user_scenarios",
)

tests/unit/vertexai/genai/test_evals.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import re
2121
import statistics
2222
import sys
23+
import unittest
2324
from unittest import mock
2425

2526
import google.auth.credentials
@@ -5500,3 +5501,70 @@ def read_file_contents_side_effect(src: str) -> str:
55005501
}
55015502
),
55025503
)
5504+
5505+
5506+
class TestEvalsGenerateUserScenarios(unittest.IsolatedAsyncioTestCase):
    """Unit tests for the Evals generate_user_scenarios method.

    NOTE(review): pytest-asyncio documents that it does not collect async test
    methods defined on plain ``unittest.TestCase`` subclasses, so the async
    test below would have been silently skipped. ``IsolatedAsyncioTestCase``
    runs coroutine test methods natively (under both unittest and pytest) and
    needs no ``@pytest.mark.asyncio`` marker.
    """

    def setUp(self):
        # Undo every mock.patch started during a test, even if it fails.
        self.addCleanup(mock.patch.stopall)
        self.mock_client = mock.MagicMock(spec=client.Client)
        self.mock_client.vertexai = True
        self.mock_api_client = mock.MagicMock()
        self.mock_client._api_client = self.mock_api_client

        # Canned API payload: two user scenarios in wire (camelCase) format.
        self.mock_response = mock.MagicMock()
        self.mock_response.body = json.dumps(
            {
                "userScenarios": [
                    {"startingPrompt": "Prompt 1", "conversationPlan": "Plan 1"},
                    {"startingPrompt": "Prompt 2", "conversationPlan": "Plan 2"},
                ]
            }
        )
        self.mock_api_client.request.return_value = self.mock_response

    def test_generate_user_scenarios(self):
        """Tests that generate_user_scenarios correctly calls the API and parses the response."""
        evals_module = evals.Evals(api_client_=self.mock_api_client)

        eval_dataset = evals_module.generate_user_scenarios(
            agents={"agent_1": {}},
            user_scenario_generation_config={"user_scenario_count": 2},
            root_agent_id="agent_1",
        )
        # The two wire scenarios must round-trip into EvalCase objects in order.
        assert isinstance(eval_dataset, vertexai_genai_types.EvaluationDataset)
        assert len(eval_dataset.eval_cases) == 2
        assert eval_dataset.eval_cases[0].user_scenario.starting_prompt == "Prompt 1"
        assert eval_dataset.eval_cases[0].user_scenario.conversation_plan == "Plan 1"
        assert eval_dataset.eval_cases[1].user_scenario.starting_prompt == "Prompt 2"
        assert eval_dataset.eval_cases[1].user_scenario.conversation_plan == "Plan 2"

        # The companion DataFrame mirrors the eval cases, one row per scenario.
        assert eval_dataset.eval_dataset_df is not None
        assert len(eval_dataset.eval_dataset_df) == 2
        assert eval_dataset.eval_dataset_df.iloc[0]["starting_prompt"] == "Prompt 1"

        self.mock_api_client.request.assert_called_once()

    async def test_async_generate_user_scenarios(self):
        """Tests that async generate_user_scenarios correctly calls the API and parses the response."""
        # The async surface goes through async_request; stub it with the same
        # canned payload used by the sync test.
        self.mock_api_client.async_request = mock.AsyncMock(
            return_value=self.mock_response
        )
        async_evals_module = evals.AsyncEvals(api_client_=self.mock_api_client)

        eval_dataset = await async_evals_module.generate_user_scenarios(
            agents={"agent_1": {}},
            user_scenario_generation_config={"user_scenario_count": 2},
            root_agent_id="agent_1",
        )
        assert isinstance(eval_dataset, vertexai_genai_types.EvaluationDataset)
        assert len(eval_dataset.eval_cases) == 2
        assert eval_dataset.eval_cases[0].user_scenario.starting_prompt == "Prompt 1"

        assert eval_dataset.eval_dataset_df is not None
        assert len(eval_dataset.eval_dataset_df) == 2

        self.mock_api_client.async_request.assert_called_once()

vertexai/_genai/_evals_utils.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import os
2020
from typing import Any, Optional, Union
2121

22+
from google.genai import types as genai_types
2223
from google.genai._api_client import BaseApiClient
2324
from google.genai._common import get_value_by_path as getv
2425
from google.genai._common import set_value_by_path as setv
@@ -335,3 +336,37 @@ class EvalDataConverter(abc.ABC):
335336
def convert(self, raw_data: Any) -> "types.EvaluationDataset":
336337
"""Converts a loaded raw dataset into an EvaluationDataset."""
337338
raise NotImplementedError()
339+
340+
341+
def _postprocess_user_scenarios_response(
    response: types.GenerateUserScenariosResponse,
) -> types.EvaluationDataset:
    """Postprocesses the response from generating user scenarios."""
    cases = []
    rows = []
    scenarios = getattr(response, "user_scenarios", None)
    if scenarios:
        for user_scenario in scenarios:
            starting_prompt = user_scenario.starting_prompt
            # Wrap the starting prompt (when present) as a single-part Content
            # so it can serve as the eval case's prompt.
            prompt = None
            if starting_prompt:
                prompt = genai_types.Content(
                    parts=[genai_types.Part(text=starting_prompt)]
                )
            cases.append(
                types.EvalCase(prompt=prompt, user_scenario=user_scenario)
            )
            rows.append(
                {
                    "starting_prompt": starting_prompt,
                    "conversation_plan": user_scenario.conversation_plan,
                }
            )
    # Mirror the eval cases into a DataFrame when pandas is available;
    # otherwise leave eval_dataset_df as None and warn.
    dataframe = None
    if pd is not None:
        dataframe = pd.DataFrame(rows)
    else:
        logger.warning("Pandas is not installed. eval_dataset_df will be None.")
    return types.EvaluationDataset(
        eval_cases=cases, eval_dataset_df=dataframe
    )

0 commit comments

Comments
 (0)