Skip to content

Commit 9199189

Browse files
google-genai-bot and copybara-github
authored and committed
feat(eval): Add ADK CLI for eval_set generation
PiperOrigin-RevId: 895946363
1 parent a220910 commit 9199189

File tree

5 files changed

+488
-0
lines changed

5 files changed

+488
-0
lines changed

src/google/adk/cli/cli_tools_click.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,6 +1064,7 @@ def cli_optimize(
10641064
from .cli_eval import _collect_eval_results
10651065
from .cli_eval import _collect_inferences
10661066
from .cli_eval import get_root_agent
1067+
10671068
except ModuleNotFoundError as mnf:
10681069
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
10691070

@@ -1199,6 +1200,7 @@ def cli_add_eval_case(
11991200
from ..evaluation.eval_case import EvalCase
12001201
from ..evaluation.eval_case import SessionInput
12011202
from .cli_eval import get_eval_sets_manager
1203+
12021204
except ModuleNotFoundError as mnf:
12031205
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
12041206

@@ -1247,6 +1249,127 @@ def cli_add_eval_case(
12471249
raise click.ClickException(f"Failed to add eval case(s): {e}") from e
12481250

12491251

1252+
@eval_set.command("generate_eval_cases", cls=HelpfulCommand)
1253+
@click.argument(
1254+
"agent_module_file_path",
1255+
type=click.Path(
1256+
exists=True, dir_okay=True, file_okay=False, resolve_path=True
1257+
),
1258+
)
1259+
@click.argument("eval_set_id", type=str, required=True)
1260+
@click.option(
1261+
"--user_simulation_config_file",
1262+
type=click.Path(
1263+
exists=True, dir_okay=False, file_okay=True, resolve_path=True
1264+
),
1265+
help=(
1266+
"A path to file containing JSON serialized "
1267+
"UserScenarioGenerationConfig dict."
1268+
),
1269+
required=True,
1270+
)
1271+
@eval_options()
1272+
def cli_generate_eval_cases(
1273+
agent_module_file_path: str,
1274+
eval_set_id: str,
1275+
user_simulation_config_file: str,
1276+
eval_storage_uri: Optional[str] = None,
1277+
log_level: str = "INFO",
1278+
):
1279+
"""Generates eval cases dynamically and adds them to the given eval set.
1280+
1281+
Uses Vertex AI Eval SDK to generate conversation scenarios based on an
1282+
Agent's info and definitions. It will automatically create the empty eval_set
1283+
if it has not been created in advance.
1284+
1285+
Args:
1286+
agent_module_file_path: The path to the agent module file.
1287+
eval_set_id: The id of the eval set to generate cases for.
1288+
user_simulation_config_file: The path to the user simulation config file.
1289+
eval_storage_uri: The eval storage uri.
1290+
log_level: The log level.
1291+
"""
1292+
logs.setup_adk_logger(getattr(logging, log_level.upper()))
1293+
try:
1294+
from ..evaluation._vertex_ai_scenario_generation_facade import ScenarioGenerator
1295+
from ..evaluation.conversation_scenarios import ConversationGenerationConfig
1296+
from ..evaluation.eval_case import EvalCase
1297+
from ..evaluation.eval_case import SessionInput
1298+
from .cli_eval import get_eval_sets_manager
1299+
from .cli_eval import get_root_agent
1300+
from .utils.state import create_empty_state
1301+
1302+
except ModuleNotFoundError as mnf:
1303+
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
1304+
1305+
app_name = os.path.basename(agent_module_file_path)
1306+
agents_dir = os.path.dirname(agent_module_file_path)
1307+
1308+
try:
1309+
eval_sets_manager = get_eval_sets_manager(eval_storage_uri, agents_dir)
1310+
root_agent = get_root_agent(agent_module_file_path)
1311+
1312+
# Try to create if it doesn't already exist.
1313+
if (
1314+
eval_sets_manager.get_eval_set(
1315+
app_name=app_name, eval_set_id=eval_set_id
1316+
)
1317+
is None
1318+
):
1319+
eval_sets_manager.create_eval_set(
1320+
app_name=app_name, eval_set_id=eval_set_id
1321+
)
1322+
click.echo(f"Eval set '{eval_set_id}' created for app '{app_name}'.")
1323+
else:
1324+
click.echo(f"Eval set '{eval_set_id}' already exists.")
1325+
1326+
with open(user_simulation_config_file, "r") as f:
1327+
config = ConversationGenerationConfig.model_validate_json(f.read())
1328+
1329+
generator = ScenarioGenerator()
1330+
click.echo("Generating scenarios utilizing Vertex AI Eval SDK...")
1331+
scenarios = generator.generate_scenarios(root_agent, config)
1332+
1333+
# TODO(pthodoroff): Expose initial session state when simulation library
1334+
# supports it.
1335+
initial_session_state = create_empty_state(root_agent)
1336+
1337+
session_input = SessionInput(
1338+
app_name=app_name, user_id="test_user_id", state=initial_session_state
1339+
)
1340+
1341+
for scenario in scenarios:
1342+
scenario_str = json.dumps(scenario.model_dump(), sort_keys=True)
1343+
eval_id = hashlib.sha256(scenario_str.encode("utf-8")).hexdigest()[:8]
1344+
eval_case = EvalCase(
1345+
eval_id=eval_id,
1346+
conversation_scenario=scenario,
1347+
session_input=session_input,
1348+
creation_timestamp=datetime.now().timestamp(),
1349+
)
1350+
1351+
if (
1352+
eval_sets_manager.get_eval_case(
1353+
app_name=app_name, eval_set_id=eval_set_id, eval_case_id=eval_id
1354+
)
1355+
is None
1356+
):
1357+
eval_sets_manager.add_eval_case(
1358+
app_name=app_name, eval_set_id=eval_set_id, eval_case=eval_case
1359+
)
1360+
click.echo(
1361+
f"Eval case '{eval_case.eval_id}' added to eval set"
1362+
f" '{eval_set_id}'."
1363+
)
1364+
else:
1365+
click.echo(
1366+
f"Eval case '{eval_case.eval_id}' already exists in eval set"
1367+
f" '{eval_set_id}', skipped adding."
1368+
)
1369+
except Exception as e:
1370+
raise click.ClickException(f"Failed to generate eval case(s): {e}") from e
1371+
1372+
12501373
def web_options():
12511374
"""Decorator to add web UI options to click commands."""
12521375

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Vertex AI Scenario Generation Facade."""
16+
17+
from __future__ import annotations
18+
19+
import logging
20+
import os
21+
22+
from . import conversation_scenarios
23+
from ..agents import base_agent
24+
from ..dependencies.vertexai import vertexai
25+
26+
types = vertexai.types
27+
28+
29+
logger = logging.getLogger("google_adk." + __name__)
30+
31+
_ERROR_MESSAGE_SUFFIX = """
32+
You should specify both project id and location. This metric uses Vertex Gen AI
33+
Eval SDK, and it requires google cloud credentials.
34+
35+
If using an .env file add the values there, or explicitly set in the code using
36+
the template below:
37+
38+
os.environ['GOOGLE_CLOUD_LOCATION'] = <LOCATION>
39+
os.environ['GOOGLE_CLOUD_PROJECT'] = <PROJECT ID>
40+
"""
41+
42+
43+
class ScenarioGenerator:
  """Facade over the Vertex Gen AI Eval SDK for generating eval scenarios.

  Using this class requires a GCP project. Please set GOOGLE_CLOUD_PROJECT and
  GOOGLE_CLOUD_LOCATION in your .env file.
  """

  def __init__(self):
    api_key = os.environ.get("GOOGLE_API_KEY")
    project_id = os.environ.get("GOOGLE_CLOUD_PROJECT")
    location = os.environ.get("GOOGLE_CLOUD_LOCATION")

    # An explicit API key takes precedence over project/location credentials.
    if api_key:
      self._client = vertexai.Client(api_key=api_key)
      return

    # No API key: require BOTH project id and location, with a targeted error
    # message for whichever half of the pair is missing.
    if not project_id and not location:
      raise ValueError(
          "Either API Key or Google cloud Project id and location should be"
          " specified."
      )
    if not project_id:
      raise ValueError("Missing project id." + _ERROR_MESSAGE_SUFFIX)
    if not location:
      raise ValueError("Missing location." + _ERROR_MESSAGE_SUFFIX)
    self._client = vertexai.Client(project=project_id, location=location)

  def generate_scenarios(
      self,
      agent: base_agent.BaseAgent,
      config: conversation_scenarios.ConversationGenerationConfig,
  ) -> list[conversation_scenarios.ConversationScenario]:
    """Generates conversation scenarios for the specified agent.

    Args:
      agent: The root agent representing the system under test.
      config: The configuration for ConversationGenerationConfig.

    Returns:
      A list of ADK ConversationScenario objects.
    """
    # Describe the agent in the representation the Vertex SDK expects.
    agent_info = types.evals.AgentInfo.load_from_agent(agent=agent)

    # Translate the ADK-side config into the SDK's generation config.
    sdk_config = types.evals.UserScenarioGenerationConfig(
        count=config.count,
        generation_instruction=config.generation_instruction,
        environment_context=config.environment_context,
        model_name=config.model_name,
    )

    dataset = self._client.evals.generate_conversation_scenarios(
        agent_info=agent_info,
        config=sdk_config,
    )

    # Convert only the cases that actually carry a user scenario.
    return [
        conversation_scenarios.ConversationScenario(
            starting_prompt=case.user_scenario.starting_prompt,
            conversation_plan=case.user_scenario.conversation_plan,
        )
        for case in dataset.eval_cases
        if case.user_scenario
    ]

src/google/adk/evaluation/conversation_scenarios.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,33 @@ class ConversationScenarios(EvalBaseModel):
7575
scenarios: list[ConversationScenario] = Field(
7676
default_factory=list, description="""A list of ConversationScenario."""
7777
)
78+
79+
80+
class ConversationGenerationConfig(EvalBaseModel):
  """Configuration for generating conversation scenarios.

  Mirrors the fields consumed by the Vertex AI scenario-generation facade,
  which maps them onto the SDK's generation config.
  """

  # Required: how many conversation scenarios the generator should produce.
  count: int = Field(
      description="The number of conversation scenarios to generate."
  )
  # Optional free-text steering instruction; None means no extra guidance.
  generation_instruction: Optional[str] = Field(
      default=None,
      description=(
          "Optional natural language goal to guide the EvalSet generation."
      ),
  )
  # Optional "ground truth" description of the backend data the agent's tools
  # can see, so generated queries reference data that exists.
  environment_context: Optional[str] = Field(
      default=None,
      description=(
          "Context describing the backend data or state accessible to the"
          " agent's tools. This acts as the 'ground truth' for the simulation,"
          " ensuring generated queries reference data that actually exists"
          " (e.g., a list of available models so the generator knows what the"
          " 'get_model_available' tool will return)."
      ),
  )
  # Required: which Gemini model performs the generation.
  model_name: str = Field(
      description=(
          "The name of the Gemini model to use for generating the scenarios"
          " (e.g., 'gemini-2.5-flash')."
      )
  )
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Tests for generate_eval_cases CLI command."""
16+
17+
import json
18+
import os
19+
import pathlib
20+
21+
from click.testing import CliRunner
22+
# We must mock or import the command safely for pytest
23+
from google.adk.cli.cli_tools_click import cli_generate_eval_cases
24+
import pytest
25+
26+
27+
def test_cli_generate_eval_cases_integration(tmp_path):
  """E2E Test for the Vertex AI Scenario Generation Facade via the CLI."""
  # This requires identical project setup to Kokoro's e2e_test_gcp_ubuntu_docker
  if not os.environ.get("GOOGLE_CLOUD_PROJECT"):
    pytest.skip(
        "GOOGLE_CLOUD_PROJECT is not set. Skipping generation CLI integration"
        " test."
    )

  # 1. Provide a UserSimulationConfig proxy
  user_sim_config = {
      "generation_instruction": (
          "Generate a test conversation scenario where the user asks a simple"
          " question."
      ),
      "count": 1,
      "model_name": "gemini-2.5-flash",
  }
  config_path = tmp_path / "user_sim_config.json"
  config_path.write_text(json.dumps(user_sim_config))

  agent_dir = (
      pathlib.Path(__file__).parent / "fixture" / "home_automation_agent"
  )
  eval_set_id = "cli_gen_test_eval_set"

  # 2. Invoke the command via click's testing runner
  cli_result = CliRunner().invoke(
      cli_generate_eval_cases,
      [
          str(agent_dir),
          eval_set_id,
          f"--user_simulation_config_file={config_path}",
          "--log_level=DEBUG",
      ],
  )

  # 3. Assert correct output
  assert cli_result.exit_code == 0, (
      f"Command failed: {cli_result.exception}\nOutput: {cli_result.output}"
  )
  assert (
      "Generating scenarios utilizing Vertex AI Eval SDK..." in cli_result.output
  )
  assert "added to eval set" in cli_result.output

0 commit comments

Comments
 (0)