Skip to content

Commit 9199189

Browse files
google-genai-bot and copybara-github
authored and committed
feat(eval): Add ADK CLI for eval_set generation
PiperOrigin-RevId: 895946363
1 parent a220910 commit 9199189

File tree

5 files changed

+488
-0
lines changed

5 files changed

+488
-0
lines changed

src/google/adk/cli/cli_tools_click.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1064,6 +1064,7 @@ def cli_optimize(
10641064
from .cli_eval import _collect_eval_results
10651065
from .cli_eval import _collect_inferences
10661066
from .cli_eval import get_root_agent
1067+
10671068
except ModuleNotFoundError as mnf:
10681069
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
10691070

@@ -1199,6 +1200,7 @@ def cli_add_eval_case(
11991200
from ..evaluation.eval_case import EvalCase
12001201
from ..evaluation.eval_case import SessionInput
12011202
from .cli_eval import get_eval_sets_manager
1203+
12021204
except ModuleNotFoundError as mnf:
12031205
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
12041206

@@ -1247,6 +1249,127 @@ def cli_add_eval_case(
12471249
raise click.ClickException(f"Failed to add eval case(s): {e}") from e
12481250

12491251

1252+
@eval_set.command("generate_eval_cases", cls=HelpfulCommand)
1253+
@click.argument(
1254+
"agent_module_file_path",
1255+
type=click.Path(
1256+
exists=True, dir_okay=True, file_okay=False, resolve_path=True
1257+
),
1258+
)
1259+
@click.argument("eval_set_id", type=str, required=True)
1260+
@click.option(
1261+
"--user_simulation_config_file",
1262+
type=click.Path(
1263+
exists=True, dir_okay=False, file_okay=True, resolve_path=True
1264+
),
1265+
help=(
1266+
"A path to file containing JSON serialized "
1267+
"UserScenarioGenerationConfig dict."
1268+
),
1269+
required=True,
1270+
)
1271+
@eval_options()
1272+
def cli_generate_eval_cases(
1273+
agent_module_file_path: str,
1274+
eval_set_id: str,
1275+
user_simulation_config_file: str,
1276+
eval_storage_uri: Optional[str] = None,
1277+
log_level: str = "INFO",
1278+
):
1279+
"""Generates eval cases dynamically and adds them to the given eval set.
1280+
1281+
Uses Vertex AI Eval SDK to generate conversation scenarios based on an
1282+
Agent's info and definitions. It will automatically create the empty eval_set
1283+
if it has not been created in advance.
1284+
1285+
Args:
1286+
agent_module_file_path: The path to the agent module file.
1287+
eval_set_id: The id of the eval set to generate cases for.
1288+
user_simulation_config_file: The path to the user simulation config file.
1289+
eval_storage_uri: The eval storage uri.
1290+
log_level: The log level.
1291+
"""
1292+
logs.setup_adk_logger(getattr(logging, log_level.upper()))
1293+
try:
1294+
from ..evaluation._vertex_ai_scenario_generation_facade import ScenarioGenerator
1295+
from ..evaluation.conversation_scenarios import ConversationGenerationConfig
1296+
from ..evaluation.eval_case import EvalCase
1297+
from ..evaluation.eval_case import SessionInput
1298+
from .cli_eval import get_eval_sets_manager
1299+
from .cli_eval import get_root_agent
1300+
from .utils.state import create_empty_state
1301+
1302+
except ModuleNotFoundError as mnf:
1303+
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
1304+
1305+
app_name = os.path.basename(agent_module_file_path)
1306+
agents_dir = os.path.dirname(agent_module_file_path)
1307+
1308+
try:
1309+
eval_sets_manager = get_eval_sets_manager(eval_storage_uri, agents_dir)
1310+
root_agent = get_root_agent(agent_module_file_path)
1311+
1312+
# Try to create if it doesn't already exist.
1313+
if (
1314+
eval_sets_manager.get_eval_set(
1315+
app_name=app_name, eval_set_id=eval_set_id
1316+
)
1317+
is None
1318+
):
1319+
eval_sets_manager.create_eval_set(
1320+
app_name=app_name, eval_set_id=eval_set_id
1321+
)
1322+
click.echo(f"Eval set '{eval_set_id}' created for app '{app_name}'.")
1323+
else:
1324+
click.echo(f"Eval set '{eval_set_id}' already exists.")
1325+
1326+
with open(user_simulation_config_file, "r") as f:
1327+
config = ConversationGenerationConfig.model_validate_json(f.read())
1328+
1329+
generator = ScenarioGenerator()
1330+
click.echo("Generating scenarios utilizing Vertex AI Eval SDK...")
1331+
scenarios = generator.generate_scenarios(root_agent, config)
1332+
1333+
# TODO(pthodoroff): Expose initial session state when simulation library
1334+
# supports it.
1335+
initial_session_state = create_empty_state(root_agent)
1336+
1337+
session_input = SessionInput(
1338+
app_name=app_name, user_id="test_user_id", state=initial_session_state
1339+
)
1340+
1341+
for scenario in scenarios:
1342+
scenario_str = json.dumps(scenario.model_dump(), sort_keys=True)
1343+
eval_id = hashlib.sha256(scenario_str.encode("utf-8")).hexdigest()[:8]
1344+
eval_case = EvalCase(
1345+
eval_id=eval_id,
1346+
conversation_scenario=scenario,
1347+
session_input=session_input,
1348+
creation_timestamp=datetime.now().timestamp(),
1349+
)
1350+
1351+
if (
1352+
eval_sets_manager.get_eval_case(
1353+
app_name=app_name, eval_set_id=eval_set_id, eval_case_id=eval_id
1354+
)
1355+
is None
1356+
):
1357+
eval_sets_manager.add_eval_case(
1358+
app_name=app_name, eval_set_id=eval_set_id, eval_case=eval_case
1359+
)
1360+
click.echo(
1361+
f"Eval case '{eval_case.eval_id}' added to eval set"
1362+
f" '{eval_set_id}'."
1363+
)
1364+
else:
1365+
click.echo(
1366+
f"Eval case '{eval_case.eval_id}' already exists in eval set"
1367+
f" '{eval_set_id}', skipped adding."
1368+
)
1369+
except Exception as e:
1370+
raise click.ClickException(f"Failed to generate eval case(s): {e}") from e
1371+
1372+
12501373
def web_options():
12511374
"""Decorator to add web UI options to click commands."""
12521375

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Vertex AI Scenario Generation Facade."""
16+
17+
from __future__ import annotations
18+
19+
import logging
20+
import os
21+
22+
from . import conversation_scenarios
23+
from ..agents import base_agent
24+
from ..dependencies.vertexai import vertexai
25+
26+
types = vertexai.types
27+
28+
29+
logger = logging.getLogger("google_adk." + __name__)
30+
31+
_ERROR_MESSAGE_SUFFIX = """
32+
You should specify both project id and location. This metric uses Vertex Gen AI
33+
Eval SDK, and it requires google cloud credentials.
34+
35+
If using an .env file add the values there, or explicitly set in the code using
36+
the template below:
37+
38+
os.environ['GOOGLE_CLOUD_LOCATION'] = <LOCATION>
39+
os.environ['GOOGLE_CLOUD_PROJECT'] = <PROJECT ID>
40+
"""
41+
42+
43+
class ScenarioGenerator:
  """Facade over the Vertex Gen AI Eval SDK for generating eval scenarios.

  Using this class requires a GCP project. Please set GOOGLE_CLOUD_PROJECT and
  GOOGLE_CLOUD_LOCATION in your .env file.
  """

  def __init__(self):
    api_key = os.environ.get("GOOGLE_API_KEY")
    project_id = os.environ.get("GOOGLE_CLOUD_PROJECT")
    location = os.environ.get("GOOGLE_CLOUD_LOCATION")

    # An explicit API key takes precedence over project/location credentials.
    if api_key:
      self._client = vertexai.Client(api_key=api_key)
      return

    # No API key: require BOTH project id and location, with a targeted error
    # message for whichever half of the pair is missing.
    if not project_id and not location:
      raise ValueError(
          "Either API Key or Google cloud Project id and location should be"
          " specified."
      )
    if not project_id:
      raise ValueError("Missing project id." + _ERROR_MESSAGE_SUFFIX)
    if not location:
      raise ValueError("Missing location." + _ERROR_MESSAGE_SUFFIX)
    self._client = vertexai.Client(project=project_id, location=location)

  def generate_scenarios(
      self,
      agent: base_agent.BaseAgent,
      config: conversation_scenarios.ConversationGenerationConfig,
  ) -> list[conversation_scenarios.ConversationScenario]:
    """Generates conversation scenarios for the specified agent.

    Args:
      agent: The root agent representing the system under test.
      config: The configuration for ConversationGenerationConfig.

    Returns:
      A list of ADK ConversationScenario objects.
    """
    # Describe the agent in the representation the Vertex SDK expects.
    agent_info = types.evals.AgentInfo.load_from_agent(agent=agent)

    # Translate the ADK-side config into the SDK's generation config.
    sdk_config = types.evals.UserScenarioGenerationConfig(
        count=config.count,
        generation_instruction=config.generation_instruction,
        environment_context=config.environment_context,
        model_name=config.model_name,
    )

    dataset = self._client.evals.generate_conversation_scenarios(
        agent_info=agent_info,
        config=sdk_config,
    )

    # Convert only the cases that actually carry a user scenario.
    return [
        conversation_scenarios.ConversationScenario(
            starting_prompt=case.user_scenario.starting_prompt,
            conversation_plan=case.user_scenario.conversation_plan,
        )
        for case in dataset.eval_cases
        if case.user_scenario
    ]

src/google/adk/evaluation/conversation_scenarios.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,33 @@ class ConversationScenarios(EvalBaseModel):
7575
scenarios: list[ConversationScenario] = Field(
7676
default_factory=list, description="""A list of ConversationScenario."""
7777
)
78+
79+
80+
class ConversationGenerationConfig(EvalBaseModel):
  """Configuration for generating conversation scenarios.

  Mirrors the fields consumed by the Vertex AI scenario-generation facade,
  which maps them onto the SDK's generation config.
  """

  # Required: how many conversation scenarios the generator should produce.
  count: int = Field(
      description="The number of conversation scenarios to generate."
  )
  # Optional free-text steering instruction; None means no extra guidance.
  generation_instruction: Optional[str] = Field(
      default=None,
      description=(
          "Optional natural language goal to guide the EvalSet generation."
      ),
  )
  # Optional "ground truth" description of the backend data the agent's tools
  # can see, so generated queries reference data that exists.
  environment_context: Optional[str] = Field(
      default=None,
      description=(
          "Context describing the backend data or state accessible to the"
          " agent's tools. This acts as the 'ground truth' for the simulation,"
          " ensuring generated queries reference data that actually exists"
          " (e.g., a list of available models so the generator knows what the"
          " 'get_model_available' tool will return)."
      ),
  )
  # Required: which Gemini model performs the generation.
  model_name: str = Field(
      description=(
          "The name of the Gemini model to use for generating the scenarios"
          " (e.g., 'gemini-2.5-flash')."
      )
  )
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Tests for generate_eval_cases CLI command."""
16+
17+
import json
18+
import os
19+
import pathlib
20+
21+
from click.testing import CliRunner
22+
# We must mock or import the command safely for pytest
23+
from google.adk.cli.cli_tools_click import cli_generate_eval_cases
24+
import pytest
25+
26+
27+
def test_cli_generate_eval_cases_integration(tmp_path):
  """E2E Test for the Vertex AI Scenario Generation Facade via the CLI."""
  # This requires identical project setup to Kokoro's e2e_test_gcp_ubuntu_docker
  if not os.environ.get("GOOGLE_CLOUD_PROJECT"):
    pytest.skip(
        "GOOGLE_CLOUD_PROJECT is not set. Skipping generation CLI integration"
        " test."
    )

  # 1. Provide a UserSimulationConfig proxy
  user_sim_config = {
      "generation_instruction": (
          "Generate a test conversation scenario where the user asks a simple"
          " question."
      ),
      "count": 1,
      "model_name": "gemini-2.5-flash",
  }
  config_path = tmp_path / "user_sim_config.json"
  config_path.write_text(json.dumps(user_sim_config))

  agent_dir = (
      pathlib.Path(__file__).parent / "fixture" / "home_automation_agent"
  )
  eval_set_id = "cli_gen_test_eval_set"

  # 2. Invoke the command via click's testing runner
  cli_result = CliRunner().invoke(
      cli_generate_eval_cases,
      [
          str(agent_dir),
          eval_set_id,
          f"--user_simulation_config_file={config_path}",
          "--log_level=DEBUG",
      ],
  )

  # 3. Assert correct output
  assert cli_result.exit_code == 0, (
      f"Command failed: {cli_result.exception}\nOutput: {cli_result.output}"
  )
  assert (
      "Generating scenarios utilizing Vertex AI Eval SDK..." in cli_result.output
  )
  assert "added to eval set" in cli_result.output

0 commit comments

Comments
 (0)