Skip to content

Commit 974f9d3

Browse files
ankursharmas and copybara-github
authored and committed
fix: Re-adding eval related changes
Due to reasons that are being investigated, some of the recent changes got unintentionally reverted. We are adding those back in this PR. PiperOrigin-RevId: 789123312
1 parent 7d06fb7 commit 974f9d3

2 files changed

Lines changed: 239 additions & 78 deletions

File tree

src/google/adk/cli/cli_tools_click.py

Lines changed: 136 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@
3333
from . import cli_deploy
3434
from .. import version
3535
from ..evaluation.constants import MISSING_EVAL_DEPENDENCIES_MESSAGE
36-
from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager
37-
from ..sessions.in_memory_session_service import InMemorySessionService
3836
from .cli import run_cli
3937
from .fast_api import get_fast_api_app
4038
from .utils import envs
@@ -289,7 +287,7 @@ def cli_run(
289287
exists=True, dir_okay=True, file_okay=False, resolve_path=True
290288
),
291289
)
292-
@click.argument("eval_set_file_path", nargs=-1)
290+
@click.argument("eval_set_file_path_or_id", nargs=-1)
293291
@click.option("--config_file_path", help="Optional. The path to config file.")
294292
@click.option(
295293
"--print_detailed_results",
@@ -309,7 +307,7 @@ def cli_run(
309307
)
310308
def cli_eval(
311309
agent_module_file_path: str,
312-
eval_set_file_path: list[str],
310+
eval_set_file_path_or_id: list[str],
313311
config_file_path: str,
314312
print_detailed_results: bool,
315313
eval_storage_uri: Optional[str] = None,
@@ -319,38 +317,75 @@ def cli_eval(
319317
AGENT_MODULE_FILE_PATH: The path to the __init__.py file that contains a
320318
module by the name "agent". "agent" module contains a root_agent.
321319
322-
EVAL_SET_FILE_PATH: You can specify one or more eval set file paths.
320+
EVAL_SET_FILE_PATH_OR_ID: You can specify one or more eval set file paths or
321+
eval set id.
323322
323+
Mixing of eval set file paths with eval set ids is not allowed.
324+
325+
*Eval Set File Path*
324326
For each file, all evals will be run by default.
325327
326328
If you want to run only specific evals from a eval set, first create a comma
327329
separated list of eval names and then add that as a suffix to the eval set
328330
file name, demarcated by a `:`.
329331
330-
For example,
332+
For example, we have a `sample_eval_set_file.json` file that has the following
333+
eval cases:
334+
sample_eval_set_file.json:
335+
|....... eval_1
336+
|....... eval_2
337+
|....... eval_3
338+
|....... eval_4
339+
|....... eval_5
331340
332341
sample_eval_set_file.json:eval_1,eval_2,eval_3
333342
334343
This will only run eval_1, eval_2 and eval_3 from sample_eval_set_file.json.
335344
345+
*Eval Set Id*
346+
For each eval set, all evals will be run by default.
347+
348+
If you want to run only specific evals from a eval set, first create a comma
349+
separated list of eval names and then add that as a suffix to the eval set
350+
file name, demarcated by a `:`.
351+
352+
For example, we have a `sample_eval_set_id` that has the following eval cases:
353+
sample_eval_set_id:
354+
|....... eval_1
355+
|....... eval_2
356+
|....... eval_3
357+
|....... eval_4
358+
|....... eval_5
359+
360+
If we did:
361+
sample_eval_set_id:eval_1,eval_2,eval_3
362+
363+
This will only run eval_1, eval_2 and eval_3 from sample_eval_set_id.
364+
336365
CONFIG_FILE_PATH: The path to config file.
337366
338367
PRINT_DETAILED_RESULTS: Prints detailed results on the console.
339368
"""
340369
envs.load_dotenv_for_agent(agent_module_file_path, ".")
341370

342371
try:
372+
from ..evaluation.base_eval_service import InferenceConfig
373+
from ..evaluation.base_eval_service import InferenceRequest
374+
from ..evaluation.eval_metrics import EvalMetric
375+
from ..evaluation.eval_result import EvalCaseResult
376+
from ..evaluation.evaluator import EvalStatus
377+
from ..evaluation.in_memory_eval_sets_manager import InMemoryEvalSetsManager
378+
from ..evaluation.local_eval_service import LocalEvalService
379+
from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager
343380
from ..evaluation.local_eval_sets_manager import load_eval_set_from_file
344-
from .cli_eval import EvalCaseResult
345-
from .cli_eval import EvalMetric
346-
from .cli_eval import EvalStatus
381+
from ..evaluation.local_eval_sets_manager import LocalEvalSetsManager
382+
from .cli_eval import _collect_eval_results
383+
from .cli_eval import _collect_inferences
347384
from .cli_eval import get_evaluation_criteria_or_default
348385
from .cli_eval import get_root_agent
349386
from .cli_eval import parse_and_get_evals_to_run
350-
from .cli_eval import run_evals
351-
from .cli_eval import try_get_reset_func
352-
except ModuleNotFoundError:
353-
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE)
387+
except ModuleNotFoundError as mnf:
388+
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
354389

355390
evaluation_criteria = get_evaluation_criteria_or_default(config_file_path)
356391
eval_metrics = []
@@ -362,81 +397,104 @@ def cli_eval(
362397
print(f"Using evaluation criteria: {evaluation_criteria}")
363398

364399
root_agent = get_root_agent(agent_module_file_path)
365-
reset_func = try_get_reset_func(agent_module_file_path)
366-
367-
gcs_eval_sets_manager = None
400+
app_name = os.path.basename(agent_module_file_path)
401+
agents_dir = os.path.dirname(agent_module_file_path)
402+
eval_sets_manager = None
368403
eval_set_results_manager = None
404+
369405
if eval_storage_uri:
370406
gcs_eval_managers = evals.create_gcs_eval_managers_from_uri(
371407
eval_storage_uri
372408
)
373-
gcs_eval_sets_manager = gcs_eval_managers.eval_sets_manager
409+
eval_sets_manager = gcs_eval_managers.eval_sets_manager
374410
eval_set_results_manager = gcs_eval_managers.eval_set_results_manager
375411
else:
376-
eval_set_results_manager = LocalEvalSetResultsManager(
377-
agents_dir=os.path.dirname(agent_module_file_path)
378-
)
379-
eval_set_file_path_to_evals = parse_and_get_evals_to_run(eval_set_file_path)
380-
eval_set_id_to_eval_cases = {}
381-
382-
# Read the eval_set files and get the cases.
383-
for eval_set_file_path, eval_case_ids in eval_set_file_path_to_evals.items():
384-
if gcs_eval_sets_manager:
385-
eval_set = gcs_eval_sets_manager._load_eval_set_from_blob(
386-
eval_set_file_path
387-
)
388-
if not eval_set:
412+
eval_set_results_manager = LocalEvalSetResultsManager(agents_dir=agents_dir)
413+
414+
inference_requests = []
415+
eval_set_file_or_id_to_evals = parse_and_get_evals_to_run(
416+
eval_set_file_path_or_id
417+
)
418+
419+
# Check if the first entry is a file that exists, if it does then we assume
420+
# rest of the entries are also files. We enforce this assumption in the if
421+
# block.
422+
if eval_set_file_or_id_to_evals and os.path.exists(
423+
list(eval_set_file_or_id_to_evals.keys())[0]
424+
):
425+
eval_sets_manager = InMemoryEvalSetsManager()
426+
427+
# Read the eval_set files and get the cases.
428+
for (
429+
eval_set_file_path,
430+
eval_case_ids,
431+
) in eval_set_file_or_id_to_evals.items():
432+
try:
433+
eval_set = load_eval_set_from_file(
434+
eval_set_file_path, eval_set_file_path
435+
)
436+
except FileNotFoundError as fne:
389437
raise click.ClickException(
390-
f"Eval set {eval_set_file_path} not found in GCS."
438+
f"`{eval_set_file_path}` should be a valid eval set file."
439+
) from fne
440+
441+
eval_sets_manager.create_eval_set(
442+
app_name=app_name, eval_set_id=eval_set.eval_set_id
443+
)
444+
for eval_case in eval_set.eval_cases:
445+
eval_sets_manager.add_eval_case(
446+
app_name=app_name,
447+
eval_set_id=eval_set.eval_set_id,
448+
eval_case=eval_case,
391449
)
392-
else:
393-
eval_set = load_eval_set_from_file(eval_set_file_path, eval_set_file_path)
394-
eval_cases = eval_set.eval_cases
395-
396-
if eval_case_ids:
397-
# There are eval_ids that we should select.
398-
eval_cases = [
399-
e for e in eval_set.eval_cases if e.eval_id in eval_case_ids
400-
]
401-
402-
eval_set_id_to_eval_cases[eval_set.eval_set_id] = eval_cases
403-
404-
async def _collect_eval_results() -> list[EvalCaseResult]:
405-
session_service = InMemorySessionService()
406-
eval_case_results = []
407-
async for eval_case_result in run_evals(
408-
eval_set_id_to_eval_cases,
409-
root_agent,
410-
reset_func,
411-
eval_metrics,
412-
session_service=session_service,
413-
):
414-
eval_case_result.session_details = await session_service.get_session(
415-
app_name=os.path.basename(agent_module_file_path),
416-
user_id=eval_case_result.user_id,
417-
session_id=eval_case_result.session_id,
450+
inference_requests.append(
451+
InferenceRequest(
452+
app_name=app_name,
453+
eval_set_id=eval_set.eval_set_id,
454+
eval_case_ids=eval_case_ids,
455+
inference_config=InferenceConfig(),
456+
)
457+
)
458+
else:
459+
# We assume that what we have are eval set ids instead.
460+
eval_sets_manager = (
461+
eval_sets_manager
462+
if eval_storage_uri
463+
else LocalEvalSetsManager(agents_dir=agents_dir)
464+
)
465+
466+
for eval_set_id_key, eval_case_ids in eval_set_file_or_id_to_evals.items():
467+
inference_requests.append(
468+
InferenceRequest(
469+
app_name=app_name,
470+
eval_set_id=eval_set_id_key,
471+
eval_case_ids=eval_case_ids,
472+
inference_config=InferenceConfig(),
473+
)
418474
)
419-
eval_case_results.append(eval_case_result)
420-
return eval_case_results
421475

422476
try:
423-
eval_results = asyncio.run(_collect_eval_results())
424-
except ModuleNotFoundError:
425-
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE)
426-
427-
# Write eval set results.
428-
eval_set_id_to_eval_results = collections.defaultdict(list)
429-
for eval_case_result in eval_results:
430-
eval_set_id = eval_case_result.eval_set_id
431-
eval_set_id_to_eval_results[eval_set_id].append(eval_case_result)
432-
433-
for eval_set_id, eval_case_results in eval_set_id_to_eval_results.items():
434-
eval_set_results_manager.save_eval_set_result(
435-
app_name=os.path.basename(agent_module_file_path),
436-
eval_set_id=eval_set_id,
437-
eval_case_results=eval_case_results,
477+
eval_service = LocalEvalService(
478+
root_agent=root_agent,
479+
eval_sets_manager=eval_sets_manager,
480+
eval_set_results_manager=eval_set_results_manager,
438481
)
439482

483+
inference_results = asyncio.run(
484+
_collect_inferences(
485+
inference_requests=inference_requests, eval_service=eval_service
486+
)
487+
)
488+
eval_results = asyncio.run(
489+
_collect_eval_results(
490+
inference_results=inference_results,
491+
eval_service=eval_service,
492+
eval_metrics=eval_metrics,
493+
)
494+
)
495+
except ModuleNotFoundError as mnf:
496+
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE) from mnf
497+
440498
print("*********************************************************************")
441499
eval_run_summary = {}
442500

@@ -890,8 +948,10 @@ def cli_deploy_cloud_run(
890948
port: int,
891949
trace_to_cloud: bool,
892950
with_ui: bool,
893-
verbosity: str,
894951
adk_version: str,
952+
verbosity: str = "WARNING",
953+
reload: bool = True,
954+
allow_origins: Optional[list[str]] = None,
895955
log_level: Optional[str] = None,
896956
session_service_uri: Optional[str] = None,
897957
artifact_service_uri: Optional[str] = None,
@@ -923,6 +983,7 @@ def cli_deploy_cloud_run(
923983
temp_folder=temp_folder,
924984
port=port,
925985
trace_to_cloud=trace_to_cloud,
986+
allow_origins=allow_origins,
926987
with_ui=with_ui,
927988
log_level=log_level,
928989
verbosity=verbosity,

0 commit comments

Comments
 (0)