3333from . import cli_deploy
3434from .. import version
3535from ..evaluation .constants import MISSING_EVAL_DEPENDENCIES_MESSAGE
36- from ..evaluation .local_eval_set_results_manager import LocalEvalSetResultsManager
37- from ..sessions .in_memory_session_service import InMemorySessionService
3836from .cli import run_cli
3937from .fast_api import get_fast_api_app
4038from .utils import envs
@@ -289,7 +287,7 @@ def cli_run(
289287 exists = True , dir_okay = True , file_okay = False , resolve_path = True
290288 ),
291289)
292- @click .argument ("eval_set_file_path " , nargs = - 1 )
290+ @click .argument ("eval_set_file_path_or_id " , nargs = - 1 )
293291@click .option ("--config_file_path" , help = "Optional. The path to config file." )
294292@click .option (
295293 "--print_detailed_results" ,
@@ -309,7 +307,7 @@ def cli_run(
309307)
310308def cli_eval (
311309 agent_module_file_path : str ,
312- eval_set_file_path : list [str ],
310+ eval_set_file_path_or_id : list [str ],
313311 config_file_path : str ,
314312 print_detailed_results : bool ,
315313 eval_storage_uri : Optional [str ] = None ,
@@ -319,38 +317,75 @@ def cli_eval(
319317 AGENT_MODULE_FILE_PATH: The path to the __init__.py file that contains a
320318 module by the name "agent". "agent" module contains a root_agent.
321319
322- EVAL_SET_FILE_PATH: You can specify one or more eval set file paths.
320+ EVAL_SET_FILE_PATH_OR_ID: You can specify one or more eval set file paths or
321+ eval set ids.
323322
323+ Mixing of eval set file paths with eval set ids is not allowed.
324+
325+ *Eval Set File Path*
324326 For each file, all evals will be run by default.
325327
326328 If you want to run only specific evals from an eval set, first create a comma
327329 separated list of eval names and then add that as a suffix to the eval set
328330 file name, demarcated by a `:`.
329331
330- For example,
332+ For example, we have a `sample_eval_set_file.json` file that has the following
333+ eval cases:
334+ sample_eval_set_file.json:
335+ |....... eval_1
336+ |....... eval_2
337+ |....... eval_3
338+ |....... eval_4
339+ |....... eval_5
331340
332341 sample_eval_set_file.json:eval_1,eval_2,eval_3
333342
334343 This will only run eval_1, eval_2 and eval_3 from sample_eval_set_file.json.
335344
345+ *Eval Set Id*
346+ For each eval set, all evals will be run by default.
347+
348+ If you want to run only specific evals from an eval set, first create a comma
349+ separated list of eval names and then add that as a suffix to the eval set
350+ id, demarcated by a `:`.
351+
352+ For example, we have `sample_eval_set_id` that has the following eval cases:
353+ sample_eval_set_id:
354+ |....... eval_1
355+ |....... eval_2
356+ |....... eval_3
357+ |....... eval_4
358+ |....... eval_5
359+
360+ If we did:
361+ sample_eval_set_id:eval_1,eval_2,eval_3
362+
363+ This will only run eval_1, eval_2 and eval_3 from sample_eval_set_id.
364+
336365 CONFIG_FILE_PATH: The path to config file.
337366
338367 PRINT_DETAILED_RESULTS: Prints detailed results on the console.
339368 """
340369 envs .load_dotenv_for_agent (agent_module_file_path , "." )
341370
342371 try :
372+ from ..evaluation .base_eval_service import InferenceConfig
373+ from ..evaluation .base_eval_service import InferenceRequest
374+ from ..evaluation .eval_metrics import EvalMetric
375+ from ..evaluation .eval_result import EvalCaseResult
376+ from ..evaluation .evaluator import EvalStatus
377+ from ..evaluation .in_memory_eval_sets_manager import InMemoryEvalSetsManager
378+ from ..evaluation .local_eval_service import LocalEvalService
379+ from ..evaluation .local_eval_set_results_manager import LocalEvalSetResultsManager
343380 from ..evaluation .local_eval_sets_manager import load_eval_set_from_file
344- from .cli_eval import EvalCaseResult
345- from .cli_eval import EvalMetric
346- from .cli_eval import EvalStatus
381+ from .. evaluation . local_eval_sets_manager import LocalEvalSetsManager
382+ from .cli_eval import _collect_eval_results
383+ from .cli_eval import _collect_inferences
347384 from .cli_eval import get_evaluation_criteria_or_default
348385 from .cli_eval import get_root_agent
349386 from .cli_eval import parse_and_get_evals_to_run
350- from .cli_eval import run_evals
351- from .cli_eval import try_get_reset_func
352- except ModuleNotFoundError :
353- raise click .ClickException (MISSING_EVAL_DEPENDENCIES_MESSAGE )
387+ except ModuleNotFoundError as mnf :
388+ raise click .ClickException (MISSING_EVAL_DEPENDENCIES_MESSAGE ) from mnf
354389
355390 evaluation_criteria = get_evaluation_criteria_or_default (config_file_path )
356391 eval_metrics = []
@@ -362,81 +397,104 @@ def cli_eval(
362397 print (f"Using evaluation criteria: { evaluation_criteria } " )
363398
364399 root_agent = get_root_agent (agent_module_file_path )
365- reset_func = try_get_reset_func (agent_module_file_path )
366-
367- gcs_eval_sets_manager = None
400+ app_name = os . path . basename (agent_module_file_path )
401+ agents_dir = os . path . dirname ( agent_module_file_path )
402+ eval_sets_manager = None
368403 eval_set_results_manager = None
404+
369405 if eval_storage_uri :
370406 gcs_eval_managers = evals .create_gcs_eval_managers_from_uri (
371407 eval_storage_uri
372408 )
373- gcs_eval_sets_manager = gcs_eval_managers .eval_sets_manager
409+ eval_sets_manager = gcs_eval_managers .eval_sets_manager
374410 eval_set_results_manager = gcs_eval_managers .eval_set_results_manager
375411 else :
376- eval_set_results_manager = LocalEvalSetResultsManager (
377- agents_dir = os .path .dirname (agent_module_file_path )
378- )
379- eval_set_file_path_to_evals = parse_and_get_evals_to_run (eval_set_file_path )
380- eval_set_id_to_eval_cases = {}
381-
382- # Read the eval_set files and get the cases.
383- for eval_set_file_path , eval_case_ids in eval_set_file_path_to_evals .items ():
384- if gcs_eval_sets_manager :
385- eval_set = gcs_eval_sets_manager ._load_eval_set_from_blob (
386- eval_set_file_path
387- )
388- if not eval_set :
412+ eval_set_results_manager = LocalEvalSetResultsManager (agents_dir = agents_dir )
413+
414+ inference_requests = []
415+ eval_set_file_or_id_to_evals = parse_and_get_evals_to_run (
416+ eval_set_file_path_or_id
417+ )
418+
419+ # Check if the first entry is a file that exists, if it does then we assume
420+ # rest of the entries are also files. We enforce this assumption in the if
421+ # block.
422+ if eval_set_file_or_id_to_evals and os .path .exists (
423+ list (eval_set_file_or_id_to_evals .keys ())[0 ]
424+ ):
425+ eval_sets_manager = InMemoryEvalSetsManager ()
426+
427+ # Read the eval_set files and get the cases.
428+ for (
429+ eval_set_file_path ,
430+ eval_case_ids ,
431+ ) in eval_set_file_or_id_to_evals .items ():
432+ try :
433+ eval_set = load_eval_set_from_file (
434+ eval_set_file_path , eval_set_file_path
435+ )
436+ except FileNotFoundError as fne :
389437 raise click .ClickException (
390- f"Eval set { eval_set_file_path } not found in GCS."
438+ f"`{ eval_set_file_path } ` should be a valid eval set file."
439+ ) from fne
440+
441+ eval_sets_manager .create_eval_set (
442+ app_name = app_name , eval_set_id = eval_set .eval_set_id
443+ )
444+ for eval_case in eval_set .eval_cases :
445+ eval_sets_manager .add_eval_case (
446+ app_name = app_name ,
447+ eval_set_id = eval_set .eval_set_id ,
448+ eval_case = eval_case ,
391449 )
392- else :
393- eval_set = load_eval_set_from_file (eval_set_file_path , eval_set_file_path )
394- eval_cases = eval_set .eval_cases
395-
396- if eval_case_ids :
397- # There are eval_ids that we should select.
398- eval_cases = [
399- e for e in eval_set .eval_cases if e .eval_id in eval_case_ids
400- ]
401-
402- eval_set_id_to_eval_cases [eval_set .eval_set_id ] = eval_cases
403-
404- async def _collect_eval_results () -> list [EvalCaseResult ]:
405- session_service = InMemorySessionService ()
406- eval_case_results = []
407- async for eval_case_result in run_evals (
408- eval_set_id_to_eval_cases ,
409- root_agent ,
410- reset_func ,
411- eval_metrics ,
412- session_service = session_service ,
413- ):
414- eval_case_result .session_details = await session_service .get_session (
415- app_name = os .path .basename (agent_module_file_path ),
416- user_id = eval_case_result .user_id ,
417- session_id = eval_case_result .session_id ,
450+ inference_requests .append (
451+ InferenceRequest (
452+ app_name = app_name ,
453+ eval_set_id = eval_set .eval_set_id ,
454+ eval_case_ids = eval_case_ids ,
455+ inference_config = InferenceConfig (),
456+ )
457+ )
458+ else :
459+ # We assume that what we have are eval set ids instead.
460+ eval_sets_manager = (
461+ eval_sets_manager
462+ if eval_storage_uri
463+ else LocalEvalSetsManager (agents_dir = agents_dir )
464+ )
465+
466+ for eval_set_id_key , eval_case_ids in eval_set_file_or_id_to_evals .items ():
467+ inference_requests .append (
468+ InferenceRequest (
469+ app_name = app_name ,
470+ eval_set_id = eval_set_id_key ,
471+ eval_case_ids = eval_case_ids ,
472+ inference_config = InferenceConfig (),
473+ )
418474 )
419- eval_case_results .append (eval_case_result )
420- return eval_case_results
421475
422476 try :
423- eval_results = asyncio .run (_collect_eval_results ())
424- except ModuleNotFoundError :
425- raise click .ClickException (MISSING_EVAL_DEPENDENCIES_MESSAGE )
426-
427- # Write eval set results.
428- eval_set_id_to_eval_results = collections .defaultdict (list )
429- for eval_case_result in eval_results :
430- eval_set_id = eval_case_result .eval_set_id
431- eval_set_id_to_eval_results [eval_set_id ].append (eval_case_result )
432-
433- for eval_set_id , eval_case_results in eval_set_id_to_eval_results .items ():
434- eval_set_results_manager .save_eval_set_result (
435- app_name = os .path .basename (agent_module_file_path ),
436- eval_set_id = eval_set_id ,
437- eval_case_results = eval_case_results ,
477+ eval_service = LocalEvalService (
478+ root_agent = root_agent ,
479+ eval_sets_manager = eval_sets_manager ,
480+ eval_set_results_manager = eval_set_results_manager ,
438481 )
439482
483+ inference_results = asyncio .run (
484+ _collect_inferences (
485+ inference_requests = inference_requests , eval_service = eval_service
486+ )
487+ )
488+ eval_results = asyncio .run (
489+ _collect_eval_results (
490+ inference_results = inference_results ,
491+ eval_service = eval_service ,
492+ eval_metrics = eval_metrics ,
493+ )
494+ )
495+ except ModuleNotFoundError as mnf :
496+ raise click .ClickException (MISSING_EVAL_DEPENDENCIES_MESSAGE ) from mnf
497+
440498 print ("*********************************************************************" )
441499 eval_run_summary = {}
442500
@@ -890,8 +948,10 @@ def cli_deploy_cloud_run(
890948 port : int ,
891949 trace_to_cloud : bool ,
892950 with_ui : bool ,
893- verbosity : str ,
894951 adk_version : str ,
952+ verbosity : str = "WARNING" ,
953+ reload : bool = True ,
954+ allow_origins : Optional [list [str ]] = None ,
895955 log_level : Optional [str ] = None ,
896956 session_service_uri : Optional [str ] = None ,
897957 artifact_service_uri : Optional [str ] = None ,
@@ -923,6 +983,7 @@ def cli_deploy_cloud_run(
923983 temp_folder = temp_folder ,
924984 port = port ,
925985 trace_to_cloud = trace_to_cloud ,
986+ allow_origins = allow_origins ,
926987 with_ui = with_ui ,
927988 log_level = log_level ,
928989 verbosity = verbosity ,
0 commit comments