@@ -122,15 +122,19 @@ def mock_api_client_fixture():
122122
123123@pytest .fixture
124124def mock_eval_dependencies (mock_api_client_fixture ):
125- with mock .patch ("google.cloud.storage.Client" ) as mock_storage_client , mock .patch (
126- "google.cloud.bigquery.Client"
127- ) as mock_bq_client , mock .patch (
128- "vertexai._genai.evals.Evals.evaluate_instances"
129- ) as mock_evaluate_instances , mock .patch (
130- "vertexai._genai._gcs_utils.GcsUtils.upload_json_to_prefix"
131- ) as mock_upload_to_gcs , mock .patch (
132- "vertexai._genai._evals_metric_loaders.LazyLoadedPrebuiltMetric._fetch_and_parse"
133- ) as mock_fetch_prebuilt_metric :
125+ with (
126+ mock .patch ("google.cloud.storage.Client" ) as mock_storage_client ,
127+ mock .patch ("google.cloud.bigquery.Client" ) as mock_bq_client ,
128+ mock .patch (
129+ "vertexai._genai.evals.Evals.evaluate_instances"
130+ ) as mock_evaluate_instances ,
131+ mock .patch (
132+ "vertexai._genai._gcs_utils.GcsUtils.upload_json_to_prefix"
133+ ) as mock_upload_to_gcs ,
134+ mock .patch (
135+ "vertexai._genai._evals_metric_loaders.LazyLoadedPrebuiltMetric._fetch_and_parse"
136+ ) as mock_fetch_prebuilt_metric ,
137+ ):
134138
135139 def mock_evaluate_instances_side_effect (* args , ** kwargs ):
136140 metric_config = kwargs .get ("metric_config" , {})
@@ -3386,14 +3390,8 @@ def test_run_inference_with_agent_engine_falls_back_to_managed_sessions_api(
33863390 assert inference_result .candidate_name == "agent_engine_0"
33873391
33883392 @mock .patch .object (_evals_utils , "EvalDatasetLoader" )
3389- @mock .patch ("vertexai._genai._evals_common.InMemorySessionService" ) # fmt: skip
3390- @mock .patch ("vertexai._genai._evals_common.Runner" )
3391- @mock .patch ("vertexai._genai._evals_common.LlmAgent" )
33923393 def test_run_inference_with_local_agent (
33933394 self ,
3394- mock_llm_agent ,
3395- mock_runner ,
3396- mock_session_service ,
33973395 mock_eval_dataset_loader ,
33983396 ):
33993397 mock_df = pd .DataFrame (
@@ -3421,8 +3419,15 @@ def test_run_inference_with_local_agent(
34213419 mock_agent_instance .instruction = "mock instruction"
34223420 mock_agent_instance .tools = []
34233421 mock_agent_instance .sub_agents = []
3424- mock_llm_agent .return_value = mock_agent_instance
3422+
3423+ # Mock ADK modules for lazy imports in _execute_local_agent_run_with_retry_async
3424+ mock_session_service = mock .MagicMock ()
34253425 mock_session_service .return_value .create_session = mock .AsyncMock ()
3426+ mock_runner = mock .MagicMock ()
3427+ mock_adk_sessions_module = mock .MagicMock ()
3428+ mock_adk_sessions_module .InMemorySessionService = mock_session_service
3429+ mock_adk_runners_module = mock .MagicMock ()
3430+ mock_adk_runners_module .Runner = mock_runner
34263431 mock_runner_instance = mock_runner .return_value
34273432 stream_run_return_value_1 = [
34283433 mock .Mock (
@@ -3473,10 +3478,19 @@ def run_async_side_effect(*args, **kwargs):
34733478
34743479 mock_runner_instance .run_async .side_effect = run_async_side_effect
34753480
3476- inference_result = self .client .evals .run_inference (
3477- agent = mock_agent_instance ,
3478- src = mock_df ,
3479- )
3481+ with mock .patch .dict (
3482+ sys .modules ,
3483+ {
3484+ "google.adk" : mock .MagicMock (),
3485+ "google.adk.sessions" : mock_adk_sessions_module ,
3486+ "google.adk.runners" : mock_adk_runners_module ,
3487+ "google.adk.agents" : mock .MagicMock (),
3488+ },
3489+ ):
3490+ inference_result = self .client .evals .run_inference (
3491+ agent = mock_agent_instance ,
3492+ src = mock_df ,
3493+ )
34803494
34813495 mock_eval_dataset_loader .return_value .load .assert_called_once_with (mock_df )
34823496 assert mock_session_service .call_count == 2
@@ -3602,11 +3616,14 @@ def test_run_inference_with_litellm_string_prompt_format(
36023616 mock_api_client_fixture ,
36033617 ):
36043618 """Tests inference with LiteLLM using a simple prompt string."""
3605- with mock .patch (
3606- "vertexai._genai._evals_common.litellm"
3607- ) as mock_litellm , mock .patch (
3608- "vertexai._genai._evals_common._call_litellm_completion"
3609- ) as mock_call_litellm_completion :
3619+ with (
3620+ mock .patch (
3621+ "vertexai._genai._evals_common.litellm"
3622+ ) as mock_litellm ,
3623+ mock .patch (
3624+ "vertexai._genai._evals_common._call_litellm_completion"
3625+ ) as mock_call_litellm_completion ,
3626+ ):
36103627 mock_litellm .utils .get_valid_models .return_value = ["gpt-4o" ]
36113628 prompt_df = pd .DataFrame ([{"prompt" : "What is LiteLLM?" }])
36123629 expected_messages = [{"role" : "user" , "content" : "What is LiteLLM?" }]
@@ -3658,11 +3675,14 @@ def test_run_inference_with_litellm_openai_request_format(
36583675 mock_api_client_fixture ,
36593676 ):
36603677 """Tests inference with LiteLLM where the row contains a chat completion request body."""
3661- with mock .patch (
3662- "vertexai._genai._evals_common.litellm"
3663- ) as mock_litellm , mock .patch (
3664- "vertexai._genai._evals_common._call_litellm_completion"
3665- ) as mock_call_litellm_completion :
3678+ with (
3679+ mock .patch (
3680+ "vertexai._genai._evals_common.litellm"
3681+ ) as mock_litellm ,
3682+ mock .patch (
3683+ "vertexai._genai._evals_common._call_litellm_completion"
3684+ ) as mock_call_litellm_completion ,
3685+ ):
36663686 mock_litellm .utils .get_valid_models .return_value = ["gpt-4o" ]
36673687 prompt_df = pd .DataFrame (
36683688 [
@@ -4178,21 +4198,23 @@ def test_run_agent_internal_multi_turn_with_agent(self, mock_run_agent):
41784198 ]
41794199 assert "mock_agent" in agent_data ["agents" ]
41804200
4181- @mock .patch ("vertexai._genai._evals_common.ADK_SessionInput" ) # fmt: skip
4182- @mock .patch ("vertexai._genai._evals_common.EvaluationGenerator" ) # fmt: skip
4183- @mock .patch ("vertexai._genai._evals_common.LlmBackedUserSimulator" ) # fmt: skip
4184- @mock .patch ("vertexai._genai._evals_common.ConversationScenario" ) # fmt: skip
4185- @mock .patch ("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig" ) # fmt: skip
41864201 @pytest .mark .asyncio
4187- async def test_run_adk_user_simulation_with_intermediate_events (
4188- self ,
4189- mock_config ,
4190- mock_scenario ,
4191- mock_simulator ,
4192- mock_generator ,
4193- mock_session_input ,
4194- ):
4202+ async def test_run_adk_user_simulation_with_intermediate_events (self ):
41954203 """Tests that intermediate invocation events (e.g. tool calls) are parsed successfully."""
4204+ mock_scenario = mock .MagicMock ()
4205+ mock_config = mock .MagicMock ()
4206+ mock_simulator = mock .MagicMock ()
4207+ mock_generator = mock .MagicMock ()
4208+ mock_session_input = mock .MagicMock ()
4209+ mock_adk_eval_scenarios = mock .MagicMock ()
4210+ mock_adk_eval_scenarios .ConversationScenario = mock_scenario
4211+ mock_adk_eval_case = mock .MagicMock ()
4212+ mock_adk_eval_case .SessionInput = mock_session_input
4213+ mock_adk_eval_generator = mock .MagicMock ()
4214+ mock_adk_eval_generator .EvaluationGenerator = mock_generator
4215+ mock_adk_simulator_module = mock .MagicMock ()
4216+ mock_adk_simulator_module .LlmBackedUserSimulator = mock_simulator
4217+ mock_adk_simulator_module .LlmBackedUserSimulatorConfig = mock_config
41964218 row = pd .Series (
41974219 {
41984220 "starting_prompt" : "I want a laptop." ,
@@ -4245,7 +4267,19 @@ async def test_run_adk_user_simulation_with_intermediate_events(
42454267 mock_generator ._generate_inferences_from_root_agent = mock .AsyncMock (
42464268 return_value = [mock_invocation ]
42474269 )
4248- turns = await _evals_common ._run_adk_user_simulation (row , mock_agent )
4270+ with mock .patch .dict (
4271+ sys .modules ,
4272+ {
4273+ "google.adk" : mock .MagicMock (),
4274+ "google.adk.evaluation" : mock .MagicMock (),
4275+ "google.adk.evaluation.conversation_scenarios" : mock_adk_eval_scenarios ,
4276+ "google.adk.evaluation.eval_case" : mock_adk_eval_case ,
4277+ "google.adk.evaluation.evaluation_generator" : mock_adk_eval_generator ,
4278+ "google.adk.evaluation.simulation" : mock .MagicMock (),
4279+ "google.adk.evaluation.simulation.llm_backed_user_simulator" : mock_adk_simulator_module ,
4280+ },
4281+ ):
4282+ turns = await _evals_common ._run_adk_user_simulation (row , mock_agent )
42494283
42504284 assert len (turns ) == 1
42514285 turn = turns [0 ]
@@ -7086,20 +7120,50 @@ def test_build_request_payload_tool_use_quality_v1_with_agent_data_tool_call(
70867120class TestRunAdkUserSimulation :
70877121 """Unit tests for the _run_adk_user_simulation function."""
70887122
7089- @mock .patch ("vertexai._genai._evals_common.ADK_SessionInput" ) # fmt: skip
7090- @mock .patch ("vertexai._genai._evals_common.EvaluationGenerator" ) # fmt: skip
7091- @mock .patch ("vertexai._genai._evals_common.LlmBackedUserSimulator" ) # fmt: skip
7092- @mock .patch ("vertexai._genai._evals_common.ConversationScenario" ) # fmt: skip
7093- @mock .patch ("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig" ) # fmt: skip
7123+ def _build_adk_mock_modules (self ):
7124+ """Builds mock ADK modules for lazy imports in _run_adk_user_simulation."""
7125+ mock_scenario_cls = mock .MagicMock ()
7126+ mock_config_cls = mock .MagicMock ()
7127+ mock_simulator_cls = mock .MagicMock ()
7128+ mock_generator_cls = mock .MagicMock ()
7129+ mock_session_input_cls = mock .MagicMock ()
7130+ mock_modules = {
7131+ "google.adk" : mock .MagicMock (),
7132+ "google.adk.evaluation" : mock .MagicMock (),
7133+ "google.adk.evaluation.conversation_scenarios" : mock .MagicMock (
7134+ ConversationScenario = mock_scenario_cls
7135+ ),
7136+ "google.adk.evaluation.eval_case" : mock .MagicMock (
7137+ SessionInput = mock_session_input_cls
7138+ ),
7139+ "google.adk.evaluation.evaluation_generator" : mock .MagicMock (
7140+ EvaluationGenerator = mock_generator_cls
7141+ ),
7142+ "google.adk.evaluation.simulation" : mock .MagicMock (),
7143+ "google.adk.evaluation.simulation.llm_backed_user_simulator" : mock .MagicMock (
7144+ LlmBackedUserSimulator = mock_simulator_cls ,
7145+ LlmBackedUserSimulatorConfig = mock_config_cls ,
7146+ ),
7147+ }
7148+ return (
7149+ mock_modules ,
7150+ mock_scenario_cls ,
7151+ mock_config_cls ,
7152+ mock_simulator_cls ,
7153+ mock_generator_cls ,
7154+ mock_session_input_cls ,
7155+ )
7156+
70947157 @pytest .mark .asyncio
7095- async def test_run_adk_user_simulation_success (
7096- self ,
7097- mock_config_cls ,
7098- mock_scenario_cls ,
7099- mock_simulator_cls ,
7100- mock_generator_cls ,
7101- mock_session_input_cls ,
7102- ):
7158+ async def test_run_adk_user_simulation_success (self ):
7159+ (
7160+ mock_modules ,
7161+ mock_scenario_cls ,
7162+ _ ,
7163+ _ ,
7164+ mock_generator_cls ,
7165+ mock_session_input_cls ,
7166+ ) = self ._build_adk_mock_modules ()
71037167 row = pd .Series (
71047168 {
71057169 "starting_prompt" : "start" ,
@@ -7119,7 +7183,8 @@ async def test_run_adk_user_simulation_success(
71197183 return_value = [mock_invocation ]
71207184 )
71217185
7122- turns = await _evals_common ._run_adk_user_simulation (row , mock_agent )
7186+ with mock .patch .dict (sys .modules , mock_modules ):
7187+ turns = await _evals_common ._run_adk_user_simulation (row , mock_agent )
71237188
71247189 assert len (turns ) == 1
71257190 turn = turns [0 ]
@@ -7138,40 +7203,26 @@ async def test_run_adk_user_simulation_success(
71387203 )
71397204 mock_session_input_cls .assert_called_once ()
71407205
@pytest.mark.asyncio
async def test_run_adk_user_simulation_missing_columns(self):
    """Expects a ValueError when the row only provides ``conversation_plan``."""
    # Only the module mapping is needed here; the class mocks are not inspected.
    adk_modules, *_ = self._build_adk_mock_modules()
    incomplete_row = pd.Series({"conversation_plan": "plan"})
    agent_stub = mock.Mock()

    # patch.dict is entered first and exited last, as in the nested form.
    with mock.patch.dict(sys.modules, adk_modules), pytest.raises(
        ValueError, match="User simulation requires"
    ):
        await _evals_common._run_adk_user_simulation(incomplete_row, agent_stub)
71607215
7161- @mock .patch ("vertexai._genai._evals_common.ADK_SessionInput" ) # fmt: skip
7162- @mock .patch ("vertexai._genai._evals_common.EvaluationGenerator" ) # fmt: skip
7163- @mock .patch ("vertexai._genai._evals_common.LlmBackedUserSimulator" ) # fmt: skip
7164- @mock .patch ("vertexai._genai._evals_common.ConversationScenario" ) # fmt: skip
7165- @mock .patch ("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig" ) # fmt: skip
71667216 @pytest .mark .asyncio
7167- async def test_run_adk_user_simulation_missing_session_inputs (
7168- self ,
7169- mock_config_cls ,
7170- mock_scenario_cls ,
7171- mock_simulator_cls ,
7172- mock_generator_cls ,
7173- mock_session_input_cls ,
7174- ):
7217+ async def test_run_adk_user_simulation_missing_session_inputs (self ):
7218+ (
7219+ mock_modules ,
7220+ mock_scenario_cls ,
7221+ _ ,
7222+ _ ,
7223+ mock_generator_cls ,
7224+ mock_session_input_cls ,
7225+ ) = self ._build_adk_mock_modules ()
71757226 row = pd .Series (
71767227 {
71777228 "starting_prompt" : "start" ,
@@ -7190,7 +7241,8 @@ async def test_run_adk_user_simulation_missing_session_inputs(
71907241 return_value = [mock_invocation ]
71917242 )
71927243
7193- await _evals_common ._run_adk_user_simulation (row , mock_agent )
7244+ with mock .patch .dict (sys .modules , mock_modules ):
7245+ await _evals_common ._run_adk_user_simulation (row , mock_agent )
71947246
71957247 mock_scenario_cls .assert_called_once_with (
71967248 starting_prompt = "start" ,
0 commit comments