@@ -1886,9 +1886,123 @@ def test_run_agent_internal_error_response(self, mock_run_agent):
18861886
18871887 assert "response" in result_df .columns
18881888 response_content = result_df ["response" ][0 ]
1889- assert "Unexpected response type from agent run" in response_content
1889+ assert "agent run failed " in response_content
18901890 assert not result_df ["intermediate_events" ][0 ]
18911891
1892+ @mock .patch .object (_evals_common , "_run_agent" )
1893+ def test_run_agent_internal_multi_turn_success (self , mock_run_agent ):
1894+ mock_run_agent .return_value = [
1895+ [
1896+ {"turn_index" : 0 , "turn_id" : "t1" , "events" : []},
1897+ {"turn_index" : 1 , "turn_id" : "t2" , "events" : []},
1898+ ]
1899+ ]
1900+ prompt_dataset = pd .DataFrame ({"prompt" : ["p1" ], "conversation_plan" : ["plan" ]})
1901+ mock_agent_engine = mock .Mock ()
1902+ mock_api_client = mock .Mock ()
1903+ result_df = _evals_common ._run_agent_internal (
1904+ api_client = mock_api_client ,
1905+ agent_engine = mock_agent_engine ,
1906+ agent = None ,
1907+ prompt_dataset = prompt_dataset ,
1908+ )
1909+
1910+ assert "agent_data" in result_df .columns
1911+ agent_data = result_df ["agent_data" ][0 ]
1912+ assert agent_data ["turns" ] == [
1913+ {"turn_index" : 0 , "turn_id" : "t1" , "events" : []},
1914+ {"turn_index" : 1 , "turn_id" : "t2" , "events" : []},
1915+ ]
1916+
1917+ @mock .patch ("vertexai._genai._evals_common.ADK_SessionInput" ) # fmt: skip
1918+ @mock .patch ("vertexai._genai._evals_common.EvaluationGenerator" ) # fmt: skip
1919+ @mock .patch ("vertexai._genai._evals_common.LlmBackedUserSimulator" ) # fmt: skip
1920+ @mock .patch ("vertexai._genai._evals_common.ConversationScenario" ) # fmt: skip
1921+ @mock .patch ("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig" ) # fmt: skip
1922+ @pytest .mark .asyncio
1923+ async def test_run_adk_user_simulation_with_intermediate_events (
1924+ self ,
1925+ mock_config ,
1926+ mock_scenario ,
1927+ mock_simulator ,
1928+ mock_generator ,
1929+ mock_session_input ,
1930+ ):
1931+ """Tests that intermediate invocation events (e.g. tool calls) are parsed successfully."""
1932+ row = pd .Series (
1933+ {
1934+ "starting_prompt" : "I want a laptop." ,
1935+ "conversation_plan" : "Ask for a laptop" ,
1936+ "session_inputs" : json .dumps ({"user_id" : "u1" }),
1937+ }
1938+ )
1939+ mock_agent = mock .Mock ()
1940+
1941+ mock_invocation = mock .Mock ()
1942+ mock_invocation .invocation_id = "turn_123"
1943+ mock_invocation .creation_timestamp = 1771811084.88
1944+ mock_invocation .user_content .model_dump .return_value = {
1945+ "parts" : [{"text" : "I want a laptop." }],
1946+ "role" : "user" ,
1947+ }
1948+ mock_event_1 = mock .Mock ()
1949+ mock_event_1 .author = "ecommerce_agent"
1950+ mock_event_1 .content .model_dump .return_value = {
1951+ "parts" : [
1952+ {
1953+ "function_call" : {
1954+ "name" : "search_products" ,
1955+ "args" : {"query" : "laptop" },
1956+ }
1957+ }
1958+ ]
1959+ }
1960+ mock_event_2 = mock .Mock ()
1961+ mock_event_2 .author = "ecommerce_agent"
1962+ mock_event_2 .content .model_dump .return_value = {
1963+ "parts" : [
1964+ {
1965+ "function_response" : {
1966+ "name" : "search_products" ,
1967+ "response" : {"products" : []},
1968+ }
1969+ }
1970+ ]
1971+ }
1972+
1973+ mock_invocation .intermediate_data .invocation_events = [
1974+ mock_event_1 ,
1975+ mock_event_2 ,
1976+ ]
1977+ mock_invocation .final_response .model_dump .return_value = {
1978+ "parts" : [{"text" : "There are no laptops matching your search." }],
1979+ "role" : "model" ,
1980+ }
1981+ mock_generator ._generate_inferences_from_root_agent = mock .AsyncMock (
1982+ return_value = [mock_invocation ]
1983+ )
1984+ turns = await _evals_common ._run_adk_user_simulation (row , mock_agent )
1985+
1986+ assert len (turns ) == 1
1987+ turn = turns [0 ]
1988+ assert turn ["turn_index" ] == 0
1989+ assert turn ["turn_id" ] == "turn_123"
1990+ assert len (turn ["events" ]) == 4
1991+ assert turn ["events" ][0 ]["author" ] == "user"
1992+ assert turn ["events" ][0 ]["content" ]["parts" ][0 ]["text" ] == "I want a laptop."
1993+ assert turn ["events" ][1 ]["author" ] == "ecommerce_agent"
1994+ assert "function_call" in turn ["events" ][1 ]["content" ]["parts" ][0 ]
1995+ assert turn ["events" ][2 ]["author" ] == "ecommerce_agent"
1996+ assert "function_response" in turn ["events" ][2 ]["content" ]["parts" ][0 ]
1997+ assert turn ["events" ][3 ]["author" ] == "agent"
1998+ assert (
1999+ turn ["events" ][3 ]["content" ]["parts" ][0 ]["text" ]
2000+ == "There are no laptops matching your search."
2001+ )
2002+ mock_invocation .user_content .model_dump .assert_called_with (mode = "json" )
2003+ mock_event_1 .content .model_dump .assert_called_with (mode = "json" )
2004+ mock_invocation .final_response .model_dump .assert_called_with (mode = "json" )
2005+
18922006 @mock .patch .object (_evals_common , "_run_agent" )
18932007 def test_run_agent_internal_malformed_event (self , mock_run_agent ):
18942008 mock_run_agent .return_value = [
@@ -1916,6 +2030,28 @@ def test_run_agent_internal_malformed_event(self, mock_run_agent):
19162030 assert not result_df ["intermediate_events" ][0 ]
19172031
19182032
class TestIsMultiTurnAgentRun:
    """Unit tests for the _is_multi_turn_agent_run function."""

    def test_is_multi_turn_agent_run_with_config(self):
        # An explicit user-simulator config alone marks the run multi-turn.
        config = vertexai_genai_types.evals.UserSimulatorConfig(model_name="gemini-pro")
        assert _evals_common._is_multi_turn_agent_run(
            user_simulator_config=config, prompt_dataset=pd.DataFrame()
        )

    def test_is_multi_turn_agent_run_with_conversation_plan(self):
        # A conversation_plan column in the dataset also marks it multi-turn.
        dataset = pd.DataFrame({"conversation_plan": ["plan"]})
        assert _evals_common._is_multi_turn_agent_run(
            user_simulator_config=None, prompt_dataset=dataset
        )

    def test_is_multi_turn_agent_run_false(self):
        # Neither a config nor a conversation plan: plain single-turn run.
        dataset = pd.DataFrame({"prompt": ["prompt"]})
        assert not _evals_common._is_multi_turn_agent_run(
            user_simulator_config=None, prompt_dataset=dataset
        )
19192055class TestMetricPromptBuilder :
19202056 """Unit tests for the MetricPromptBuilder class."""
19212057
@@ -4229,6 +4365,81 @@ def test_tool_use_quality_metric_no_tool_call_logs_warning(
42294365 )
42304366
42314367
@pytest.mark.usefixtures("google_auth_mock")
class TestRunAdkUserSimulation:
    """Unit tests for the _run_adk_user_simulation function."""

    @mock.patch("vertexai._genai._evals_common.ADK_SessionInput")  # fmt: skip
    @mock.patch("vertexai._genai._evals_common.EvaluationGenerator")  # fmt: skip
    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulator")  # fmt: skip
    @mock.patch("vertexai._genai._evals_common.ConversationScenario")  # fmt: skip
    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig")  # fmt: skip
    @pytest.mark.asyncio
    async def test_run_adk_user_simulation_success(
        self,
        unused_config_cls,
        scenario_cls,
        unused_simulator_cls,
        generator_cls,
        session_input_cls,
    ):
        """A single invocation yields one turn with a user and an agent event."""
        row = pd.Series(
            {
                "starting_prompt": "start",
                "conversation_plan": "plan",
                "session_inputs": json.dumps({"user_id": "u1"}),
            }
        )
        agent = mock.Mock()

        invocation = mock.Mock()
        invocation.user_content.model_dump.return_value = {"text": "user msg"}
        invocation.final_response.model_dump.return_value = {"text": "agent msg"}
        invocation.intermediate_data = None  # no tool events for this turn
        invocation.creation_timestamp = 12345
        invocation.invocation_id = "turn1"
        generator_cls._generate_inferences_from_root_agent = mock.AsyncMock(
            return_value=[invocation]
        )

        turns = await _evals_common._run_adk_user_simulation(row, agent)

        assert len(turns) == 1
        turn = turns[0]
        assert turn["turn_index"] == 0
        assert turn["turn_id"] == "turn1"
        assert len(turn["events"]) == 2
        user_event, agent_event = turn["events"]
        assert user_event["author"] == "user"
        assert user_event["content"] == {"text": "user msg"}
        assert agent_event["author"] == "agent"
        assert agent_event["content"] == {"text": "agent msg"}

        # The scenario must be built from the row's prompt/plan columns.
        scenario_cls.assert_called_once_with(
            starting_prompt="start", conversation_plan="plan"
        )
        session_input_cls.assert_called_once()

    @mock.patch("vertexai._genai._evals_common.ADK_SessionInput")  # fmt: skip
    @mock.patch("vertexai._genai._evals_common.EvaluationGenerator")  # fmt: skip
    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulator")  # fmt: skip
    @mock.patch("vertexai._genai._evals_common.ConversationScenario")  # fmt: skip
    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig")  # fmt: skip
    @pytest.mark.asyncio
    async def test_run_adk_user_simulation_missing_columns(
        self,
        unused_config_cls,
        unused_scenario_cls,
        unused_simulator_cls,
        unused_generator_cls,
        unused_session_input_cls,
    ):
        """Rows lacking a starting_prompt column are rejected."""
        row = pd.Series({"conversation_plan": "plan"})

        with pytest.raises(ValueError, match="User simulation requires"):
            await _evals_common._run_adk_user_simulation(row, mock.Mock())
42324443@pytest .mark .usefixtures ("google_auth_mock" )
42334444class TestLLMMetricHandlerPayload :
42344445 def setup_method (self ):
0 commit comments