
Commit 429a182

vertex-sdk-bot authored and copybara-github committed
feat: GenAI Client(evals): Add local ADK agent multi-turn agent scraping via ADK user simulation library
PiperOrigin-RevId: 877565225
1 parent 7d58adb commit 429a182

4 files changed: +460 −54 lines changed

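For context on the feature named in the commit message, here is a minimal sketch of how the multi-turn path appears to be selected, mirroring the _is_multi_turn_agent_run tests added below. _evals_common is the private module patched throughout the new tests; the types import path and alias are assumptions inferred from this diff, not a documented public API.

    import pandas as pd

    from vertexai._genai import _evals_common  # module path taken from the mock.patch targets below
    from vertexai._genai import types as vertexai_genai_types  # assumed import behind the test alias

    # A dataset with a "conversation_plan" column, or an explicit UserSimulatorConfig,
    # routes the run through the new multi-turn user-simulation path.
    prompt_dataset = pd.DataFrame(
        {
            "starting_prompt": ["I want a laptop."],
            "conversation_plan": ["Ask for a laptop"],
        }
    )
    config = vertexai_genai_types.evals.UserSimulatorConfig(model_name="gemini-pro")

    assert _evals_common._is_multi_turn_agent_run(
        user_simulator_config=config, prompt_dataset=prompt_dataset
    )
    assert not _evals_common._is_multi_turn_agent_run(
        user_simulator_config=None, prompt_dataset=pd.DataFrame({"prompt": ["p1"]})
    )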

tests/unit/vertexai/genai/test_evals.py

Lines changed: 212 additions & 1 deletion
@@ -1886,9 +1886,123 @@ def test_run_agent_internal_error_response(self, mock_run_agent):
 
         assert "response" in result_df.columns
         response_content = result_df["response"][0]
-        assert "Unexpected response type from agent run" in response_content
+        assert "agent run failed" in response_content
         assert not result_df["intermediate_events"][0]
 
+    @mock.patch.object(_evals_common, "_run_agent")
+    def test_run_agent_internal_multi_turn_success(self, mock_run_agent):
+        mock_run_agent.return_value = [
+            [
+                {"turn_index": 0, "turn_id": "t1", "events": []},
+                {"turn_index": 1, "turn_id": "t2", "events": []},
+            ]
+        ]
+        prompt_dataset = pd.DataFrame({"prompt": ["p1"], "conversation_plan": ["plan"]})
+        mock_agent_engine = mock.Mock()
+        mock_api_client = mock.Mock()
+        result_df = _evals_common._run_agent_internal(
+            api_client=mock_api_client,
+            agent_engine=mock_agent_engine,
+            agent=None,
+            prompt_dataset=prompt_dataset,
+        )
+
+        assert "agent_data" in result_df.columns
+        agent_data = result_df["agent_data"][0]
+        assert agent_data["turns"] == [
+            {"turn_index": 0, "turn_id": "t1", "events": []},
+            {"turn_index": 1, "turn_id": "t2", "events": []},
+        ]
+
+    @mock.patch("vertexai._genai._evals_common.ADK_SessionInput")  # fmt: skip
+    @mock.patch("vertexai._genai._evals_common.EvaluationGenerator")  # fmt: skip
+    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulator")  # fmt: skip
+    @mock.patch("vertexai._genai._evals_common.ConversationScenario")  # fmt: skip
+    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig")  # fmt: skip
+    @pytest.mark.asyncio
+    async def test_run_adk_user_simulation_with_intermediate_events(
+        self,
+        mock_config,
+        mock_scenario,
+        mock_simulator,
+        mock_generator,
+        mock_session_input,
+    ):
+        """Tests that intermediate invocation events (e.g. tool calls) are parsed successfully."""
+        row = pd.Series(
+            {
+                "starting_prompt": "I want a laptop.",
+                "conversation_plan": "Ask for a laptop",
+                "session_inputs": json.dumps({"user_id": "u1"}),
+            }
+        )
+        mock_agent = mock.Mock()
+
+        mock_invocation = mock.Mock()
+        mock_invocation.invocation_id = "turn_123"
+        mock_invocation.creation_timestamp = 1771811084.88
+        mock_invocation.user_content.model_dump.return_value = {
+            "parts": [{"text": "I want a laptop."}],
+            "role": "user",
+        }
+        mock_event_1 = mock.Mock()
+        mock_event_1.author = "ecommerce_agent"
+        mock_event_1.content.model_dump.return_value = {
+            "parts": [
+                {
+                    "function_call": {
+                        "name": "search_products",
+                        "args": {"query": "laptop"},
+                    }
+                }
+            ]
+        }
+        mock_event_2 = mock.Mock()
+        mock_event_2.author = "ecommerce_agent"
+        mock_event_2.content.model_dump.return_value = {
+            "parts": [
+                {
+                    "function_response": {
+                        "name": "search_products",
+                        "response": {"products": []},
+                    }
+                }
+            ]
+        }
+
+        mock_invocation.intermediate_data.invocation_events = [
+            mock_event_1,
+            mock_event_2,
+        ]
+        mock_invocation.final_response.model_dump.return_value = {
+            "parts": [{"text": "There are no laptops matching your search."}],
+            "role": "model",
+        }
+        mock_generator._generate_inferences_from_root_agent = mock.AsyncMock(
+            return_value=[mock_invocation]
+        )
+        turns = await _evals_common._run_adk_user_simulation(row, mock_agent)
+
+        assert len(turns) == 1
+        turn = turns[0]
+        assert turn["turn_index"] == 0
+        assert turn["turn_id"] == "turn_123"
+        assert len(turn["events"]) == 4
+        assert turn["events"][0]["author"] == "user"
+        assert turn["events"][0]["content"]["parts"][0]["text"] == "I want a laptop."
+        assert turn["events"][1]["author"] == "ecommerce_agent"
+        assert "function_call" in turn["events"][1]["content"]["parts"][0]
+        assert turn["events"][2]["author"] == "ecommerce_agent"
+        assert "function_response" in turn["events"][2]["content"]["parts"][0]
+        assert turn["events"][3]["author"] == "agent"
+        assert (
+            turn["events"][3]["content"]["parts"][0]["text"]
+            == "There are no laptops matching your search."
+        )
+        mock_invocation.user_content.model_dump.assert_called_with(mode="json")
+        mock_event_1.content.model_dump.assert_called_with(mode="json")
+        mock_invocation.final_response.model_dump.assert_called_with(mode="json")
+
     @mock.patch.object(_evals_common, "_run_agent")
     def test_run_agent_internal_malformed_event(self, mock_run_agent):
         mock_run_agent.return_value = [
@@ -1916,6 +2030,28 @@ def test_run_agent_internal_malformed_event(self, mock_run_agent):
         assert not result_df["intermediate_events"][0]
 
 
+class TestIsMultiTurnAgentRun:
+    """Unit tests for the _is_multi_turn_agent_run function."""
+
+    def test_is_multi_turn_agent_run_with_config(self):
+        config = vertexai_genai_types.evals.UserSimulatorConfig(model_name="gemini-pro")
+        assert _evals_common._is_multi_turn_agent_run(
+            user_simulator_config=config, prompt_dataset=pd.DataFrame()
+        )
+
+    def test_is_multi_turn_agent_run_with_conversation_plan(self):
+        prompt_dataset = pd.DataFrame({"conversation_plan": ["plan"]})
+        assert _evals_common._is_multi_turn_agent_run(
+            user_simulator_config=None, prompt_dataset=prompt_dataset
+        )
+
+    def test_is_multi_turn_agent_run_false(self):
+        prompt_dataset = pd.DataFrame({"prompt": ["prompt"]})
+        assert not _evals_common._is_multi_turn_agent_run(
+            user_simulator_config=None, prompt_dataset=prompt_dataset
+        )
+
+
 class TestMetricPromptBuilder:
     """Unit tests for the MetricPromptBuilder class."""
 
@@ -4229,6 +4365,81 @@ def test_tool_use_quality_metric_no_tool_call_logs_warning(
         )
 
 
+@pytest.mark.usefixtures("google_auth_mock")
+class TestRunAdkUserSimulation:
+    """Unit tests for the _run_adk_user_simulation function."""
+
+    @mock.patch("vertexai._genai._evals_common.ADK_SessionInput")  # fmt: skip
+    @mock.patch("vertexai._genai._evals_common.EvaluationGenerator")  # fmt: skip
+    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulator")  # fmt: skip
+    @mock.patch("vertexai._genai._evals_common.ConversationScenario")  # fmt: skip
+    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig")  # fmt: skip
+    @pytest.mark.asyncio
+    async def test_run_adk_user_simulation_success(
+        self,
+        mock_config_cls,
+        mock_scenario_cls,
+        mock_simulator_cls,
+        mock_generator_cls,
+        mock_session_input_cls,
+    ):
+        row = pd.Series(
+            {
+                "starting_prompt": "start",
+                "conversation_plan": "plan",
+                "session_inputs": json.dumps({"user_id": "u1"}),
+            }
+        )
+        mock_agent = mock.Mock()
+        mock_invocation = mock.Mock()
+        mock_invocation.user_content.model_dump.return_value = {"text": "user msg"}
+        mock_invocation.final_response.model_dump.return_value = {"text": "agent msg"}
+        mock_invocation.intermediate_data = None
+        mock_invocation.creation_timestamp = 12345
+        mock_invocation.invocation_id = "turn1"
+
+        mock_generator_cls._generate_inferences_from_root_agent = mock.AsyncMock(
+            return_value=[mock_invocation]
+        )
+
+        turns = await _evals_common._run_adk_user_simulation(row, mock_agent)
+
+        assert len(turns) == 1
+        turn = turns[0]
+        assert turn["turn_index"] == 0
+        assert turn["turn_id"] == "turn1"
+        assert len(turn["events"]) == 2
+        assert turn["events"][0]["author"] == "user"
+        assert turn["events"][0]["content"] == {"text": "user msg"}
+        assert turn["events"][1]["author"] == "agent"
+        assert turn["events"][1]["content"] == {"text": "agent msg"}
+
+        mock_scenario_cls.assert_called_once_with(
+            starting_prompt="start", conversation_plan="plan"
+        )
+        mock_session_input_cls.assert_called_once()
+
+    @mock.patch("vertexai._genai._evals_common.ADK_SessionInput")  # fmt: skip
+    @mock.patch("vertexai._genai._evals_common.EvaluationGenerator")  # fmt: skip
+    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulator")  # fmt: skip
+    @mock.patch("vertexai._genai._evals_common.ConversationScenario")  # fmt: skip
+    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig")  # fmt: skip
+    @pytest.mark.asyncio
+    async def test_run_adk_user_simulation_missing_columns(
+        self,
+        mock_config_cls,
+        mock_scenario_cls,
+        mock_simulator_cls,
+        mock_generator_cls,
+        mock_session_input_cls,
+    ):
+        row = pd.Series({"conversation_plan": "plan"})
+        mock_agent = mock.Mock()
+
+        with pytest.raises(ValueError, match="User simulation requires"):
+            await _evals_common._run_adk_user_simulation(row, mock_agent)
+
+
 @pytest.mark.usefixtures("google_auth_mock")
 class TestLLMMetricHandlerPayload:
     def setup_method(self):