Skip to content

Commit 9ea4aa6

Browse files
jsondai authored and copybara-github committed
feat: GenAI Client(evals) - Support N+1 Agent Engine inference via agent_data in run_inference()
PiperOrigin-RevId: 909006778
1 parent 5788c50 commit 9ea4aa6

2 files changed

Lines changed: 378 additions & 136 deletions

File tree

tests/unit/vertexai/genai/replays/test_run_inference.py

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,211 @@ def test_inference_with_eval_cases_multi_turn_agent_data(client):
9595
assert "agent_data" in inference_result.eval_dataset_df.columns
9696

9797

98+
def test_inference_with_eval_cases_agent_engine_agent_data(client):
    """Tests N+1 inference with agent_data via remote Agent Engine."""
    # Resolve the remote Agent Engine resource that performs inference.
    agent_engine = client.agent_engines.get(
        name="projects/977012026409/locations/us-central1"
        "/reasoningEngines/7188347537655332864"
    )

    def _event(author, role, text):
        # Build a single conversation event holding one text part.
        return types.evals.AgentEvent(
            author=author,
            content=genai_types.Content(
                role=role,
                parts=[genai_types.Part(text=text)],
            ),
        )

    # Turn 0 is a complete user/model exchange; turn 1 ends on a user
    # event, so the dataset requires a fresh (N+1) model response.
    completed_turn = types.evals.ConversationTurn(
        turn_index=0,
        events=[
            _event("user", "user", "My name is Bob."),
            _event("model", "model", "Hi Bob! Nice to meet you."),
        ],
    )
    pending_turn = types.evals.ConversationTurn(
        turn_index=1,
        events=[_event("user", "user", "What is my name?")],
    )
    eval_dataset = types.EvaluationDataset(
        eval_cases=[
            types.EvalCase(
                agent_data=types.evals.AgentData(
                    turns=[completed_turn, pending_turn],
                ),
            )
        ]
    )

    inference_result = client.evals.run_inference(
        agent=agent_engine,
        src=eval_dataset,
    )

    assert isinstance(inference_result, types.EvaluationDataset)
    assert inference_result.eval_dataset_df is not None
    assert "agent_data" in inference_result.eval_dataset_df.columns
153+
154+
155+
def test_inference_with_prompt_column_local_agent(client):
    """Tests run_inference with a prompt column and a local ADK agent.

    Exercises the existing prompt-based inference path: an
    EvaluationDataset wrapping a DataFrame that has a 'prompt' column
    is evaluated against a local LlmAgent, which should answer the
    prompt normally.
    """
    import pandas as pd

    local_agent = LlmAgent(
        name="prompt_agent",
        model="gemini-2.5-flash",
        instruction="You are a helpful assistant. Answer questions concisely.",
    )

    dataset = types.EvaluationDataset(
        eval_dataset_df=pd.DataFrame(
            {"prompt": ["What is the capital of France?"]}
        )
    )

    inference_result = client.evals.run_inference(
        agent=local_agent,
        src=dataset,
    )
    assert isinstance(inference_result, types.EvaluationDataset)
    frame = inference_result.eval_dataset_df
    assert frame is not None
    assert "response" in frame.columns
    # The response should be a non-empty string (actual model answer).
    answer = frame["response"].iloc[0]
    assert answer is not None
    assert isinstance(answer, str)
    assert len(answer) > 0
190+
191+
192+
def test_inference_with_completed_and_incomplete_agent_data(client):
    """Tests run_inference with a mix of completed and N+1 agent traces.

    Two eval cases are submitted:
      - Row 0: completed trace (last event authored by the agent) —
        BYOD, so no inference call should be made for it.
      - Row 1: incomplete trace (last event authored by the user) —
        triggers N+1 inference.

    The completed row should surface the existing agent response; the
    N+1 row should run inference normally.
    """
    agent = LlmAgent(
        name="mixed_agent",
        model="gemini-2.5-flash",
        instruction="You are a helpful assistant. Answer questions concisely.",
    )

    def _turn(index, exchanges):
        # Build a ConversationTurn from (author, role, text) triples.
        return types.evals.ConversationTurn(
            turn_index=index,
            events=[
                types.evals.AgentEvent(
                    author=author,
                    content=genai_types.Content(
                        role=role,
                        parts=[genai_types.Part(text=text)],
                    ),
                )
                for author, role, text in exchanges
            ],
        )

    # Row 0: Completed trace — last event is from the agent.
    completed_case = types.EvalCase(
        agent_data=types.evals.AgentData(
            turns=[
                _turn(
                    0,
                    [
                        ("user", "user", "What color is the sky?"),
                        ("mixed_agent", "model", "The sky is blue."),
                    ],
                ),
            ],
        ),
    )

    # Row 1: N+1 trace — last event is from the user.
    n_plus_1_case = types.EvalCase(
        agent_data=types.evals.AgentData(
            turns=[
                _turn(
                    0,
                    [
                        ("user", "user", "My favorite number is 7."),
                        ("mixed_agent", "model", "Got it, 7!"),
                    ],
                ),
                _turn(1, [("user", "user", "What is my favorite number?")]),
            ],
        ),
    )

    inference_result = client.evals.run_inference(
        agent=agent,
        src=types.EvaluationDataset(
            eval_cases=[completed_case, n_plus_1_case]
        ),
    )
    assert isinstance(inference_result, types.EvaluationDataset)
    result_df = inference_result.eval_dataset_df
    assert result_df is not None
    assert len(result_df) == 2

    # Row 0 (completed trace): response should contain the existing
    # agent answer — "The sky is blue."
    row0_response = result_df["response"].iloc[0]
    assert row0_response is not None
    assert "blue" in row0_response.lower()

    # Row 1 (N+1 inference): response should be a non-empty string
    # produced by actual inference.
    row1_response = result_df["response"].iloc[1]
    assert row1_response is not None
    assert isinstance(row1_response, str)
    assert len(row1_response) > 0
301+
302+
98303
pytestmark = pytest_helper.setup(
99304
file=__file__,
100305
globals_for_file=globals(),

0 commit comments

Comments
 (0)