@@ -95,6 +95,211 @@ def test_inference_with_eval_cases_multi_turn_agent_data(client):
9595 assert "agent_data" in inference_result .eval_dataset_df .columns
9696
9797
def test_inference_with_eval_cases_agent_engine_agent_data(client):
    """Tests N+1 inference with agent_data against a remote Agent Engine.

    Builds a two-turn conversation whose final event comes from the user,
    so run_inference must produce the next (N+1) agent response via the
    remote reasoning engine.
    """
    remote_agent = client.agent_engines.get(
        name="projects/977012026409/locations/us-central1"
        "/reasoningEngines/7188347537655332864"
    )

    def _user_event(text):
        # Helper: a user-authored event wrapping a single text part.
        return types.evals.AgentEvent(
            author="user",
            content=genai_types.Content(
                role="user", parts=[genai_types.Part(text=text)]
            ),
        )

    first_turn = types.evals.ConversationTurn(
        turn_index=0,
        events=[
            _user_event("My name is Bob."),
            types.evals.AgentEvent(
                author="model",
                content=genai_types.Content(
                    role="model",
                    parts=[genai_types.Part(text="Hi Bob! Nice to meet you.")],
                ),
            ),
        ],
    )
    # Second turn ends on a user event — this is the incomplete (N+1) part.
    second_turn = types.evals.ConversationTurn(
        turn_index=1,
        events=[_user_event("What is my name?")],
    )

    dataset = types.EvaluationDataset(
        eval_cases=[
            types.EvalCase(
                agent_data=types.evals.AgentData(
                    turns=[first_turn, second_turn]
                )
            )
        ]
    )

    result = client.evals.run_inference(agent=remote_agent, src=dataset)
    assert isinstance(result, types.EvaluationDataset)
    assert result.eval_dataset_df is not None
    assert "agent_data" in result.eval_dataset_df.columns
153+
154+
def test_inference_with_prompt_column_local_agent(client):
    """Tests run_inference with a prompt column and a local ADK agent.

    Exercises the existing prompt-based inference path: a DataFrame
    carrying a 'prompt' column is supplied together with a local
    LlmAgent, which should answer the prompt normally.
    """
    import pandas as pd

    local_agent = LlmAgent(
        name="prompt_agent",
        model="gemini-2.5-flash",
        instruction="You are a helpful assistant. Answer questions concisely.",
    )

    dataset = types.EvaluationDataset(
        eval_dataset_df=pd.DataFrame(
            {"prompt": ["What is the capital of France?"]}
        )
    )

    result = client.evals.run_inference(agent=local_agent, src=dataset)
    assert isinstance(result, types.EvaluationDataset)

    df = result.eval_dataset_df
    assert df is not None
    assert "response" in df.columns

    # A real model answer should come back as a non-empty string.
    answer = df["response"].iloc[0]
    assert answer is not None
    assert isinstance(answer, str)
    assert len(answer) > 0
190+
191+
def test_inference_with_completed_and_incomplete_agent_data(client):
    """Tests run_inference with a mix of completed and N+1 agent traces.

    Two eval_cases are provided:
      - Row 0: completed trace (last event from the agent) — BYOD, so the
        stored agent response is returned without any inference call.
      - Row 1: incomplete trace (last event from the user) — triggers
        normal N+1 inference.
    """
    local_agent = LlmAgent(
        name="mixed_agent",
        model="gemini-2.5-flash",
        instruction="You are a helpful assistant. Answer questions concisely.",
    )

    def _event(author, role, text):
        # Helper: one conversation event with a single text part.
        return types.evals.AgentEvent(
            author=author,
            content=genai_types.Content(
                role=role, parts=[genai_types.Part(text=text)]
            ),
        )

    # Row 0: completed trace — the agent has the final word.
    completed_case = types.EvalCase(
        agent_data=types.evals.AgentData(
            turns=[
                types.evals.ConversationTurn(
                    turn_index=0,
                    events=[
                        _event("user", "user", "What color is the sky?"),
                        _event("mixed_agent", "model", "The sky is blue."),
                    ],
                ),
            ],
        ),
    )

    # Row 1: N+1 trace — the user has the final word.
    n_plus_1_case = types.EvalCase(
        agent_data=types.evals.AgentData(
            turns=[
                types.evals.ConversationTurn(
                    turn_index=0,
                    events=[
                        _event("user", "user", "My favorite number is 7."),
                        _event("mixed_agent", "model", "Got it, 7!"),
                    ],
                ),
                types.evals.ConversationTurn(
                    turn_index=1,
                    events=[
                        _event("user", "user", "What is my favorite number?"),
                    ],
                ),
            ],
        ),
    )

    dataset = types.EvaluationDataset(
        eval_cases=[completed_case, n_plus_1_case]
    )

    result = client.evals.run_inference(agent=local_agent, src=dataset)
    assert isinstance(result, types.EvaluationDataset)

    df = result.eval_dataset_df
    assert df is not None
    assert len(df) == 2

    # Row 0 (completed trace): the stored answer — "The sky is blue." —
    # should be surfaced as-is.
    first_response = df["response"].iloc[0]
    assert first_response is not None
    assert "blue" in first_response.lower()

    # Row 1 (N+1 inference): a non-empty string from an actual model call.
    second_response = df["response"].iloc[1]
    assert second_response is not None
    assert isinstance(second_response, str)
    assert len(second_response) > 0
301+
302+
98303pytestmark = pytest_helper .setup (
99304 file = __file__ ,
100305 globals_for_file = globals (),
0 commit comments