1515from __future__ import annotations
1616
1717from google .adk .evaluation .eval_case import Invocation
18- from google .adk .evaluation .eval_case import InvocationEvent
19- from google .adk .evaluation .eval_case import InvocationEvents
2018from google .adk .evaluation .eval_metrics import BaseCriterion
2119from google .adk .evaluation .eval_metrics import EvalMetric
2220from google .adk .evaluation .eval_metrics import EvalStatus
@@ -129,18 +127,13 @@ def create_test_template() -> str:
129127
130128def _create_test_evaluator_gemini (
131129 threshold : float ,
132- * ,
133- include_intermediate_responses_in_final : bool = False ,
134130) -> FinalResponseMatchV2Evaluator :
135131 evaluator = FinalResponseMatchV2Evaluator (
136132 EvalMetric (
137133 metric_name = "final_response_match_v2" ,
138134 threshold = threshold ,
139135 criterion = BaseCriterion (
140136 threshold = 0.5 ,
141- include_intermediate_responses_in_final = (
142- include_intermediate_responses_in_final
143- ),
144137 ),
145138 ),
146139 )
@@ -175,21 +168,6 @@ def _create_test_invocations(
175168 return actual_invocation , expected_invocation
176169
177170
178- def _add_intermediate_text (invocation : Invocation , text : str ) -> Invocation :
179- invocation .intermediate_data = InvocationEvents (
180- invocation_events = [
181- InvocationEvent (
182- author = "agent" ,
183- content = genai_types .Content (
184- parts = [genai_types .Part (text = text )],
185- role = "model" ,
186- ),
187- ),
188- ]
189- )
190- return invocation
191-
192-
193171def test_format_auto_rater_prompt ():
194172 evaluator = _create_test_evaluator_gemini (threshold = 0.8 )
195173 actual_invocation , expected_invocation = _create_test_invocations (
@@ -215,59 +193,6 @@ def test_format_auto_rater_prompt():
215193"""
216194
217195
218- def test_format_auto_rater_prompt_uses_empty_text_for_missing_final_response ():
219- evaluator = _create_test_evaluator_gemini (threshold = 0.8 )
220- actual_invocation , expected_invocation = _create_test_invocations (
221- "candidate text" , "reference text"
222- )
223- actual_invocation .final_response = None
224- expected_invocation .final_response = None
225-
226- prompt = evaluator .format_auto_rater_prompt (
227- actual_invocation , expected_invocation
228- )
229-
230- assert "None" not in prompt
231- assert '"Agent response": ,' in prompt
232- assert '"Reference response": ,' in prompt
233-
234-
235- def test_format_auto_rater_prompt_ignores_intermediate_by_default ():
236- evaluator = _create_test_evaluator_gemini (threshold = 0.8 )
237- actual_invocation , expected_invocation = _create_test_invocations (
238- "candidate final" , "reference final"
239- )
240- _add_intermediate_text (actual_invocation , "candidate intro" )
241- _add_intermediate_text (expected_invocation , "reference intro" )
242-
243- prompt = evaluator .format_auto_rater_prompt (
244- actual_invocation , expected_invocation
245- )
246-
247- assert "candidate final" in prompt
248- assert "reference final" in prompt
249- assert "candidate intro" not in prompt
250- assert "reference intro" not in prompt
251-
252-
253- def test_format_auto_rater_prompt_includes_intermediate_when_enabled ():
254- evaluator = _create_test_evaluator_gemini (
255- threshold = 0.8 , include_intermediate_responses_in_final = True
256- )
257- actual_invocation , expected_invocation = _create_test_invocations (
258- "candidate final" , "reference final"
259- )
260- _add_intermediate_text (actual_invocation , "candidate intro" )
261- _add_intermediate_text (expected_invocation , "reference intro" )
262-
263- prompt = evaluator .format_auto_rater_prompt (
264- actual_invocation , expected_invocation
265- )
266-
267- assert "candidate intro\n candidate final" in prompt
268- assert "reference intro\n reference final" in prompt
269-
270-
271196def test_convert_auto_rater_response_to_score_valid ():
272197 evaluator = _create_test_evaluator_gemini (threshold = 0.8 )
273198 auto_rater_response = """```json
0 commit comments