1515from __future__ import annotations
1616
1717from google .adk .evaluation .eval_case import Invocation
18+ from google .adk .evaluation .eval_case import InvocationEvent
19+ from google .adk .evaluation .eval_case import InvocationEvents
1820from google .adk .evaluation .eval_metrics import BaseCriterion
1921from google .adk .evaluation .eval_metrics import EvalMetric
2022from google .adk .evaluation .eval_metrics import EvalStatus
@@ -127,13 +129,18 @@ def create_test_template() -> str:
127129
128130def _create_test_evaluator_gemini (
129131 threshold : float ,
132+ * ,
133+ include_intermediate_responses_in_final : bool = False ,
130134) -> FinalResponseMatchV2Evaluator :
131135 evaluator = FinalResponseMatchV2Evaluator (
132136 EvalMetric (
133137 metric_name = "final_response_match_v2" ,
134138 threshold = threshold ,
135139 criterion = BaseCriterion (
136140 threshold = 0.5 ,
141+ include_intermediate_responses_in_final = (
142+ include_intermediate_responses_in_final
143+ ),
137144 ),
138145 ),
139146 )
@@ -168,6 +175,21 @@ def _create_test_invocations(
168175 return actual_invocation , expected_invocation
169176
170177
def _add_intermediate_text(invocation: Invocation, text: str) -> Invocation:
  """Replace the invocation's intermediate data with one text event.

  Args:
    invocation: The invocation to modify in place. Any existing
      `intermediate_data` is overwritten.
    text: The text carried by the single intermediate event.

  Returns:
    The same `invocation` object that was passed in.
  """
  # One text part wrapped in content with role "model".
  model_content = genai_types.Content(
      parts=[genai_types.Part(text=text)],
      role="model",
  )
  # The event is authored by "agent".
  agent_event = InvocationEvent(author="agent", content=model_content)
  invocation.intermediate_data = InvocationEvents(
      invocation_events=[agent_event]
  )
  return invocation
191+
192+
171193def test_format_auto_rater_prompt ():
172194 evaluator = _create_test_evaluator_gemini (threshold = 0.8 )
173195 actual_invocation , expected_invocation = _create_test_invocations (
@@ -193,6 +215,59 @@ def test_format_auto_rater_prompt():
193215"""
194216
195217
def test_format_auto_rater_prompt_uses_empty_text_for_missing_final_response():
  """Check the prompt when both final responses are set to None.

  The prompt must not contain the text "None", and both the agent and
  reference response labels must be followed directly by " ,".
  """
  evaluator = _create_test_evaluator_gemini(threshold=0.8)
  actual, expected = _create_test_invocations(
      "candidate text", "reference text"
  )
  # Drop the final response on both sides to simulate a missing answer.
  for invocation in (actual, expected):
    invocation.final_response = None

  prompt = evaluator.format_auto_rater_prompt(actual, expected)

  assert "None" not in prompt
  empty_slots = (
      '"Agent response": ,',
      '"Reference response": ,',
  )
  for fragment in empty_slots:
    assert fragment in prompt
233+
234+
def test_format_auto_rater_prompt_ignores_intermediate_by_default():
  """Check that intermediate text is left out of the prompt by default.

  The evaluator is built without enabling
  `include_intermediate_responses_in_final`; the final texts must appear
  in the prompt and the intermediate texts must not.
  """
  evaluator = _create_test_evaluator_gemini(threshold=0.8)
  actual, expected = _create_test_invocations(
      "candidate final", "reference final"
  )
  # Give each invocation one intermediate text event.
  intros = (
      (actual, "candidate intro"),
      (expected, "reference intro"),
  )
  for invocation, intro_text in intros:
    _add_intermediate_text(invocation, intro_text)

  prompt = evaluator.format_auto_rater_prompt(actual, expected)

  for final_text in ("candidate final", "reference final"):
    assert final_text in prompt
  for intro_text in ("candidate intro", "reference intro"):
    assert intro_text not in prompt
251+
252+
def test_format_auto_rater_prompt_includes_intermediate_when_enabled():
  """Check that intermediate text precedes the final text when enabled.

  The evaluator is built with `include_intermediate_responses_in_final`
  set to True; the prompt must contain each intermediate text followed by
  the matching final text.
  """
  evaluator = _create_test_evaluator_gemini(
      threshold=0.8,
      include_intermediate_responses_in_final=True,
  )
  actual, expected = _create_test_invocations(
      "candidate final", "reference final"
  )
  # Give each invocation one intermediate text event.
  intros = (
      (actual, "candidate intro"),
      (expected, "reference intro"),
  )
  for invocation, intro_text in intros:
    _add_intermediate_text(invocation, intro_text)

  prompt = evaluator.format_auto_rater_prompt(actual, expected)

  # NOTE(review): the separator between intermediate and final text (a
  # newline followed by a space) is kept exactly as found -- confirm it
  # matches the evaluator's join format.
  combined_texts = (
      "candidate intro\n candidate final",
      "reference intro\n reference final",
  )
  for combined in combined_texts:
    assert combined in prompt
269+
270+
196271def test_convert_auto_rater_response_to_score_valid ():
197272 evaluator = _create_test_evaluator_gemini (threshold = 0.8 )
198273 auto_rater_response = """```json
0 commit comments