@@ -224,6 +224,7 @@ def test_multi_turn_predefined_metric(client):
 
     predefined_metrics = [
         types.RubricMetric.MULTI_TURN_GENERAL_QUALITY,
+        types.RubricMetric.MULTI_TURN_TEXT_QUALITY,
     ]
 
     evaluation_result = client.evals.evaluate(
@@ -233,11 +234,16 @@ def test_multi_turn_predefined_metric(client):
 
     assert isinstance(evaluation_result, types.EvaluationResult)
     assert evaluation_result.summary_metrics is not None
-    assert len(evaluation_result.summary_metrics) > 0
+    assert len(evaluation_result.summary_metrics) == 2
+    metric_names = set()
     for summary in evaluation_result.summary_metrics:
         assert isinstance(summary, types.AggregatedMetricResult)
-        assert summary.metric_name == "multi_turn_general_quality_v1"
+        metric_names.add(summary.metric_name)
         assert isinstance(summary.mean_score, float)
+    assert metric_names == {
+        "multi_turn_general_quality_v1",
+        "multi_turn_text_quality_v1",
+    }
 
     assert evaluation_result.eval_case_results is not None
     assert len(evaluation_result.eval_case_results) > 0
@@ -415,6 +421,61 @@ def test_evaluation_gecko_text2video_metric(client):
         assert case_result.response_candidate_results is not None
 
 
+def test_single_turn_rubric_metrics(client):
+    """Tests single-turn text quality RubricMetrics with reference."""
+    prompts_df = pd.DataFrame(
+        {
+            "prompt": ["Summarize the benefits of regular exercise."],
+            "response": [
+                "Exercise improves cardiovascular health, boosts mood through"
+                " endorphin release, strengthens muscles and bones, and enhances"
+                " sleep quality. Regular physical activity also helps maintain a"
+                " healthy weight and reduces the risk of chronic diseases."
+            ],
+            "reference": [
+                "Exercise improves heart health, mood, muscle strength, and sleep."
+            ],
+            "context": [
+                "Exercise improves heart health, mood, muscle strength, and sleep."
+            ],
+        }
+    )
+
+    eval_dataset = types.EvaluationDataset(
+        eval_dataset_df=prompts_df,
+        candidate_name="gemini-2.5-flash",
+    )
+
+    predefined_metrics = [
+        types.RubricMetric.INSTRUCTION_FOLLOWING,
+        types.RubricMetric.GENERAL_QUALITY,
+        types.RubricMetric.TEXT_QUALITY,
+        types.RubricMetric.GROUNDING,
+        types.RubricMetric.SAFETY,
+        types.RubricMetric.FINAL_RESPONSE_MATCH,
+        types.RubricMetric.FINAL_RESPONSE_REFERENCE_FREE,
+    ]
+
+    evaluation_result = client.evals.evaluate(
+        dataset=eval_dataset,
+        metrics=predefined_metrics,
+    )
+
+    assert isinstance(evaluation_result, types.EvaluationResult)
+    assert evaluation_result.summary_metrics is not None
+    assert len(evaluation_result.summary_metrics) > 0
+    for summary in evaluation_result.summary_metrics:
+        assert isinstance(summary, types.AggregatedMetricResult)
+        assert summary.metric_name is not None
+
+    assert evaluation_result.eval_case_results is not None
+    assert len(evaluation_result.eval_case_results) > 0
+    for case_result in evaluation_result.eval_case_results:
+        assert isinstance(case_result, types.EvalCaseResult)
+        assert case_result.eval_case_index is not None
+        assert case_result.response_candidate_results is not None
+
+
 pytestmark = pytest_helper.setup(
     file=__file__,
     globals_for_file=globals(),
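For reviewers unfamiliar with the evals surface exercised here: both the modified multi-turn test and the new single-turn test follow the same call pattern, building a types.EvaluationDataset from a pandas DataFrame and passing predefined types.RubricMetric members to client.evals.evaluate. A minimal standalone sketch of that pattern, assuming a client fixture and types namespace configured as in this test module (both assumptions taken from the test code above, not a full description of the SDK), looks like:

import pandas as pd

# Assumed setup: `client` and `types` come from the module's test fixtures
# (a configured evals client and its types namespace), mirroring the tests
# in this diff rather than documenting the SDK exhaustively.
prompts_df = pd.DataFrame(
    {
        "prompt": ["Summarize the benefits of regular exercise."],
        "response": ["Exercise improves heart health, mood, and sleep."],
    }
)

eval_dataset = types.EvaluationDataset(
    eval_dataset_df=prompts_df,
    candidate_name="gemini-2.5-flash",
)

# A single predefined metric; the new test above shows combining several.
# Omitting the "reference"/"context" columns assumes GENERAL_QUALITY can
# score bare prompt/response pairs (the test above always supplies them).
result = client.evals.evaluate(
    dataset=eval_dataset,
    metrics=[types.RubricMetric.GENERAL_QUALITY],
)

for summary in result.summary_metrics:
    print(summary.metric_name, summary.mean_score)

The new test's set-based assertion in the multi-turn hunk follows the same idea at a smaller scale: collect metric_name values across summary_metrics, then compare the whole set at once, which keeps the assertion order-independent.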