Skip to content

Commit cae2337

Browse files
wzhang2705 authored and GWeale committed
feat: add RubricBasedMultiTurnTrajectoryEvaluator
Change-Id: Ic0464a2cf0080a2abd63543a9bd153fbf41c3e35
1 parent 4baccf6 commit cae2337

6 files changed

Lines changed: 704 additions & 0 deletions

src/google/adk/evaluation/eval_metrics.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,10 @@ class PrebuiltMetrics(Enum):
6767

6868
MULTI_TURN_TOOL_USE_QUALITY_V1 = "multi_turn_tool_use_quality_v1"
6969

70+
RUBRIC_BASED_MULTI_TURN_TRAJECTORY_QUALITY_V1 = (
71+
"rubric_based_multi_turn_trajectory_quality_v1"
72+
)
73+
7074

7175
MetricName: TypeAlias = Union[str, PrebuiltMetrics]
7276
Threshold: TypeAlias = float

src/google/adk/evaluation/metric_evaluator_registry.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
from .metric_info_providers import PerTurnUserSimulatorQualityV1MetricInfoProvider
3434
from .metric_info_providers import ResponseEvaluatorMetricInfoProvider
3535
from .metric_info_providers import RubricBasedFinalResponseQualityV1EvaluatorMetricInfoProvider
36+
from .metric_info_providers import RubricBasedMultiTurnTrajectoryMetricInfoProvider
3637
from .metric_info_providers import RubricBasedToolUseV1EvaluatorMetricInfoProvider
3738
from .metric_info_providers import SafetyEvaluatorV1MetricInfoProvider
3839
from .metric_info_providers import TrajectoryEvaluatorMetricInfoProvider
@@ -41,6 +42,7 @@
4142
from .multi_turn_trajectory_quality_evaluator import MultiTurnTrajectoryQualityV1Evaluator
4243
from .response_evaluator import ResponseEvaluator
4344
from .rubric_based_final_response_quality_v1 import RubricBasedFinalResponseQualityV1Evaluator
45+
from .rubric_based_multi_turn_trajectory_evaluator import RubricBasedMultiTurnTrajectoryEvaluator
4446
from .rubric_based_tool_use_quality_v1 import RubricBasedToolUseV1Evaluator
4547
from .safety_evaluator import SafetyEvaluatorV1
4648
from .simulation.per_turn_user_simulator_quality_v1 import PerTurnUserSimulatorQualityV1
@@ -164,6 +166,10 @@ def _get_default_metric_evaluator_registry() -> MetricEvaluatorRegistry:
164166
metric_info=PerTurnUserSimulatorQualityV1MetricInfoProvider().get_metric_info(),
165167
evaluator=PerTurnUserSimulatorQualityV1,
166168
)
169+
metric_evaluator_registry.register_evaluator(
170+
metric_info=RubricBasedMultiTurnTrajectoryMetricInfoProvider().get_metric_info(),
171+
evaluator=RubricBasedMultiTurnTrajectoryEvaluator,
172+
)
167173

168174
return metric_evaluator_registry
169175

src/google/adk/evaluation/metric_info_providers.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -242,3 +242,21 @@ def get_metric_info(self) -> MetricInfo:
242242
interval=Interval(min_value=0.0, max_value=1.0)
243243
),
244244
)
245+
246+
247+
class RubricBasedMultiTurnTrajectoryMetricInfoProvider(MetricInfoProvider):
248+
"""Metric info provider for RubricBasedMultiTurnTrajectory."""
249+
250+
def get_metric_info(self) -> MetricInfo:
251+
return MetricInfo(
252+
metric_name=PrebuiltMetrics.RUBRIC_BASED_MULTI_TURN_TRAJECTORY_QUALITY_V1,
253+
description=(
254+
"This metric evaluates the agent's multi-turn trajectory against"
255+
" a set of user-provided rubrics using an LLM as a judge. Value"
256+
" range for this metric is [0,1], with values closer to 1 more"
257+
" desirable."
258+
),
259+
metric_value_info=MetricValueInfo(
260+
interval=Interval(min_value=0.0, max_value=1.0)
261+
),
262+
)

0 commit comments

Comments
 (0)