|
| 1 | +"""Exact string match evaluator. |
| 2 | +
|
| 3 | +Config: |
| 4 | + expected (str): Required. If omitted, returns NOT_EVALUATED. |
| 5 | + case_insensitive (bool, default False): Lowercase both strings before comparing. |
| 6 | + strip (bool, default True): Strip whitespace before compare. |
| 7 | +
|
| 8 | +Usage: |
| 9 | + config: |
| 10 | + expected: "4" |
| 11 | +""" |
| 12 | + |
| 13 | +from __future__ import annotations |
| 14 | + |
| 15 | +from agentevals_evaluator_sdk import EvalInput, EvalResult, EvalStatus, evaluator |
| 16 | + |
| 17 | + |
@evaluator
def equals(input: EvalInput) -> EvalResult:
    """Score each invocation by string equality against ``config["expected"]``.

    Config keys read from ``input.config``:
        expected: Value to compare against; it is converted with ``str()``.
            When absent (``None``), nothing is scored and a NOT_EVALUATED
            result is returned with one ``None`` entry per invocation.
        case_insensitive: Coerced with ``bool()``; defaults to False. When
            truthy, both sides are lowercased before comparison.
        strip: Coerced with ``bool()``; defaults to True. When truthy,
            surrounding whitespace is removed from both sides.

    Returns:
        An ``EvalResult`` whose ``per_invocation_scores`` holds 1.0 for each
        match and 0.0 for each mismatch, and whose ``score`` is their mean
        (0.0 when there are no invocations). ``details`` lists one message
        per mismatch, or is ``None`` if everything matched.
    """
    config = input.config
    target = config.get("expected")

    # Guard: without an expected value there is nothing to compare against.
    if target is None:
        return EvalResult(
            score=0.0,
            status=EvalStatus.NOT_EVALUATED,
            per_invocation_scores=[None] * len(input.invocations),
            details={"reason": "missing config: expected"},
        )

    fold_case = bool(config.get("case_insensitive", False))
    trim = bool(config.get("strip", True))

    def normalize(text: str) -> str:
        # Strip first, then lowercase, so both sides go through the same steps.
        if trim:
            text = text.strip()
        if fold_case:
            text = text.lower()
        return text

    wanted = normalize(str(target))
    per_invocation: list[float] = []
    mismatches: list[str] = []

    for invocation in input.invocations:
        actual = invocation.final_response
        # A missing/empty response is compared as the empty string.
        if normalize(actual or "") == wanted:
            per_invocation.append(1.0)
            continue
        per_invocation.append(0.0)
        # Report the raw (un-normalized) values so the message shows what was seen.
        mismatches.append(
            f"{invocation.invocation_id}: expected {target!r}, got {actual!r}"
        )

    if per_invocation:
        mean_score = sum(per_invocation) / len(per_invocation)
    else:
        mean_score = 0.0

    return EvalResult(
        score=mean_score,
        per_invocation_scores=per_invocation,
        details={"issues": mismatches} if mismatches else None,
    )
| 57 | + |
| 58 | + |
# Script entry point: when this file is executed directly (not imported),
# call ``run()`` on the decorated ``equals`` evaluator.
if __name__ == "__main__":
    equals.run()
0 commit comments