|
4 | 4 | otherwise 0.0. |
5 | 5 |
|
6 | 6 | Config: |
7 | | - substring (str): Required for scoring; if omitted, the evaluator is a no-op (1.0). |
| 7 | + substring (str): Required. If omitted or blank, the evaluator returns status NOT_EVALUATED (score 0.0). |
8 | 8 |
|
9 | 9 | Usage in eval_config.yaml: |
10 | 10 | config: |
|
13 | 13 |
|
14 | 14 | from __future__ import annotations |
15 | 15 |
|
16 | | -from agentevals_evaluator_sdk import EvalInput, EvalResult, evaluator |
| 16 | +from agentevals_evaluator_sdk import EvalInput, EvalResult, EvalStatus, evaluator |
17 | 17 |
|
18 | 18 |
|
19 | 19 | @evaluator |
20 | 20 | def contains(input: EvalInput) -> EvalResult: |
21 | | - needle = (input.config.get("substring") or "").strip() |
22 | | - if not needle: |
| 21 | + substring = (input.config.get("substring") or "").strip() |
| 22 | + n = len(input.invocations) |
| 23 | + if not substring: |
23 | 24 | return EvalResult( |
24 | | - score=1.0, |
25 | | - per_invocation_scores=[1.0] * len(input.invocations), |
26 | | - details={"note": "no substring configured; skipping check"}, |
| 25 | + score=0.0, |
| 26 | + status=EvalStatus.NOT_EVALUATED, |
| 27 | + per_invocation_scores=[None] * n, |
| 28 | + details={"reason": "missing config: substring"}, |
27 | 29 | ) |
28 | 30 |
|
29 | 31 | case_insensitive = bool(input.config.get("case_insensitive", False)) |
30 | | - haystack_fn = str.lower if case_insensitive else lambda s: s |
31 | | - needle_cmp = haystack_fn(needle) |
| 32 | + normalize = str.lower if case_insensitive else lambda s: s |
| 33 | + substring_cmp = normalize(substring) |
32 | 34 |
|
33 | 35 | scores: list[float] = [] |
34 | 36 | issues: list[str] = [] |
35 | 37 |
|
36 | 38 | for inv in input.invocations: |
37 | | - text = (inv.final_response or "") |
| 39 | + response_text = inv.final_response or "" |
38 | 40 | if case_insensitive: |
39 | | - ok = needle_cmp in haystack_fn(text) |
| 41 | + ok = substring_cmp in normalize(response_text) |
40 | 42 | else: |
41 | | - ok = needle in text |
| 43 | + ok = substring in response_text |
42 | 44 | if ok: |
43 | 45 | scores.append(1.0) |
44 | 46 | else: |
45 | 47 | scores.append(0.0) |
46 | | - issues.append(f"{inv.invocation_id}: response does not contain {needle!r}") |
| 48 | + issues.append(f"{inv.invocation_id}: response does not contain {substring!r}") |
47 | 49 |
|
48 | 50 | overall = sum(scores) / len(scores) if scores else 0.0 |
49 | 51 | return EvalResult( |
|
0 commit comments