Skip to content

Commit 1a400ee

Browse files
authored
Merge pull request #7 from agentevals-dev/peterj-patch-1
Modify evaluators.yaml with new evaluators
2 parents 431ce89 + 4dc1ff3 commit 1a400ee

1 file changed

Lines changed: 53 additions & 8 deletions

File tree

data/evaluators.yaml

Lines changed: 53 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,69 @@
1-
generatedAt: "2026-03-20T00:02:55Z"
1+
# AUTO-GENERATED by CI — do not edit manually.
2+
# Source: .github/workflows/build-index.yaml
3+
4+
generatedAt: "2026-03-23T09:41:13Z"
25

36
evaluators:
4-
- name: peters_evaluator
5-
description: "sample evaluator that returns a 0.123 score"
7+
- name: contains
8+
description: "Scores whether each final response contains a configured substring (case-sensitive or case-insensitive)"
9+
language: python
10+
path: evaluators/contains/contains.py
11+
tags: ["string","contains"]
12+
author: agentevals-dev
13+
lastUpdated: "%Y->- (origin/peterj/addsomeevaluators)Tae67759488d99a06373dae77d557449716bf8c9f:%M:HEADZ"
14+
- name: equals
15+
description: "Scores whether each final response exactly matches a configured expected string"
16+
language: python
17+
path: evaluators/equals/equals.py
18+
tags: ["string","equals"]
19+
author: agentevals-dev
20+
lastUpdated: "%Y->- (origin/peterj/addsomeevaluators)Tae67759488d99a06373dae77d557449716bf8c9f:%M:HEADZ"
21+
- name: is_json
22+
description: "Scores whether each final response parses as JSON (optional markdown code fence extraction)"
23+
language: python
24+
path: evaluators/is_json/is_json.py
25+
tags: ["json","structured"]
26+
author: agentevals-dev
27+
lastUpdated: "%Y->- (origin/peterj/addsomeevaluators)Tae67759488d99a06373dae77d557449716bf8c9f:%M:HEADZ"
28+
- name: levenshtein_ratio
29+
description: "Scores similarity of each response to a reference string using normalized Levenshtein distance"
630
language: python
7-
path: evaluators/peters_evaluator/peters_evaluator.py
8-
tags: ["test","example"]
31+
path: evaluators/levenshtein_ratio/levenshtein_ratio.py
32+
tags: ["string","levenshtein"]
33+
author: agentevals-dev
34+
lastUpdated: "%Y->- (origin/peterj/addsomeevaluators)Tae67759488d99a06373dae77d557449716bf8c9f:%M:HEADZ"
35+
- name: random_evaluator
36+
description: "Example evaluator that returns a random score between 0 and 1"
37+
language: python
38+
path: evaluators/random_evaluator/random_evaluator.py
39+
tags: ["test"]
940
author: peterj
10-
lastUpdated: "2026-03-20T00:02:55Z"
41+
lastUpdated: "%Y->-T08a2816b9a45cee04be3d26436b603d14145af75:%M:HEADZ"
42+
- name: regex_match
43+
description: "Scores whether each final response matches a configured regular expression"
44+
language: python
45+
path: evaluators/regex_match/regex_match.py
46+
tags: ["regex"]
47+
author: agentevals-dev
48+
lastUpdated: "%Y->- (origin/peterj/addsomeevaluators)Tae67759488d99a06373dae77d557449716bf8c9f:%M:HEADZ"
1149
- name: response_quality
1250
description: "Checks that responses are non-empty, meet a minimum length, and don't just echo back the user input"
1351
language: python
1452
path: evaluators/response_quality/response_quality.py
1553
tags: ["quality","response","length"]
1654
author: agentevals-dev
17-
lastUpdated: "2026-03-20T00:02:55Z"
55+
lastUpdated: "%Y->-T08a2816b9a45cee04be3d26436b603d14145af75:%M:HEADZ"
1856
- name: tool_coverage
1957
description: "Verifies that each invocation made at least a minimum number of tool calls"
2058
language: python
2159
path: evaluators/tool_coverage/tool_coverage.py
2260
tags: ["tools","coverage","validation"]
2361
author: agentevals-dev
24-
lastUpdated: "2026-03-20T00:02:55Z"
62+
lastUpdated: "%Y->-T49595e8fd6f984b4a66b90037037b650c31b952c:%M:HEADZ"
63+
- name: tool_sequence_match
64+
description: "Scores whether tool calls match an expected list of tool names (order-sensitive or multiset)"
65+
language: python
66+
path: evaluators/tool_sequence_match/tool_sequence_match.py
67+
tags: ["tools","trajectory"]
68+
author: agentevals-dev
69+
lastUpdated: "%Y->- (origin/peterj/addsomeevaluators)Tae67759488d99a06373dae77d557449716bf8c9f:%M:HEADZ"

0 commit comments

Comments
 (0)