Merge pull request #7 from agentevals-dev/peterj-patch-1

peterj · web-flow · commit 1a400eefa046 · 2026-03-24T08:58:41.000+01:00
Modify evaluators.yaml with new evaluators
diff --git a/data/evaluators.yaml b/data/evaluators.yaml
@@ -1,24 +1,69 @@
-generatedAt: "2026-03-20T00:02:55Z"
+# AUTO-GENERATED by CI — do not edit manually.
+# Source: .github/workflows/build-index.yaml
+
+generatedAt: "2026-03-23T09:41:13Z"
 
 evaluators:
-  - name: peters_evaluator
-    description: "sample evaluator that returns a 0.123 score"
+  - name: contains
+    description: "Scores whether each final response contains a configured substring (case-sensitive or case-insensitive)"
+    language: python
+    path: evaluators/contains/contains.py
+    tags: ["string","contains"]
+    author: agentevals-dev
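+    lastUpdated: "%Y->- (origin/peterj/addsomeevaluators)Tae67759488d99a06373dae77d557449716bf8c9f:%M:HEADZ"  # FIXME(review): not a timestamp; previous values were ISO-8601 UTC (e.g. "2026-03-20T00:02:55Z"). Literal %Y/%M plus a ref name and commit hash suggest the generator passed a strftime pattern as a git pretty-format; affects every lastUpdated entry in this file - confirm in the build-index workflow.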
+  - name: equals
+    description: "Scores whether each final response exactly matches a configured expected string"
+    language: python
+    path: evaluators/equals/equals.py
+    tags: ["string","equals"]
+    author: agentevals-dev
+    lastUpdated: "%Y->- (origin/peterj/addsomeevaluators)Tae67759488d99a06373dae77d557449716bf8c9f:%M:HEADZ"
+  - name: is_json
+    description: "Scores whether each final response parses as JSON (optional markdown code fence extraction)"
+    language: python
+    path: evaluators/is_json/is_json.py
+    tags: ["json","structured"]
+    author: agentevals-dev
+    lastUpdated: "%Y->- (origin/peterj/addsomeevaluators)Tae67759488d99a06373dae77d557449716bf8c9f:%M:HEADZ"
+  - name: levenshtein_ratio
+    description: "Scores similarity of each response to a reference string using normalized Levenshtein distance"
     language: python
-    path: evaluators/peters_evaluator/peters_evaluator.py
-    tags: ["test","example"]
+    path: evaluators/levenshtein_ratio/levenshtein_ratio.py
+    tags: ["string","levenshtein"]
+    author: agentevals-dev
+    lastUpdated: "%Y->- (origin/peterj/addsomeevaluators)Tae67759488d99a06373dae77d557449716bf8c9f:%M:HEADZ"
+  - name: random_evaluator
+    description: "Example evaluator that returns a random score between 0 and 1"
+    language: python
+    path: evaluators/random_evaluator/random_evaluator.py
+    tags: ["test"]
     author: peterj
-    lastUpdated: "2026-03-20T00:02:55Z"
+    lastUpdated: "%Y->-T08a2816b9a45cee04be3d26436b603d14145af75:%M:HEADZ"
+  - name: regex_match
+    description: "Scores whether each final response matches a configured regular expression"
+    language: python
+    path: evaluators/regex_match/regex_match.py
+    tags: ["regex"]
+    author: agentevals-dev
+    lastUpdated: "%Y->- (origin/peterj/addsomeevaluators)Tae67759488d99a06373dae77d557449716bf8c9f:%M:HEADZ"
   - name: response_quality
     description: "Checks that responses are non-empty, meet a minimum length, and don't just echo back the user input"
     language: python
     path: evaluators/response_quality/response_quality.py
     tags: ["quality","response","length"]
     author: agentevals-dev
-    lastUpdated: "2026-03-20T00:02:55Z"
+    lastUpdated: "%Y->-T08a2816b9a45cee04be3d26436b603d14145af75:%M:HEADZ"
   - name: tool_coverage
     description: "Verifies that each invocation made at least a minimum number of tool calls"
     language: python
     path: evaluators/tool_coverage/tool_coverage.py
     tags: ["tools","coverage","validation"]
     author: agentevals-dev
-    lastUpdated: "2026-03-20T00:02:55Z"
+    lastUpdated: "%Y->-T49595e8fd6f984b4a66b90037037b650c31b952c:%M:HEADZ"
+  - name: tool_sequence_match
+    description: "Scores whether tool calls match an expected list of tool names (order-sensitive or multiset)"
+    language: python
+    path: evaluators/tool_sequence_match/tool_sequence_match.py
+    tags: ["tools","trajectory"]
+    author: agentevals-dev
+    lastUpdated: "%Y->- (origin/peterj/addsomeevaluators)Tae67759488d99a06373dae77d557449716bf8c9f:%M:HEADZ"