Skip to content

Commit 08a2816

Browse files
committed
fix evaluators to use the renamed sdk
Signed-off-by: Peter Jausovec <peter.jausovec@solo.io>
1 parent 138cc61 commit 08a2816

6 files changed

Lines changed: 50 additions & 37 deletions

File tree

evaluators/peters_evaluator/evaluator.yaml

Lines changed: 0 additions & 6 deletions
This file was deleted.

evaluators/peters_evaluator/peters_evaluator.py

Lines changed: 0 additions & 21 deletions
This file was deleted.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
name: random_evaluator
2+
description: 'Example evaluator that returns a random score between 0 and 1'
3+
language: python
4+
entrypoint: random_evaluator.py
5+
tags: ["test"]
6+
author: 'peterj'
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
"""Custom evaluator: random_evaluator
2+
3+
Returns a uniformly random score in the range [0.0, 1.0), ignoring the evaluation input.
4+
5+
Usage in eval_config.yaml:
6+
7+
evaluators:
8+
- name: random_evaluator
9+
type: remote
10+
source: github
11+
ref: evaluators/random_evaluator/random_evaluator.py
12+
threshold: 0.5
13+
executor: local
14+
"""
15+
16+
from agentevals_evaluator_sdk import evaluator, EvalInput, EvalResult
17+
import random
18+
19+
20+
@evaluator
def random_evaluator(input: EvalInput) -> EvalResult:
    """Produce an evaluation result carrying a random score.

    The score is drawn from ``random.random()``, i.e. a float in
    ``[0.0, 1.0)``. The ``input`` argument is accepted so the function
    matches the evaluator signature, but it is never inspected.

    Returns:
        An ``EvalResult`` with the random score and a fixed details message.
    """
    return EvalResult(
        score=random.random(),
        details={"message": "All good"},
    )
24+
25+
26+
if __name__ == "__main__":
27+
random_evaluator.run()

evaluators/response_quality/response_quality.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,23 @@
1-
"""Community evaluator: response_quality
1+
"""Example custom evaluator: checks that every invocation has a non-empty response
2+
that meets a configurable minimum length and doesn't just parrot back the user input.
23
3-
Checks that every invocation has a non-empty response, meets a configurable
4-
minimum length, and doesn't just parrot back the user input.
4+
Install the SDK standalone: pip install agentevals-evaluator-sdk
55
6-
Config options:
7-
min_response_length (int): Minimum character length for responses (default: 10)
6+
Usage in eval_config.yaml:
7+
8+
evaluators:
9+
- name: response_quality
10+
type: code
11+
path: ./examples/custom_evaluators/response_quality.py
12+
threshold: 0.7
13+
config:
14+
min_response_length: 20
815
"""
916

10-
from agentevals_grader_sdk import grader, EvalInput, EvalResult
17+
from agentevals_evaluator_sdk import EvalInput, EvalResult, evaluator
1118

1219

13-
@grader
20+
@evaluator
1421
def response_quality(input: EvalInput) -> EvalResult:
1522
min_len = input.config.get("min_response_length", 10)
1623
scores: list[float] = []
@@ -37,7 +44,7 @@ def response_quality(input: EvalInput) -> EvalResult:
3744
and inv.final_response.strip().lower() == inv.user_content.strip().lower()
3845
):
3946
score -= 0.5
40-
issues.append(f"{inv.invocation_id}: response echoes user input")
47+
issues.append(f"{inv.invocation_id}: response is just the user input echoed back")
4148

4249
scores.append(max(0.0, score))
4350

evaluators/tool_coverage/tool_coverage.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@
88
min_tool_calls (int): Minimum tool calls per invocation (default: 1)
99
"""
1010

11-
from agentevals_grader_sdk import grader, EvalInput, EvalResult
11+
from agentevals_evaluator_sdk import EvalInput, EvalResult, evaluator
1212

1313

14-
@grader
14+
@evaluator
1515
def tool_coverage(input: EvalInput) -> EvalResult:
1616
min_calls = input.config.get("min_tool_calls", 1)
1717
scores: list[float] = []

0 commit comments

Comments
 (0)