Skip to content

Commit a51f664

Browse files
authored
fix(SupportTargetOutputKey): support target output key (#1084)
1 parent 594eda0 commit a51f664

File tree

8 files changed

+709
-79
lines changed

8 files changed

+709
-79
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "uipath"
3-
version = "2.4.14"
3+
version = "2.4.15"
44
description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools."
55
readme = { file = "README.md", content-type = "text/markdown" }
66
requires-python = ">=3.11"

samples/calculator/evaluations/eval-sets/legacy.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
"batchSize": 10,
66
"evaluatorRefs": [
77
"equality",
8+
"equality-with-target-key",
89
"llm-as-a-judge",
910
"json-similarity",
1011
"trajectory"
Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,11 @@
1+
{
2+
"fileName": "equality-with-target-key.json",
3+
"id": "equality-with-target-key",
4+
"name": "Legacy Equality Evaluator With Target Key",
5+
"description": "An evaluator that judges the agent based on expected output under \"result\" key.",
6+
"category": 0,
7+
"type": 1,
8+
"targetOutputKey": "result",
9+
"createdAt": "2025-06-26T17:45:39.651Z",
10+
"updatedAt": "2025-06-26T17:45:39.651Z"
11+
}

src/uipath/eval/evaluators/legacy_exact_match_evaluator.py

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,24 @@ async def evaluate(
4040
Returns:
4141
EvaluationResult: Boolean result indicating exact match (True/False)
4242
"""
43+
actual_output = agent_execution.agent_output
44+
expected_output = evaluation_criteria.expected_output
45+
46+
if self.target_output_key and self.target_output_key != "*":
47+
if isinstance(actual_output, dict) and isinstance(expected_output, dict):
48+
if not (
49+
self.target_output_key in actual_output
50+
and self.target_output_key in expected_output
51+
):
52+
# Assuming that we should pass the test.
53+
expected_output = actual_output = {}
54+
else:
55+
if self.target_output_key in actual_output:
56+
actual_output = actual_output[self.target_output_key]
57+
if self.target_output_key in expected_output:
58+
expected_output = expected_output[self.target_output_key]
59+
4360
return BooleanEvaluationResult(
44-
score=self._canonical_json(agent_execution.agent_output)
45-
== self._canonical_json(evaluation_criteria.expected_output)
61+
score=self._canonical_json(actual_output)
62+
== self._canonical_json(expected_output)
4663
)

testcases/calculator-evals/run.sh

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ echo "Authenticating with UiPath..."
88
uv run uipath auth --client-id="$CLIENT_ID" --client-secret="$CLIENT_SECRET" --base-url="$BASE_URL"
99

1010
echo "Running evaluations with custom evaluator..."
11-
uv run uipath eval main ../../samples/calculator/evaluations/eval-sets/default.json --no-report
11+
uv run uipath eval main ../../samples/calculator/evaluations/eval-sets/legacy.json --no-report --output-file legacy.json
12+
uv run uipath eval main ../../samples/calculator/evaluations/eval-sets/default.json --no-report --output-file default.json
1213

1314
echo "Test completed successfully!"

testcases/calculator-evals/src/assert.py

Lines changed: 85 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,92 @@
1212
def main() -> None:
1313
"""Main assertion logic."""
1414
# Check if output file exists
15-
output_file = "__uipath/output.json"
15+
for output_file in ["default.json", "legacy.json"]:
16+
assert os.path.isfile(output_file), (
17+
f"Evaluation output file '{output_file}' not found"
18+
)
19+
print(f"✓ Found evaluation output file: {output_file}")
20+
21+
# Load evaluation results
22+
with open(output_file, "r", encoding="utf-8") as f:
23+
output_data = json.load(f)
24+
25+
print("✓ Loaded evaluation output")
26+
27+
# Extract output data
28+
output = output_data
29+
30+
# Validate structure
31+
assert "evaluationSetResults" in output, "Missing 'evaluationSetResults' in output"
32+
33+
evaluation_results = output["evaluationSetResults"]
34+
assert len(evaluation_results) > 0, "No evaluation results found"
35+
36+
print(f"✓ Found {len(evaluation_results)} evaluation result(s)")
37+
38+
# Validate each evaluation result
39+
passed_count = 0
40+
failed_count = 0
41+
skipped_count = 0
42+
has_positive_scores = False
43+
44+
for eval_result in evaluation_results:
45+
eval_name = eval_result.get("evaluationName", "Unknown")
46+
print(f"\n→ Validating: {eval_name}")
47+
48+
try:
49+
# Validate evaluation results are present
50+
eval_run_results = eval_result.get("evaluationRunResults", [])
51+
if len(eval_run_results) == 0:
52+
print(f" ⊘ Skipping '{eval_name}' (no evaluation run results)")
53+
skipped_count += 1
54+
continue
55+
56+
# Check that evaluations have scores > 0
57+
all_passed = True
58+
min_score = 100
59+
for eval_run in eval_run_results:
60+
evaluator_name = eval_run.get("evaluatorName", "Unknown")
61+
result = eval_run.get("result", {})
62+
score = result.get("score", 0)
63+
min_score = min(min_score, score)
64+
65+
# Check if score is greater than 0
66+
if score > 0:
67+
has_positive_scores = True
68+
print(f" ✓ {evaluator_name}: score={score:.1f}")
69+
else:
70+
print(f" ✗ {evaluator_name}: score={score:.1f} (must be > 0)")
71+
all_passed = False
72+
73+
if all_passed and min_score > 0:
74+
print(
75+
f" ✓ All evaluators passed for '{eval_name}' (min score: {min_score:.1f})"
76+
)
77+
passed_count += 1
78+
else:
79+
print(f" ✗ Some evaluators failed for '{eval_name}'")
80+
failed_count += 1
81+
82+
except Exception as e:
83+
print(f" ✗ Error validating '{eval_name}': {e}")
84+
failed_count += 1
1685

86+
# Final summary
87+
print(f"\n{'=' * 60}")
88+
print("Summary:")
89+
print(f" Total evaluations: {passed_count + failed_count + skipped_count}")
90+
print(f" ✓ Passed: {passed_count}")
91+
print(f" ✗ Failed: {failed_count}")
92+
print(f" ⊘ Skipped: {skipped_count}")
93+
print(f"{'=' * 60}")
94+
95+
assert failed_count == 0, "Some assertions failed"
96+
assert has_positive_scores, "No evaluation scores greater than 0 were found"
97+
98+
print("\n✅ All assertions passed!")
99+
100+
output_file = "__uipath/output.json"
17101
assert os.path.isfile(output_file), (
18102
f"Evaluation output file '{output_file}' not found"
19103
)
@@ -30,79 +114,6 @@ def main() -> None:
30114
assert status == "successful", f"Evaluation run failed with status: {status}"
31115
print("✓ Evaluation run status: successful")
32116

33-
# Extract output data
34-
output = output_data.get("output", {})
35-
36-
# Validate structure
37-
assert "evaluationSetResults" in output, "Missing 'evaluationSetResults' in output"
38-
39-
evaluation_results = output["evaluationSetResults"]
40-
assert len(evaluation_results) > 0, "No evaluation results found"
41-
42-
print(f"✓ Found {len(evaluation_results)} evaluation result(s)")
43-
44-
# Validate each evaluation result
45-
passed_count = 0
46-
failed_count = 0
47-
skipped_count = 0
48-
has_positive_scores = False
49-
50-
for eval_result in evaluation_results:
51-
eval_name = eval_result.get("evaluationName", "Unknown")
52-
print(f"\n→ Validating: {eval_name}")
53-
54-
try:
55-
# Validate evaluation results are present
56-
eval_run_results = eval_result.get("evaluationRunResults", [])
57-
if len(eval_run_results) == 0:
58-
print(f" ⊘ Skipping '{eval_name}' (no evaluation run results)")
59-
skipped_count += 1
60-
continue
61-
62-
# Check that evaluations have scores > 0
63-
all_passed = True
64-
min_score = 100
65-
for eval_run in eval_run_results:
66-
evaluator_name = eval_run.get("evaluatorName", "Unknown")
67-
result = eval_run.get("result", {})
68-
score = result.get("score", 0)
69-
min_score = min(min_score, score)
70-
71-
# Check if score is greater than 0
72-
if score > 0:
73-
has_positive_scores = True
74-
print(f" ✓ {evaluator_name}: score={score:.1f}")
75-
else:
76-
print(f" ✗ {evaluator_name}: score={score:.1f} (must be > 0)")
77-
all_passed = False
78-
79-
if all_passed and min_score > 0:
80-
print(
81-
f" ✓ All evaluators passed for '{eval_name}' (min score: {min_score:.1f})"
82-
)
83-
passed_count += 1
84-
else:
85-
print(f" ✗ Some evaluators failed for '{eval_name}'")
86-
failed_count += 1
87-
88-
except Exception as e:
89-
print(f" ✗ Error validating '{eval_name}': {e}")
90-
failed_count += 1
91-
92-
# Final summary
93-
print(f"\n{'=' * 60}")
94-
print("Summary:")
95-
print(f" Total evaluations: {passed_count + failed_count + skipped_count}")
96-
print(f" ✓ Passed: {passed_count}")
97-
print(f" ✗ Failed: {failed_count}")
98-
print(f" ⊘ Skipped: {skipped_count}")
99-
print(f"{'=' * 60}")
100-
101-
assert failed_count == 0, "Some assertions failed"
102-
assert has_positive_scores, "No evaluation scores greater than 0 were found"
103-
104-
print("\n✅ All assertions passed!")
105-
106117

107118
if __name__ == "__main__":
108119
main()

0 commit comments

Comments
 (0)