fix(SupportTargetOutputKey): support target output key (#1084)

akshaylive · web-flow · commit a51f664d5571 · 2026-01-13T15:17:44.000-08:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "uipath"
-version = "2.4.14"
+version = "2.4.15"
 description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools."
 readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.11"
diff --git a/samples/calculator/evaluations/eval-sets/legacy.json b/samples/calculator/evaluations/eval-sets/legacy.json
@@ -5,6 +5,7 @@
   "batchSize": 10,
   "evaluatorRefs": [
     "equality",
+    "equality-with-target-key",
     "llm-as-a-judge",
     "json-similarity",
     "trajectory"
diff --git a/samples/calculator/evaluations/evaluators/legacy-equality-with-target-key.json b/samples/calculator/evaluations/evaluators/legacy-equality-with-target-key.json
@@ -0,0 +1,11 @@
+{
+    "fileName": "equality-with-target-key.json",
+    "id": "equality-with-target-key",
+    "name": "Legacy Equality Evaluator With Target Key",
+    "description": "An evaluator that judges the agent based on the expected output under the \"result\" key.",
+    "category": 0,
+    "type": 1,
+    "targetOutputKey": "result",
+    "createdAt": "2025-06-26T17:45:39.651Z",
+    "updatedAt": "2025-06-26T17:45:39.651Z"
+}
diff --git a/src/uipath/eval/evaluators/legacy_exact_match_evaluator.py b/src/uipath/eval/evaluators/legacy_exact_match_evaluator.py
@@ -40,7 +40,24 @@ async def evaluate(
         Returns:
             EvaluationResult: Boolean result indicating exact match (True/False)
         """
+        actual_output = agent_execution.agent_output
+        expected_output = evaluation_criteria.expected_output
+
+        if self.target_output_key and self.target_output_key != "*":
+            if isinstance(actual_output, dict) and isinstance(expected_output, dict):
+                if not (
+                    self.target_output_key in actual_output
+                    and self.target_output_key in expected_output
+                ):
+                    # Key missing on either side: compare {} == {} so the test passes.
+                    expected_output = actual_output = {}
+                else:
+                    if self.target_output_key in actual_output:
+                        actual_output = actual_output[self.target_output_key]
+                    if self.target_output_key in expected_output:
+                        expected_output = expected_output[self.target_output_key]
+
         return BooleanEvaluationResult(
-            score=self._canonical_json(agent_execution.agent_output)
-            == self._canonical_json(evaluation_criteria.expected_output)
+            score=self._canonical_json(actual_output)
+            == self._canonical_json(expected_output)
         )
diff --git a/testcases/calculator-evals/run.sh b/testcases/calculator-evals/run.sh
@@ -8,6 +8,7 @@ echo "Authenticating with UiPath..."
 uv run uipath auth --client-id="$CLIENT_ID" --client-secret="$CLIENT_SECRET" --base-url="$BASE_URL"
 
 echo "Running evaluations with custom evaluator..."
-uv run uipath eval main ../../samples/calculator/evaluations/eval-sets/default.json --no-report
+uv run uipath eval main ../../samples/calculator/evaluations/eval-sets/legacy.json --no-report --output-file legacy.json
+uv run uipath eval main ../../samples/calculator/evaluations/eval-sets/default.json --no-report --output-file default.json
 
 echo "Test completed successfully!"
diff --git a/testcases/calculator-evals/src/assert.py b/testcases/calculator-evals/src/assert.py
@@ -12,8 +12,92 @@
 def main() -> None:
     """Main assertion logic."""
     # Check if output file exists
-    output_file = "__uipath/output.json"
+    for output_file in ["default.json", "legacy.json"]:
+        assert os.path.isfile(output_file), (
+            f"Evaluation output file '{output_file}' not found"
+        )
+        print(f"✓ Found evaluation output file: {output_file}")
+
+        # Load evaluation results
+        with open(output_file, "r", encoding="utf-8") as f:
+            output_data = json.load(f)
+
+        print("✓ Loaded evaluation output")
+
+        # Results sit at the top level of this file (no "output" wrapper)
+        output = output_data
+
+        # Validate structure
+        assert "evaluationSetResults" in output, "Missing 'evaluationSetResults' in output"
+
+        evaluation_results = output["evaluationSetResults"]
+        assert len(evaluation_results) > 0, "No evaluation results found"
+
+        print(f"✓ Found {len(evaluation_results)} evaluation result(s)")
+
+        # Validate each evaluation result
+        passed_count = 0
+        failed_count = 0
+        skipped_count = 0
+        has_positive_scores = False
+
+        for eval_result in evaluation_results:
+            eval_name = eval_result.get("evaluationName", "Unknown")
+            print(f"\n→ Validating: {eval_name}")
+
+            try:
+                # Validate evaluation results are present
+                eval_run_results = eval_result.get("evaluationRunResults", [])
+                if len(eval_run_results) == 0:
+                    print(f"  ⊘ Skipping '{eval_name}' (no evaluation run results)")
+                    skipped_count += 1
+                    continue
+
+                # Check that evaluations have scores > 0
+                all_passed = True
+                min_score = 100
+                for eval_run in eval_run_results:
+                    evaluator_name = eval_run.get("evaluatorName", "Unknown")
+                    result = eval_run.get("result", {})
+                    score = result.get("score", 0)
+                    min_score = min(min_score, score)
+
+                    # Check if score is greater than 0
+                    if score > 0:
+                        has_positive_scores = True
+                        print(f"  ✓ {evaluator_name}: score={score:.1f}")
+                    else:
+                        print(f"  ✗ {evaluator_name}: score={score:.1f} (must be > 0)")
+                        all_passed = False
+
+                if all_passed and min_score > 0:
+                    print(
+                        f"  ✓ All evaluators passed for '{eval_name}' (min score: {min_score:.1f})"
+                    )
+                    passed_count += 1
+                else:
+                    print(f"  ✗ Some evaluators failed for '{eval_name}'")
+                    failed_count += 1
+
+            except Exception as e:
+                print(f"  ✗ Error validating '{eval_name}': {e}")
+                failed_count += 1
 
+        # Final summary
+        print(f"\n{'=' * 60}")
+        print("Summary:")
+        print(f"  Total evaluations: {passed_count + failed_count + skipped_count}")
+        print(f"  ✓ Passed: {passed_count}")
+        print(f"  ✗ Failed: {failed_count}")
+        print(f"  ⊘ Skipped: {skipped_count}")
+        print(f"{'=' * 60}")
+
+        assert failed_count == 0, "Some assertions failed"
+        assert has_positive_scores, "No evaluation scores greater than 0 were found"
+
+        print("\n✅ All assertions passed!")
+
+    output_file = "__uipath/output.json"
     assert os.path.isfile(output_file), (
         f"Evaluation output file '{output_file}' not found"
     )
@@ -30,79 +114,6 @@ def main() -> None:
     assert status == "successful", f"Evaluation run failed with status: {status}"
     print("✓ Evaluation run status: successful")
 
-    # Extract output data
-    output = output_data.get("output", {})
-
-    # Validate structure
-    assert "evaluationSetResults" in output, "Missing 'evaluationSetResults' in output"
-
-    evaluation_results = output["evaluationSetResults"]
-    assert len(evaluation_results) > 0, "No evaluation results found"
-
-    print(f"✓ Found {len(evaluation_results)} evaluation result(s)")
-
-    # Validate each evaluation result
-    passed_count = 0
-    failed_count = 0
-    skipped_count = 0
-    has_positive_scores = False
-
-    for eval_result in evaluation_results:
-        eval_name = eval_result.get("evaluationName", "Unknown")
-        print(f"\n→ Validating: {eval_name}")
-
-        try:
-            # Validate evaluation results are present
-            eval_run_results = eval_result.get("evaluationRunResults", [])
-            if len(eval_run_results) == 0:
-                print(f"  ⊘ Skipping '{eval_name}' (no evaluation run results)")
-                skipped_count += 1
-                continue
-
-            # Check that evaluations have scores > 0
-            all_passed = True
-            min_score = 100
-            for eval_run in eval_run_results:
-                evaluator_name = eval_run.get("evaluatorName", "Unknown")
-                result = eval_run.get("result", {})
-                score = result.get("score", 0)
-                min_score = min(min_score, score)
-
-                # Check if score is greater than 0
-                if score > 0:
-                    has_positive_scores = True
-                    print(f"  ✓ {evaluator_name}: score={score:.1f}")
-                else:
-                    print(f"  ✗ {evaluator_name}: score={score:.1f} (must be > 0)")
-                    all_passed = False
-
-            if all_passed and min_score > 0:
-                print(
-                    f"  ✓ All evaluators passed for '{eval_name}' (min score: {min_score:.1f})"
-                )
-                passed_count += 1
-            else:
-                print(f"  ✗ Some evaluators failed for '{eval_name}'")
-                failed_count += 1
-
-        except Exception as e:
-            print(f"  ✗ Error validating '{eval_name}': {e}")
-            failed_count += 1
-
-    # Final summary
-    print(f"\n{'=' * 60}")
-    print("Summary:")
-    print(f"  Total evaluations: {passed_count + failed_count + skipped_count}")
-    print(f"  ✓ Passed: {passed_count}")
-    print(f"  ✗ Failed: {failed_count}")
-    print(f"  ⊘ Skipped: {skipped_count}")
-    print(f"{'=' * 60}")
-
-    assert failed_count == 0, "Some assertions failed"
-    assert has_positive_scores, "No evaluation scores greater than 0 were found"
-
-    print("\n✅ All assertions passed!")
-
 
 if __name__ == "__main__":
     main()
diff --git a/tests/cli/evaluators/test_legacy_exact_match_evaluator.py b/tests/cli/evaluators/test_legacy_exact_match_evaluator.py
diff --git a/uv.lock b/uv.lock