Skip to content

Commit fec12a2

Browse files
committed
fix: decouple CLI from evals part 1
1 parent 7012ab4 commit fec12a2

40 files changed

Lines changed: 286 additions & 229 deletions

src/uipath/_cli/_evals/_context.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from typing import Any
2+
3+
from uipath.runtime.schema import UiPathRuntimeSchema
4+
5+
from uipath.eval.evaluators.base_evaluator import GenericBaseEvaluator
6+
from uipath.eval.models.evaluation_set import EvaluationSet
7+
8+
9+
class UiPathEvalContext:
10+
"""Context used for evaluation runs."""
11+
12+
# Required Fields
13+
runtime_schema: UiPathRuntimeSchema
14+
evaluation_set: EvaluationSet
15+
evaluators: list[GenericBaseEvaluator[Any, Any, Any]]
16+
execution_id: str
17+
18+
# Optional Fields
19+
entrypoint: str | None = None
20+
workers: int | None = 1
21+
eval_set_run_id: str | None = None
22+
verbose: bool = False
23+
enable_mocker_cache: bool = False
24+
report_coverage: bool = False
25+
input_overrides: dict[str, Any] | None = None
26+
resume: bool = False
27+
job_id: str | None = None

src/uipath/_cli/_evals/_evaluate.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
UiPathRuntimeResult,
55
)
66

7-
from uipath._cli._evals._runtime import UiPathEvalContext, UiPathEvalRuntime
7+
from uipath._cli._evals._context import UiPathEvalContext
8+
from uipath._cli._evals._runtime import UiPathEvalRuntime
89
from uipath._events._event_bus import EventBus
910

1011

src/uipath/_cli/_evals/_exporters.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
import logging
2+
from collections import defaultdict
3+
from typing import Sequence
4+
5+
from opentelemetry import context as context_api
6+
from opentelemetry.sdk.trace import ReadableSpan, Span
7+
from opentelemetry.sdk.trace.export import (
8+
SpanExporter,
9+
SpanExportResult,
10+
)
11+
from uipath.core.tracing.processors import UiPathExecutionBatchTraceProcessor
12+
from uipath.runtime.logging import UiPathRuntimeExecutionLogHandler
13+
14+
from uipath._cli._evals._span_collection import ExecutionSpanCollector
15+
from uipath._cli._evals.mocks.mocks import execution_id_context
16+
17+
18+
class ExecutionSpanExporter(SpanExporter):
19+
"""Custom exporter that stores spans grouped by execution ids."""
20+
21+
def __init__(self):
22+
# { execution_id -> list of spans }
23+
self._spans: dict[str, list[ReadableSpan]] = defaultdict(list)
24+
25+
def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
26+
for span in spans:
27+
if span.attributes is not None:
28+
exec_id = span.attributes.get("execution.id")
29+
if exec_id is not None and isinstance(exec_id, str):
30+
self._spans[exec_id].append(span)
31+
32+
return SpanExportResult.SUCCESS
33+
34+
def get_spans(self, execution_id: str) -> list[ReadableSpan]:
35+
"""Retrieve spans for a given execution id."""
36+
return self._spans.get(execution_id, [])
37+
38+
def clear(self, execution_id: str | None = None) -> None:
39+
"""Clear stored spans for one or all executions."""
40+
if execution_id:
41+
self._spans.pop(execution_id, None)
42+
else:
43+
self._spans.clear()
44+
45+
def shutdown(self) -> None:
46+
self.clear()
47+
48+
49+
class ExecutionSpanProcessor(UiPathExecutionBatchTraceProcessor):
50+
"""Span processor that adds spans to ExecutionSpanCollector when they start."""
51+
52+
def __init__(self, span_exporter: SpanExporter, collector: ExecutionSpanCollector):
53+
super().__init__(span_exporter)
54+
self.collector = collector
55+
56+
def on_start(
57+
self, span: Span, parent_context: context_api.Context | None = None
58+
) -> None:
59+
super().on_start(span, parent_context)
60+
61+
exec_id = span.attributes.get("execution.id") if span.attributes else None
62+
63+
# Fallback: if execution.id wasn't propagated (e.g., NonRecordingSpan
64+
# parent on resume), get it from the execution context variable.
65+
if exec_id is None:
66+
ctx_exec_id = execution_id_context.get()
67+
if ctx_exec_id:
68+
span.set_attribute("execution.id", ctx_exec_id)
69+
exec_id = ctx_exec_id
70+
71+
if span.attributes and "execution.id" in span.attributes:
72+
exec_id = span.attributes["execution.id"]
73+
if isinstance(exec_id, str):
74+
self.collector.add_span(span, exec_id)
75+
76+
77+
class ExecutionLogsExporter:
78+
"""Custom exporter that stores multiple execution log handlers."""
79+
80+
def __init__(self):
81+
self._log_handlers: dict[str, UiPathRuntimeExecutionLogHandler] = {}
82+
83+
def register(
84+
self, execution_id: str, handler: UiPathRuntimeExecutionLogHandler
85+
) -> None:
86+
self._log_handlers[execution_id] = handler
87+
88+
def get_logs(self, execution_id: str) -> list[logging.LogRecord]:
89+
"""Retrieve buffered log records for a given execution id."""
90+
log_handler = self._log_handlers.get(execution_id)
91+
return log_handler.buffer if log_handler else []
92+
93+
def clear(self, execution_id: str | None = None) -> None:
94+
"""Clear registered log handlers for one or all executions."""
95+
if execution_id:
96+
self._log_handlers.pop(execution_id, None)
97+
else:
98+
self._log_handlers.clear()

src/uipath/_cli/_evals/_models/_mocks.py

Lines changed: 0 additions & 3 deletions
This file was deleted.

src/uipath/_cli/_evals/_progress_reporter.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,14 @@
55
import logging
66
import os
77
import uuid
8+
from enum import IntEnum
89
from typing import Any
910
from urllib.parse import urlparse
1011

1112
from opentelemetry import trace
1213
from pydantic import BaseModel
1314
from rich.console import Console
1415

15-
from uipath._cli._evals._models._evaluation_set import (
16-
EvaluationItem,
17-
EvaluationStatus,
18-
)
1916
from uipath._cli._evals._models._sw_reporting import (
2017
StudioWebAgentSnapshot,
2118
StudioWebProgressItem,
@@ -41,12 +38,20 @@
4138
)
4239
from uipath.eval.evaluators.base_evaluator import GenericBaseEvaluator
4340
from uipath.eval.models import EvalItemResult, ScoreType
41+
from uipath.eval.models.evaluation_set import EvaluationItem
4442
from uipath.platform import UiPath
4543
from uipath.platform.common import UiPathConfig
4644

4745
logger = logging.getLogger(__name__)
4846

4947

48+
class EvaluationStatus(IntEnum):
49+
PENDING = 0
50+
IN_PROGRESS = 1
51+
COMPLETED = 2
52+
FAILED = 3
53+
54+
5055
def gracefully_handle_errors(func):
5156
"""Decorator to catch and log errors without stopping execution."""
5257

src/uipath/_cli/_evals/_runtime.py

Lines changed: 12 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,11 @@
88
Awaitable,
99
Iterable,
1010
Iterator,
11-
Sequence,
1211
Tuple,
1312
)
1413

1514
import coverage
16-
from opentelemetry import context as context_api
17-
from opentelemetry.sdk.trace import ReadableSpan, Span
18-
from opentelemetry.sdk.trace.export import (
19-
SpanExporter,
20-
SpanExportResult,
21-
)
15+
from opentelemetry.sdk.trace import ReadableSpan
2216
from opentelemetry.trace import (
2317
NonRecordingSpan,
2418
SpanContext,
@@ -29,7 +23,6 @@
2923
)
3024
from pydantic import BaseModel
3125
from uipath.core.tracing import UiPathTraceManager
32-
from uipath.core.tracing.processors import UiPathExecutionBatchTraceProcessor
3326
from uipath.runtime import (
3427
UiPathExecuteOptions,
3528
UiPathExecutionRuntime,
@@ -45,6 +38,12 @@
4538
from uipath.runtime.logging import UiPathRuntimeExecutionLogHandler
4639
from uipath.runtime.schema import UiPathRuntimeSchema
4740

41+
from uipath._cli._evals._context import UiPathEvalContext
42+
from uipath._cli._evals._exporters import (
43+
ExecutionLogsExporter,
44+
ExecutionSpanExporter,
45+
ExecutionSpanProcessor,
46+
)
4847
from uipath._cli._evals._span_utils import (
4948
configure_eval_set_run_span,
5049
configure_evaluation_span,
@@ -54,6 +53,11 @@
5453
from uipath._cli._evals.mocks.input_mocker import (
5554
generate_llm_input,
5655
)
56+
from uipath.eval.mocks.types import MockingContext
57+
from uipath.eval.models.evaluation_set import (
58+
EvaluationItem,
59+
EvaluationSet,
60+
)
5761

5862
from ..._events._event_bus import EventBus
5963
from ..._events._events import (
@@ -69,10 +73,6 @@
6973
from ...eval.models.models import AgentExecution, EvalItemResult
7074
from .._utils._parallelization import execute_parallel
7175
from ._eval_util import apply_input_overrides
72-
from ._models._evaluation_set import (
73-
EvaluationItem,
74-
EvaluationSet,
75-
)
7676
from ._models._exceptions import EvaluationRuntimeException
7777
from ._models._output import (
7878
EvaluationResultDto,
@@ -90,118 +90,12 @@
9090
from .mocks.mocks import (
9191
cache_manager_context,
9292
clear_execution_context,
93-
execution_id_context,
9493
set_execution_context,
9594
)
96-
from .mocks.types import MockingContext
9795

9896
logger = logging.getLogger(__name__)
9997

10098

101-
class ExecutionSpanExporter(SpanExporter):
102-
"""Custom exporter that stores spans grouped by execution ids."""
103-
104-
def __init__(self):
105-
# { execution_id -> list of spans }
106-
self._spans: dict[str, list[ReadableSpan]] = defaultdict(list)
107-
108-
def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
109-
for span in spans:
110-
if span.attributes is not None:
111-
exec_id = span.attributes.get("execution.id")
112-
if exec_id is not None and isinstance(exec_id, str):
113-
self._spans[exec_id].append(span)
114-
115-
return SpanExportResult.SUCCESS
116-
117-
def get_spans(self, execution_id: str) -> list[ReadableSpan]:
118-
"""Retrieve spans for a given execution id."""
119-
return self._spans.get(execution_id, [])
120-
121-
def clear(self, execution_id: str | None = None) -> None:
122-
"""Clear stored spans for one or all executions."""
123-
if execution_id:
124-
self._spans.pop(execution_id, None)
125-
else:
126-
self._spans.clear()
127-
128-
def shutdown(self) -> None:
129-
self.clear()
130-
131-
132-
class ExecutionSpanProcessor(UiPathExecutionBatchTraceProcessor):
133-
"""Span processor that adds spans to ExecutionSpanCollector when they start."""
134-
135-
def __init__(self, span_exporter: SpanExporter, collector: ExecutionSpanCollector):
136-
super().__init__(span_exporter)
137-
self.collector = collector
138-
139-
def on_start(
140-
self, span: Span, parent_context: context_api.Context | None = None
141-
) -> None:
142-
super().on_start(span, parent_context)
143-
144-
exec_id = span.attributes.get("execution.id") if span.attributes else None
145-
146-
# Fallback: if execution.id wasn't propagated (e.g., NonRecordingSpan
147-
# parent on resume), get it from the execution context variable.
148-
if exec_id is None:
149-
ctx_exec_id = execution_id_context.get()
150-
if ctx_exec_id:
151-
span.set_attribute("execution.id", ctx_exec_id)
152-
exec_id = ctx_exec_id
153-
154-
if span.attributes and "execution.id" in span.attributes:
155-
exec_id = span.attributes["execution.id"]
156-
if isinstance(exec_id, str):
157-
self.collector.add_span(span, exec_id)
158-
159-
160-
class ExecutionLogsExporter:
161-
"""Custom exporter that stores multiple execution log handlers."""
162-
163-
def __init__(self):
164-
self._log_handlers: dict[str, UiPathRuntimeExecutionLogHandler] = {}
165-
166-
def register(
167-
self, execution_id: str, handler: UiPathRuntimeExecutionLogHandler
168-
) -> None:
169-
self._log_handlers[execution_id] = handler
170-
171-
def get_logs(self, execution_id: str) -> list[logging.LogRecord]:
172-
"""Clear stored spans for one or all executions."""
173-
log_handler = self._log_handlers.get(execution_id)
174-
return log_handler.buffer if log_handler else []
175-
176-
def clear(self, execution_id: str | None = None) -> None:
177-
"""Clear stored spans for one or all executions."""
178-
if execution_id:
179-
self._log_handlers.pop(execution_id, None)
180-
else:
181-
self._log_handlers.clear()
182-
183-
184-
class UiPathEvalContext:
185-
"""Context used for evaluation runs."""
186-
187-
# Required Fields
188-
runtime_schema: UiPathRuntimeSchema
189-
evaluation_set: EvaluationSet
190-
evaluators: list[GenericBaseEvaluator[Any, Any, Any]]
191-
execution_id: str
192-
193-
# Optional Fields
194-
entrypoint: str | None = None
195-
workers: int | None = 1
196-
eval_set_run_id: str | None = None
197-
verbose: bool = False
198-
enable_mocker_cache: bool = False
199-
report_coverage: bool = False
200-
input_overrides: dict[str, Any] | None = None
201-
resume: bool = False
202-
job_id: str | None = None
203-
204-
20599
class UiPathEvalRuntime:
206100
"""Specialized runtime for evaluation runs, with access to the factory."""
207101

src/uipath/_cli/_evals/_span_utils.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,7 @@
55

66
from opentelemetry.trace import Span, Status, StatusCode
77
from pydantic import BaseModel, ConfigDict, Field
8-
9-
# Type hint for runtime protocol (avoids circular imports)
10-
try:
11-
from uipath.runtime import UiPathRuntimeProtocol, UiPathRuntimeSchema
12-
except ImportError:
13-
UiPathRuntimeProtocol = Any # type: ignore
8+
from uipath.runtime.schema import UiPathRuntimeSchema
149

1510

1611
class EvalSetRunOutput(BaseModel):

src/uipath/_cli/_evals/mocks/input_mocker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from datetime import datetime
55
from typing import Any
66

7-
from uipath._cli._evals.mocks.types import (
7+
from uipath.eval.mocks.types import (
88
InputMockingStrategy,
99
)
1010
from uipath.platform import UiPath

0 commit comments

Comments
 (0)