Skip to content

Commit ed856c2

Browse files
committed
chore: use uv for Python dependency management
Signed-off-by: Kyle Hounslow <kylhouns@amazon.com>
1 parent 7e0d53c commit ed856c2

9 files changed

Lines changed: 2943 additions & 18 deletions

File tree

docker-compose/agent-eval-canary/Dockerfile

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
FROM python:3.12-slim
22
RUN apt-get update && apt-get install -y --no-install-recommends git && rm -rf /var/lib/apt/lists/*
3+
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
34
WORKDIR /app
4-
COPY requirements.txt .
5-
RUN pip install --no-cache-dir -r requirements.txt
5+
COPY pyproject.toml uv.lock ./
6+
RUN uv sync --frozen --no-dev
67
COPY eval_canary.py .
7-
CMD ["python", "-u", "eval_canary.py"]
8+
CMD ["uv", "run", "eval_canary.py"]

docker-compose/agent-eval-canary/eval_canary.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from datetime import datetime, timedelta, timezone
12
"""Agent eval canary — periodically scores un-evaluated agent traces.
23
34
Polls OpenSearch for recent agent traces, skips any that already have
@@ -14,7 +15,7 @@
1415

1516
from opentelemetry import trace
1617
from opentelemetry.sdk.trace import TracerProvider
17-
from opentelemetry.sdk.trace.export import BatchSpanProcessor
18+
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
1819
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
1920
from opentelemetry.sdk.resources import Resource
2021

@@ -43,7 +44,7 @@ def setup_otel() -> TracerProvider:
4344
resource = Resource.create({"service.name": "agent-eval-canary"})
4445
provider = TracerProvider(resource=resource)
4546
provider.add_span_processor(
46-
BatchSpanProcessor(OTLPSpanExporter(endpoint=OTEL_ENDPOINT, insecure=True))
47+
SimpleSpanProcessor(OTLPSpanExporter(endpoint=OTEL_ENDPOINT, insecure=True))
4748
)
4849
trace.set_tracer_provider(provider)
4950
return provider
@@ -100,29 +101,40 @@ def run() -> None:
100101
# Wait for OpenSearch to be ready
101102
for attempt in range(30):
102103
try:
103-
retriever.list_traces(since_minutes=1, max_results=1)
104+
retriever.list_root_spans(max_results=1)
104105
log.info("OpenSearch is ready")
105106
break
106107
except Exception as e:
107108
log.info("Waiting for OpenSearch... (%d/30): %s", attempt + 1, e)
108109
time.sleep(10)
109110

111+
# Track recently scored traces to avoid duplicates from batch flush delay
112+
recently_scored: dict[str, float] = {} # trace_id -> timestamp
113+
110114
while True:
111115
try:
112-
roots = retriever.list_traces(
116+
# Expire entries older than lookback window
117+
cutoff = time.time() - (LOOKBACK_MINUTES * 60)
118+
recently_scored = {k: v for k, v in recently_scored.items() if v > cutoff}
119+
120+
roots = retriever.list_root_spans(
113121
services=TARGET_SERVICES,
114-
since_minutes=LOOKBACK_MINUTES,
122+
since=datetime.now(timezone.utc) - timedelta(minutes=LOOKBACK_MINUTES),
115123
)
116124
if roots:
117125
trace_ids = [r.trace_id for r in roots]
118126
evaluated = retriever.find_evaluated_trace_ids(trace_ids)
119-
unevaluated = [r for r in roots if r.trace_id not in evaluated]
127+
unevaluated = [
128+
r for r in roots
129+
if r.trace_id not in evaluated and r.trace_id not in recently_scored
130+
]
120131

121132
if unevaluated:
122133
log.info("Found %d unevaluated traces", len(unevaluated))
123134
for root in unevaluated:
124135
try:
125136
deterministic_eval(retriever, root.trace_id, root.span_id)
137+
recently_scored[root.trace_id] = time.time()
126138
except Exception:
127139
log.exception("Failed to eval trace %s", root.trace_id[:12])
128140
provider.force_flush()

docker-compose/agent-eval-canary/pyproject.toml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
[project]
2+
name = "agent-eval-canary"
3+
version = "0.1.0"
4+
description = "Periodically scores un-evaluated agent traces in OpenSearch"
5+
requires-python = ">=3.10"
6+
dependencies = [
7+
"opensearch-genai-observability-sdk-py[opensearch] @ git+https://github.com/kylehounslow/genai-observability-sdk-py.git@fd9560630ed376c31f1e15d5e60675d9514f02af",
8+
"opentelemetry-api",
9+
"opentelemetry-sdk",
10+
"opentelemetry-exporter-otlp-proto-grpc",
11+
]

docker-compose/agent-eval-canary/requirements.txt

Lines changed: 0 additions & 5 deletions
This file was deleted.

docker-compose/agent-eval-canary/uv.lock

Lines changed: 490 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

examples/agent-evals/genai-sdk/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ End-to-end evaluation loop: retrieve agent traces from OpenSearch, run LLM-as-ju
1111
## Setup
1212

1313
```bash
14-
pip install -r requirements.txt
14+
uv sync
1515
```
1616

1717
## Usage

examples/agent-evals/genai-sdk/pyproject.toml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
[project]
2+
name = "agent-evals-genai-sdk"
3+
version = "0.1.0"
4+
description = "Run evaluations against stored agent traces in OpenSearch"
5+
readme = "README.md"
6+
requires-python = ">=3.10"
7+
dependencies = [
8+
"opensearch-genai-observability-sdk-py[opensearch]",
9+
"strands-agents",
10+
"strands-agents-evals",
11+
]

examples/agent-evals/genai-sdk/requirements.txt

Lines changed: 0 additions & 3 deletions
This file was deleted.

0 commit comments

Comments
 (0)