-
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun-arms.sh
More file actions
executable file
·81 lines (72 loc) · 2.53 KB
/
Copy pathrun-arms.sh
File metadata and controls
executable file
·81 lines (72 loc) · 2.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env bash
# Agent eval harness: runs the local A/B probe or a live MCP run.
# Dev/CI tooling only; it is not shipped in the npm package.
#
# Environment overrides (default in parentheses):
#   AGENT_EVAL_OUTPUT        comparison JSON path
#                            ($REPO_ROOT/.agent-eval/comparison.json)
#   AGENT_EVAL_RUNS          value handed to --runs (1)
#   AGENT_EVAL_MODE          value handed to --mode (probe)
#   AGENT_EVAL_FIXTURE_ROOT  fixture directory ($REPO_ROOT/fixtures/minimal)
#   AGENT_EVAL_PROBES        probes file (scenarios.json next to this script)
#   AGENT_EVAL_SCENARIOS     scenarios file
#                            ($REPO_ROOT/fixtures/golden/scenarios.json)
set -euo pipefail

# Resolve this script's directory, then the repository root two levels up,
# and work from the repository root from here on.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
REPO_ROOT=$(cd "$SCRIPT_DIR/../.." && pwd)
cd "$REPO_ROOT"

# Run parameters.
MODE=${AGENT_EVAL_MODE:-probe}
RUNS=${AGENT_EVAL_RUNS:-1}
OUT=${AGENT_EVAL_OUTPUT:-$REPO_ROOT/.agent-eval/comparison.json}

# Input files.
PROBES=${AGENT_EVAL_PROBES:-$SCRIPT_DIR/scenarios.json}
SCENARIOS=${AGENT_EVAL_SCENARIOS:-$REPO_ROOT/fixtures/golden/scenarios.json}

# Fixture tree and the index database expected inside it.
FIXTURE_ROOT=${AGENT_EVAL_FIXTURE_ROOT:-$REPO_ROOT/fixtures/minimal}
INDEX_DB=$FIXTURE_ROOT/.codemap/index.db

# Extra probe-runner arguments: --skip-index is added only when the index
# database file is already present.
SKIP_ARGS=()
[[ ! -f "$INDEX_DB" ]] || SKIP_ARGS=(--skip-index)
echo "=== agent-eval: ${MODE} arms (runs=$RUNS) ==="

# Run the probe harness. Its exit status is captured instead of being allowed
# to trip `set -e`, so the later steps still run and the status can be
# reported when the script exits.
#
# SKIP_ARGS may be an empty array. With `set -u` active, bash releases older
# than 4.4 (including the 3.2 shipped with macOS) treat "${SKIP_ARGS[@]}" on
# an empty array as an unbound variable and abort the script. The
# ${arr[@]+"${arr[@]}"} form expands to nothing for an empty array and to the
# individually quoted elements otherwise, on every bash version.
PROBE_EXIT=0
bun "$SCRIPT_DIR/run-probes.ts" \
  --mode "$MODE" \
  --output "$OUT" \
  --runs "$RUNS" \
  --fixture-root "$FIXTURE_ROOT" \
  --scenarios "$SCENARIOS" \
  --probes "$PROBES" \
  ${SKIP_ARGS[@]+"${SKIP_ARGS[@]}"} || PROBE_EXIT=$?

# Optional summary (AGENT_EVAL_PRINT_SUMMARY=1); only attempted when an
# output file is present at $OUT.
if [[ "${AGENT_EVAL_PRINT_SUMMARY:-0}" == "1" && -f "$OUT" ]]; then
  bun "$SCRIPT_DIR/print-comparison-summary.ts" --input "$OUT"
fi
# Status of the live-log comparison step; remains 0 if that step never runs.
LOG_EXIT=0

# Optional capture step, enabled only by AGENT_EVAL_CAPTURE=1. It runs
# capture-real-sessions.ts with the harness paths passed through the
# environment, then defaults the on/off log paths to files inside the session
# directory and exports them.
case "${AGENT_EVAL_CAPTURE:-}" in
  1)
    echo "=== agent-eval: capture synthetic log sessions ==="
    SESSION_DIR="${AGENT_EVAL_SESSION_DIR:-$REPO_ROOT/.agent-eval/sessions}"

    # NAME=value pairs handed to `env` for the capture script only.
    CAPTURE_ENV=()
    CAPTURE_ENV+=("AGENT_EVAL_FIXTURE_ROOT=$FIXTURE_ROOT")
    CAPTURE_ENV+=("AGENT_EVAL_SCENARIOS=$SCENARIOS")
    CAPTURE_ENV+=("AGENT_EVAL_PROBES=$PROBES")
    CAPTURE_ENV+=("AGENT_EVAL_SESSION_DIR=$SESSION_DIR")
    # Set AGENT_EVAL_SKIP_INDEX=1 when the fixture already has an index file.
    [[ ! -f "$INDEX_DB" ]] || CAPTURE_ENV+=("AGENT_EVAL_SKIP_INDEX=1")

    env "${CAPTURE_ENV[@]}" bun "$SCRIPT_DIR/capture-real-sessions.ts"

    # Keep caller-supplied log paths; fall back to the session directory
    # files when they are unset or empty.
    : "${AGENT_EVAL_LOG_ON:=$SESSION_DIR/real-mcp-on.json}"
    : "${AGENT_EVAL_LOG_OFF:=$SESSION_DIR/real-mcp-off.json}"
    export AGENT_EVAL_LOG_ON AGENT_EVAL_LOG_OFF
    ;;
esac
# Single-log mode: when AGENT_EVAL_LOG is set and non-empty, hand that log to
# print-log-metrics.ts.
single_log="${AGENT_EVAL_LOG:-}"
if [[ -n "$single_log" ]]; then
  printf '%s\n' "=== agent-eval: parse agent log $single_log ==="
  bun "$SCRIPT_DIR/print-log-metrics.ts" "$single_log"
fi
# Live-log comparison: runs only when both the MCP-on and MCP-off log paths
# are set and non-empty. The comparison's exit status is recorded in LOG_EXIT
# rather than aborting the script.
log_on="${AGENT_EVAL_LOG_ON:-}"
log_off="${AGENT_EVAL_LOG_OFF:-}"
if [[ -n "$log_on" && -n "$log_off" ]]; then
  LOG_OUT="${AGENT_EVAL_LOG_OUTPUT:-$REPO_ROOT/.agent-eval/log-comparison.json}"
  echo "=== agent-eval: compare live logs (orthogonal to AGENT_EVAL_MODE) ==="

  LOG_EXIT=0
  bun "$SCRIPT_DIR/compare-live-logs.ts" \
    --mcp-on "$log_on" \
    --mcp-off "$log_off" \
    --output "$LOG_OUT" || LOG_EXIT=$?

  # Print the summary only after a successful comparison. A failure of the
  # summary printer itself is deliberately ignored and does not change
  # LOG_EXIT.
  if (( LOG_EXIT == 0 )); then
    bun "$SCRIPT_DIR/print-comparison-summary.ts" --input "$LOG_OUT" || true
  fi
fi
# Report the comparison file only when it actually exists: a failed probe run
# may leave nothing at $OUT, and claiming it was written would be misleading.
if [[ -f "$OUT" ]]; then
  echo "Wrote $OUT"
else
  echo "agent-eval: no output file at $OUT" >&2
fi

# Exit status: a probe failure takes precedence; otherwise use the status of
# the live-log comparison, which is 0 when that step was skipped.
if [[ "$PROBE_EXIT" -ne 0 ]]; then
  exit "$PROBE_EXIT"
fi
exit "$LOG_EXIT"