chore: release 0.81.3

semantic-release · semantic-release · commit 0922b0a6da43 · 2026-03-29T20:16:23.000Z
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,29 @@
 # CHANGELOG
 
 
+## v0.81.3 (2026-03-29)
+
+### Bug Fixes
+
+- Try local eval before slow /evaluate endpoint in evaluate_dense
+  ([#245](https://github.com/OpenAdaptAI/openadapt-evals/pull/245),
+  [`3b8c1c2`](https://github.com/OpenAdaptAI/openadapt-evals/commit/3b8c1c2b6317a693fec2e97cf8aa459205f1be4d))
+
+51% of TRL training time was wasted on 5050 evaluate timeouts (180s × 3 retries = 9 min per
+  evaluation). The local evaluation via evaluate_checks_local takes ~5s.
+
+Fix: when task config has checks defined, try local eval FIRST. Only fall through to the slow
+  /evaluate endpoint when no local checks exist.
+
+This eliminates the 9-minute timeout for custom YAML tasks that define their own checks.
+
+Before: evaluate() [9 min] → if 0.0 → local [5s]
+
+After: local [5s] → if no checks → evaluate() [9 min]
+
+Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
+
+
 ## v0.81.2 (2026-03-29)
 
 ### Bug Fixes
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "openadapt-evals"
-version = "0.81.2"
+version = "0.81.3"
 description = "Evaluation infrastructure for GUI agent benchmarks"
 readme = "README.md"
 requires-python = ">=3.10"