Skip to content

Commit 80d7990

Browse files
abrichr and claude authored
fix: auto-detect evaluate_url, waa_examples_path, and screen resolution (#149)
Three configuration improvements: 1. evaluate_url: when /evaluate returns 404 on the main server, auto-tries port 5050 (where evaluate_server.py runs). Caches the working URL. 2. waa_examples_path: auto-detects from WAA_EXAMPLES_PATH env var or common directory locations (WindowsAgentArena submodule paths). 3. Screen resolution: defaults to 1280x720 (matches typical WAA QEMU resolution) instead of 1920x1200. Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 791440d commit 80d7990

1 file changed

Lines changed: 64 additions & 4 deletions

File tree

  • openadapt_evals/adapters/waa

openadapt_evals/adapters/waa/live.py

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -370,14 +370,14 @@ class WAALiveConfig:
370370
"""
371371

372372
server_url: str = "http://localhost:5000"
373-
evaluate_url: str | None = None
373+
evaluate_url: str | None = None # Auto-detects port 5050 if /evaluate 404s on server_url
374374
a11y_backend: str = "uia"
375-
screen_width: int = 1920
376-
screen_height: int = 1200
375+
screen_width: int = 1280 # Default matches typical WAA QEMU resolution
376+
screen_height: int = 720
377377
max_steps: int = 15
378378
action_delay: float = 0.5
379379
timeout: float = 90.0
380-
waa_examples_path: str | None = None
380+
waa_examples_path: str | None = None # Auto-detected from common paths + WAA_EXAMPLES_PATH env
381381
clean_desktop: bool = False
382382
force_tray_icons: bool = False
383383
reapply_clean_desktop_each_reset: bool = False
@@ -402,6 +402,7 @@ class WAALiveAdapter(BenchmarkAdapter):
402402

403403
def __init__(self, config: WAALiveConfig | None = None):
404404
self.config = config or WAALiveConfig()
405+
self._auto_detect_waa_examples_path()
405406
self._current_task: BenchmarkTask | None = None
406407
self._step_count = 0
407408
self._current_a11y: dict | None = None
@@ -414,6 +415,36 @@ def __init__(self, config: WAALiveConfig | None = None):
414415
self._last_setup_results: list[dict[str, Any]] = []
415416
self._last_foreground_title: str | None = None
416417

418+
def _auto_detect_waa_examples_path(self) -> None:
    """Fill in ``config.waa_examples_path`` when the caller left it unset.

    Lookup order:

    1. An explicit ``config.waa_examples_path`` always wins (nothing is done).
    2. The ``WAA_EXAMPLES_PATH`` environment variable, if it names an
       existing directory. A value that is set but is not a directory is
       reported with a warning instead of being dropped silently.
    3. A fixed list of candidate directories relative to the current
       working directory; the first one that exists is used.

    If nothing matches, ``config.waa_examples_path`` is left unchanged.
    """
    if self.config.waa_examples_path:
        return

    import os
    from pathlib import Path

    # Check env var first.
    env_path = os.environ.get("WAA_EXAMPLES_PATH")
    if env_path:
        if Path(env_path).is_dir():
            self.config.waa_examples_path = env_path
            logger.info("Auto-detected waa_examples_path from WAA_EXAMPLES_PATH: %s", env_path)
            return
        # The user asked for a specific location; tell them it was unusable
        # rather than quietly falling through to the guesses below.
        logger.warning(
            "WAA_EXAMPLES_PATH is set to %s but that is not a directory; "
            "falling back to common locations",
            env_path,
        )

    # Check common paths relative to CWD.
    common_paths = (
        "evaluation_examples_windows",
        "src/win-arena-container/evaluation_examples_windows",
        "../WindowsAgentArena/src/win-arena-container/evaluation_examples_windows",
        "../waa/src/win-arena-container/evaluation_examples_windows",
        "../waa/evaluation_examples_windows",
    )
    for candidate in common_paths:
        path = Path(candidate)
        if path.is_dir():
            # Store an absolute path so the setting stays valid even if the
            # process changes its working directory after construction.
            resolved = path.resolve()
            self.config.waa_examples_path = str(resolved)
            logger.info("Auto-detected waa_examples_path: %s", resolved)
            return
447+
417448
@property
418449
def name(self) -> str:
419450
"""Benchmark name."""
@@ -954,6 +985,35 @@ def evaluate(self, task: BenchmarkTask) -> BenchmarkResult:
954985
elif resp.status_code == 404 or (
955986
resp.status_code == 500 and "404 Not Found" in resp.text
956987
):
988+
# Auto-detect: try port 5050 (evaluate_server.py) if not already tried
989+
if self.config.evaluate_url is None:
990+
from urllib.parse import urlparse
991+
parsed = urlparse(self.config.server_url)
992+
fallback_url = f"{parsed.scheme}://{parsed.hostname}:5050"
993+
logger.info(
994+
"/evaluate not found at %s, trying fallback: %s",
995+
evaluate_endpoint, fallback_url,
996+
)
997+
try:
998+
resp2 = requests.post(
999+
f"{fallback_url}/evaluate",
1000+
json=eval_request,
1001+
timeout=self.config.timeout,
1002+
)
1003+
if resp2.status_code == 200:
1004+
# Cache the working URL for future calls
1005+
self.config.evaluate_url = fallback_url
1006+
result = resp2.json()
1007+
return BenchmarkResult(
1008+
task_id=task.task_id,
1009+
success=result.get("success", False),
1010+
score=result.get("score", 0.0),
1011+
num_steps=self._step_count,
1012+
reason=result.get("reason"),
1013+
)
1014+
except Exception as exc:
1015+
logger.warning("Fallback evaluate at %s failed: %s", fallback_url, exc)
1016+
9571017
logger.warning(
9581018
f"/evaluate endpoint not found at {evaluate_endpoint}. "
9591019
"Ensure the evaluate server is running on port 5050."

0 commit comments

Comments
 (0)