@@ -370,14 +370,14 @@ class WAALiveConfig:
370370 """
371371
372372 server_url : str = "http://localhost:5000"
373- evaluate_url : str | None = None
373+ evaluate_url : str | None = None # Auto-detects port 5050 if /evaluate 404s on server_url
374374 a11y_backend : str = "uia"
375- screen_width : int = 1920
376- screen_height : int = 1200
375+ screen_width : int = 1280 # Default matches typical WAA QEMU resolution
376+ screen_height : int = 720
377377 max_steps : int = 15
378378 action_delay : float = 0.5
379379 timeout : float = 90.0
380- waa_examples_path : str | None = None
380+ waa_examples_path : str | None = None # Auto-detected from common paths + WAA_EXAMPLES_PATH env
381381 clean_desktop : bool = False
382382 force_tray_icons : bool = False
383383 reapply_clean_desktop_each_reset : bool = False
@@ -402,6 +402,7 @@ class WAALiveAdapter(BenchmarkAdapter):
402402
403403 def __init__ (self , config : WAALiveConfig | None = None ):
404404 self .config = config or WAALiveConfig ()
405+ self ._auto_detect_waa_examples_path ()
405406 self ._current_task : BenchmarkTask | None = None
406407 self ._step_count = 0
407408 self ._current_a11y : dict | None = None
@@ -414,6 +415,36 @@ def __init__(self, config: WAALiveConfig | None = None):
414415 self ._last_setup_results : list [dict [str , Any ]] = []
415416 self ._last_foreground_title : str | None = None
416417
def _auto_detect_waa_examples_path(self) -> None:
    """Fill in ``config.waa_examples_path`` when the caller left it unset.

    Lookup order (the first existing directory wins):

    1. The ``WAA_EXAMPLES_PATH`` environment variable.
    2. A fixed list of candidate directories, resolved relative to the
       current working directory.

    The detected value is written back to ``self.config`` and logged at
    INFO level. If nothing matches, the config is left untouched.

    A ``WAA_EXAMPLES_PATH`` that is set but does not name an existing
    directory is reported with a warning (instead of being ignored
    silently) before falling back to the candidate list.
    """
    # An explicitly configured path always wins; never override it.
    if self.config.waa_examples_path:
        return

    import os
    from pathlib import Path

    # 1. Environment variable takes precedence over the guessed locations.
    env_path = os.environ.get("WAA_EXAMPLES_PATH")
    if env_path:
        if Path(env_path).is_dir():
            self.config.waa_examples_path = env_path
            logger.info("Auto-detected waa_examples_path from WAA_EXAMPLES_PATH: %s", env_path)
            return
        # Surface the misconfiguration so it is distinguishable from an
        # unset variable, then keep going with the fallback candidates.
        logger.warning(
            "WAA_EXAMPLES_PATH is set to %r but is not a directory; "
            "falling back to common locations",
            env_path,
        )

    # 2. Candidate directories, relative to the current working directory.
    common_paths = (
        "evaluation_examples_windows",
        "src/win-arena-container/evaluation_examples_windows",
        "../WindowsAgentArena/src/win-arena-container/evaluation_examples_windows",
        "../waa/src/win-arena-container/evaluation_examples_windows",
        "../waa/evaluation_examples_windows",
    )
    for candidate in common_paths:
        path = Path(candidate)
        if path.is_dir():
            self.config.waa_examples_path = str(path)
            logger.info("Auto-detected waa_examples_path: %s", path)
            return
447+
417448 @property
418449 def name (self ) -> str :
419450 """Benchmark name."""
@@ -954,6 +985,35 @@ def evaluate(self, task: BenchmarkTask) -> BenchmarkResult:
954985 elif resp .status_code == 404 or (
955986 resp .status_code == 500 and "404 Not Found" in resp .text
956987 ):
988+ # Auto-detect: try port 5050 (evaluate_server.py) if not already tried
989+ if self .config .evaluate_url is None :
990+ from urllib .parse import urlparse
991+ parsed = urlparse (self .config .server_url )
992+ fallback_url = f"{ parsed .scheme } ://{ parsed .hostname } :5050"
993+ logger .info (
994+ "/evaluate not found at %s, trying fallback: %s" ,
995+ evaluate_endpoint , fallback_url ,
996+ )
997+ try :
998+ resp2 = requests .post (
999+ f"{ fallback_url } /evaluate" ,
1000+ json = eval_request ,
1001+ timeout = self .config .timeout ,
1002+ )
1003+ if resp2 .status_code == 200 :
1004+ # Cache the working URL for future calls
1005+ self .config .evaluate_url = fallback_url
1006+ result = resp2 .json ()
1007+ return BenchmarkResult (
1008+ task_id = task .task_id ,
1009+ success = result .get ("success" , False ),
1010+ score = result .get ("score" , 0.0 ),
1011+ num_steps = self ._step_count ,
1012+ reason = result .get ("reason" ),
1013+ )
1014+ except Exception as exc :
1015+ logger .warning ("Fallback evaluate at %s failed: %s" , fallback_url , exc )
1016+
9571017 logger .warning (
9581018 f"/evaluate endpoint not found at { evaluate_endpoint } . "
9591019 "Ensure the evaluate server is running on port 5050."
0 commit comments