Skip to content

Commit 3e56272

Browse files
abrichr and claude authored
feat: update default planner model to gpt-5.4 (#170)
* fix: skip close_all and notification cleanup by default

  The close_all PowerShell command (Get-Process | CloseMainWindow) and notification cleanup (taskkill OneDrive) crash the WAA Flask server, making it unresponsive for the rest of the run. This happened on every test run.

  Now these only run when clean_desktop=True is explicitly set. Default behavior skips them entirely — the task runs against whatever state the desktop is in, which is more reliable than crashing the server trying to clean up.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* feat: update default planner model to gpt-5.4

  Update the OpenAI default model in ApiAgent from gpt-5.1 to gpt-5.4 for higher-quality planning and reasoning in both the main agent and the WAA deploy copy.

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent a851451 commit 3e56272

3 files changed

Lines changed: 25 additions & 29 deletions

File tree

openadapt_evals/adapters/waa/live.py

Lines changed: 23 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -664,34 +664,30 @@ def reset(self, task: BenchmarkTask) -> BenchmarkObservation:
664664

665665
import requests
666666

667-
# Try to close all windows for clean state via /execute_windows
668-
try:
669-
close_cmd = (
670-
"import subprocess; "
671-
"subprocess.run("
672-
"['powershell', '-Command', "
673-
"'Get-Process | Where-Object {$_.MainWindowTitle -ne \\\"\\\"} | "
674-
"ForEach-Object { $_.CloseMainWindow() }'], "
675-
"timeout=15)"
676-
)
677-
resp = requests.post(
678-
f"{self.config.server_url}/execute_windows",
679-
json={"command": close_cmd},
680-
timeout=10.0,
681-
)
682-
if resp.status_code == 200:
683-
logger.info("Closed all windows for clean state")
684-
else:
685-
logger.warning("close_all failed: HTTP %s", resp.status_code)
686-
except Exception as e:
687-
logger.warning(f"Failed to close windows (non-fatal): {e}")
688-
689-
# Optionally apply deterministic desktop policy before task setup.
690-
if self.config.clean_desktop or self.config.force_tray_icons:
667+
# Close windows and dismiss notifications ONLY if explicitly requested.
668+
# The close_all and notification cleanup commands can crash the WAA Flask
669+
# server (PowerShell Get-Process hangs, taskkill blocks), leaving the
670+
# server unresponsive for the rest of the run.
671+
if self.config.clean_desktop:
672+
try:
673+
close_cmd = (
674+
"import subprocess; "
675+
"subprocess.run("
676+
"['powershell', '-Command', "
677+
"'Get-Process | Where-Object {$_.MainWindowTitle -ne \\\"\\\"} | "
678+
"ForEach-Object { $_.CloseMainWindow() }'], "
679+
"timeout=10)"
680+
)
681+
resp = requests.post(
682+
f"{self.config.server_url}/execute_windows",
683+
json={"command": close_cmd},
684+
timeout=10.0,
685+
)
686+
if resp.status_code == 200:
687+
logger.info("Closed all windows for clean state")
688+
except Exception as e:
689+
logger.warning("close_all failed (non-fatal): %s", e)
691690
self._apply_clean_desktop_policy(requests)
692-
else:
693-
# Best-effort cleanup even when full clean policy is disabled.
694-
self._dismiss_notifications(requests)
695691

696692
# LibreOffice can surface a modal "Document Recovery" dialog after
697693
# dirty shutdowns. Pre-clean recovery state before setup to avoid

openadapt_evals/agents/api_agent.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -293,7 +293,7 @@ class ApiAgent(BenchmarkAgent):
293293
# Default models for each provider
294294
DEFAULT_MODELS = {
295295
"anthropic": "claude-sonnet-4-5-20250929",
296-
"openai": "gpt-5.1",
296+
"openai": "gpt-5.4",
297297
}
298298

299299
def __init__(

openadapt_evals/waa_deploy/api_agent.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -177,7 +177,7 @@ class ApiAgent:
177177
# Default models for each provider
178178
DEFAULT_MODELS = {
179179
"anthropic": "claude-sonnet-4-5-20250929",
180-
"openai": "gpt-5.1",
180+
"openai": "gpt-5.4",
181181
}
182182

183183
def __init__(

0 commit comments

Comments
 (0)