Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions openadapt_ml/benchmarks/azure_ops_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,9 @@ def start_operation(
log_tail=[], # Clear stale logs
started_at=self._start_time.isoformat(),
elapsed_seconds=0.0,
eta_seconds=TYPICAL_DURATIONS.get(operation), # Use typical duration as initial ETA
eta_seconds=TYPICAL_DURATIONS.get(
operation
), # Use typical duration as initial ETA
cost_usd=0.0,
hourly_rate_usd=self.hourly_rate,
vm_ip=vm_ip,
Expand Down Expand Up @@ -284,7 +286,9 @@ def parse_docker_build_line(self, line: str) -> dict[str, Any]:
downloaded_unit = download_match.group(2)
total = float(download_match.group(3))
total_unit = download_match.group(4)
result["download_bytes"] = int(downloaded * size_multipliers[downloaded_unit])
result["download_bytes"] = int(
downloaded * size_multipliers[downloaded_unit]
)
result["download_total_bytes"] = int(total * size_multipliers[total_unit])

# Extract phase from buildx output
Expand Down Expand Up @@ -384,7 +388,9 @@ def _update_progress(self) -> None:

# ETA from download speed
if self._status.download_bytes > 0 and self._status.elapsed_seconds > 1:
bytes_per_sec = self._status.download_bytes / self._status.elapsed_seconds
bytes_per_sec = (
self._status.download_bytes / self._status.elapsed_seconds
)
remaining_bytes = (
self._status.download_total_bytes - self._status.download_bytes
)
Expand All @@ -402,9 +408,15 @@ def _update_progress(self) -> None:
remaining_steps = self._status.total_steps - self._status.step
step_eta = time_per_step * remaining_steps
# Use step ETA if we don't have download ETA or if step progress > download
if eta_seconds is None or step_pct > (
self._status.download_bytes / max(self._status.download_total_bytes, 1)
) * 100:
if (
eta_seconds is None
or step_pct
> (
self._status.download_bytes
/ max(self._status.download_total_bytes, 1)
)
* 100
):
eta_seconds = step_eta

# 3. Fallback: Use typical duration if no progress info
Expand Down
17 changes: 8 additions & 9 deletions openadapt_ml/benchmarks/viewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,10 @@

from __future__ import annotations

import warnings

warnings.warn(
"openadapt_ml.benchmarks.viewer is deprecated. "
"Use openadapt_viewer instead: from openadapt_viewer import generate_benchmark_viewer",
DeprecationWarning,
stacklevel=2,
)

import base64
import json
import logging
import warnings
from pathlib import Path
from typing import Any

Expand All @@ -58,6 +50,13 @@
generate_shared_header_html as _generate_shared_header_html,
)

warnings.warn(
"openadapt_ml.benchmarks.viewer is deprecated. "
"Use openadapt_viewer instead: from openadapt_viewer import generate_benchmark_viewer",
DeprecationWarning,
stacklevel=2,
)

logger = logging.getLogger(__name__)


Expand Down
4 changes: 3 additions & 1 deletion openadapt_ml/benchmarks/vm_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,9 @@ def detect_vm_activity(
return VMActivity(
is_active=is_running,
activity_type="benchmark_running" if is_running else "idle",
description="WAA benchmark running" if is_running else "WAA ready - idle",
description="WAA benchmark running"
if is_running
else "WAA ready - idle",
benchmark_progress=probe_data,
)
except json.JSONDecodeError:
Expand Down
41 changes: 21 additions & 20 deletions openadapt_ml/benchmarks/waa_deploy/api_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
import os
import re
from io import BytesIO
from typing import Any, Dict, List
from typing import Dict, List

from PIL import Image

Expand Down Expand Up @@ -210,6 +210,7 @@ def __init__(
)
try:
from anthropic import Anthropic

self._client = Anthropic(api_key=self.api_key)
except ImportError:
raise RuntimeError(
Expand All @@ -225,6 +226,7 @@ def __init__(
)
try:
from openai import OpenAI

self._client = OpenAI(api_key=self.api_key)
except ImportError:
raise RuntimeError(
Expand All @@ -240,9 +242,13 @@ def __init__(
self.memory_block_text = "# empty memory block"
self.step_counter = 0

logger.info(f"ApiAgent initialized with provider={provider}, model={self.model}")
logger.info(
f"ApiAgent initialized with provider={provider}, model={self.model}"
)
if self.demo:
logger.info(f"Demo trajectory provided ({len(self.demo)} chars) - will persist across all steps")
logger.info(
f"Demo trajectory provided ({len(self.demo)} chars) - will persist across all steps"
)

def predict(self, instruction: str, obs: Dict) -> tuple:
"""Predict the next action based on observation.
Expand Down Expand Up @@ -325,10 +331,9 @@ def predict(self, instruction: str, obs: Dict) -> tuple:
# Add action history if enabled (enhanced: includes reasoning, not just raw actions)
if self.use_history and self.history:
# Use rich history with reasoning (like PC Agent-E)
history_entries = self.history[-self.history_cutoff:]
history_entries = self.history[-self.history_cutoff :]
history_str = "\n\n".join(
f"[Step {i+1}] {entry}"
for i, entry in enumerate(history_entries)
f"[Step {i + 1}] {entry}" for i, entry in enumerate(history_entries)
)
content_parts.append(f"History of previous steps:\n{history_str}")
logs["history_entries"] = len(history_entries)
Expand Down Expand Up @@ -381,14 +386,18 @@ def predict(self, instruction: str, obs: Dict) -> tuple:
actions = [code_text]
self.prev_actions.append(code_text)
# Store rich history with reasoning (memory + action)
self._add_to_history(f"Thought: {self.memory_block_text}\nAction: {code_text}")
self._add_to_history(
f"Thought: {self.memory_block_text}\nAction: {code_text}"
)
else:
# Try to extract action from response text
action = self._parse_action_from_text(response_text, w, h)
if action:
actions = [action]
self.prev_actions.append(action)
self._add_to_history(f"Thought: {self.memory_block_text}\nAction: {action}")
self._add_to_history(
f"Thought: {self.memory_block_text}\nAction: {action}"
)
else:
logger.warning("Could not extract action from response")
actions = ["# Could not parse action"]
Expand Down Expand Up @@ -483,33 +492,25 @@ def _parse_action_from_text(self, text: str, width: int, height: int) -> str | N
Python code string or None if parsing failed.
"""
# Try to find click coordinates
click_match = re.search(
r"click.*?(\d+)\s*,\s*(\d+)", text, re.IGNORECASE
)
click_match = re.search(r"click.*?(\d+)\s*,\s*(\d+)", text, re.IGNORECASE)
if click_match:
x, y = int(click_match.group(1)), int(click_match.group(2))
return f"computer.click({x}, {y})"

# Try to find type text
type_match = re.search(
r'type[:\s]+["\'](.+?)["\']', text, re.IGNORECASE
)
type_match = re.search(r'type[:\s]+["\'](.+?)["\']', text, re.IGNORECASE)
if type_match:
text_to_type = type_match.group(1)
return f'computer.type("{text_to_type}")'

# Try to find key press
key_match = re.search(
r"press[:\s]+(\w+)", text, re.IGNORECASE
)
key_match = re.search(r"press[:\s]+(\w+)", text, re.IGNORECASE)
if key_match:
key = key_match.group(1).lower()
return f'computer.press("{key}")'

# Try to find hotkey
hotkey_match = re.search(
r"hotkey[:\s]+(\w+)\s*\+\s*(\w+)", text, re.IGNORECASE
)
hotkey_match = re.search(r"hotkey[:\s]+(\w+)\s*\+\s*(\w+)", text, re.IGNORECASE)
if hotkey_match:
key1, key2 = hotkey_match.group(1).lower(), hotkey_match.group(2).lower()
return f'computer.hotkey("{key1}", "{key2}")'
Expand Down
24 changes: 18 additions & 6 deletions openadapt_ml/cloud/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -1078,11 +1078,17 @@ def do_GET(self):
status["session_id"] = session.get("session_id")
status["session_is_active"] = session.get("is_active", False)
# Include accumulated time from previous sessions for hybrid display
status["accumulated_seconds"] = session.get("accumulated_seconds", 0.0)
status["accumulated_seconds"] = session.get(
"accumulated_seconds", 0.0
)
# Calculate current session time (total - accumulated)
current_session_seconds = max(0, status["elapsed_seconds"] - status["accumulated_seconds"])
current_session_seconds = max(
0, status["elapsed_seconds"] - status["accumulated_seconds"]
)
status["current_session_seconds"] = current_session_seconds
status["current_session_cost_usd"] = (current_session_seconds / 3600) * session.get("hourly_rate_usd", 0.422)
status["current_session_cost_usd"] = (
current_session_seconds / 3600
) * session.get("hourly_rate_usd", 0.422)

try:
tunnel_mgr = get_tunnel_manager()
Expand Down Expand Up @@ -3212,12 +3218,18 @@ def compute_server_side_values(status: dict) -> dict:
status["session_id"] = session.get("session_id")
status["session_is_active"] = session.get("is_active", False)
# Include accumulated time from previous sessions for hybrid display
status["accumulated_seconds"] = session.get("accumulated_seconds", 0.0)
status["accumulated_seconds"] = session.get(
"accumulated_seconds", 0.0
)
# Calculate current session time (total - accumulated)
current_session_seconds = max(0, status["elapsed_seconds"] - status["accumulated_seconds"])
current_session_seconds = max(
0, status["elapsed_seconds"] - status["accumulated_seconds"]
)
status["current_session_seconds"] = current_session_seconds
hourly_rate = session.get("hourly_rate_usd", 0.422)
status["current_session_cost_usd"] = (current_session_seconds / 3600) * hourly_rate
status["current_session_cost_usd"] = (
current_session_seconds / 3600
) * hourly_rate

try:
tunnel_mgr = get_tunnel_manager()
Expand Down
9 changes: 4 additions & 5 deletions openadapt_ml/scripts/capture_screenshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@
from __future__ import annotations

import argparse
import base64
import datetime
import os
import re
import subprocess
import sys
Expand Down Expand Up @@ -108,7 +106,6 @@ def capture_web_page_selenium(url: str, output_path: Path) -> bool:
try:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service

options = Options()
options.add_argument("--headless")
Expand Down Expand Up @@ -358,7 +355,9 @@ def capture_vm_screenshot_from_vm(output_path: Path) -> bool:
)

if result.returncode != 0:
print(f" VM screenshot failed: {result.stderr[:200] if result.stderr else 'Unknown error'}")
print(
f" VM screenshot failed: {result.stderr[:200] if result.stderr else 'Unknown error'}"
)
return False

# The CLI saves to training_output/current/vm_screenshot.png
Expand Down Expand Up @@ -499,7 +498,7 @@ def main():
print(f" OK: {output_path.name} ({size_kb:.1f} KB)")
results[target] = str(output_path)
else:
print(f" SKIP: Not available or capture failed")
print(" SKIP: Not available or capture failed")
results[target] = None
except Exception as e:
print(f" ERROR: {e}")
Expand Down
15 changes: 7 additions & 8 deletions openadapt_ml/training/viewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@

from __future__ import annotations

import json
import warnings
from pathlib import Path

from openadapt_ml.training.shared_ui import (
get_shared_header_css as _get_shared_header_css,
generate_shared_header_html as _generate_shared_header_html,
)

warnings.warn(
"openadapt_ml.training.viewer is deprecated. "
Expand All @@ -22,14 +29,6 @@
stacklevel=2,
)

import json
from pathlib import Path

from openadapt_ml.training.shared_ui import (
get_shared_header_css as _get_shared_header_css,
generate_shared_header_html as _generate_shared_header_html,
)


def _copy_transcript_and_audio(capture_path: Path | None, output_dir: Path) -> None:
"""Copy transcript.json and convert audio to mp3 for viewer playback.
Expand Down