Skip to content

Commit 426377f

Browse files
abrichrclaude
andcommitted
docs: switch benchmark viewer animation to WebP with compact layout
Replace lossy GIF (256 colors, 749KB) with high-quality animated WebP (quality 90, 588KB) for the README benchmark viewer animation. Changes: - Add compact mode to viewer HTML (compact=True hides nav header, summary panel, filter bar, and log panel via CSS) so screenshots are fully visible in animation frames - Add scripts/generate_viewer_animation.py using Playwright for frame capture and Pillow for animated WebP assembly - Update README to reference .webp instead of .gif - Remove old benchmark-viewer.gif Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 4160a09 commit 426377f

5 files changed

Lines changed: 292 additions & 2 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ OpenAdapt Evals is a unified framework for evaluating GUI automation agents agai
1414

1515
## Benchmark Viewer
1616

17-
![Benchmark Viewer Animation](https://raw.githubusercontent.com/OpenAdaptAI/openadapt-evals/main/animations/benchmark-viewer.gif)
17+
![Benchmark Viewer Animation](https://raw.githubusercontent.com/OpenAdaptAI/openadapt-evals/main/animations/benchmark-viewer.webp)
1818

1919
<details>
2020
<summary>More screenshots</summary>

animations/benchmark-viewer.gif

-731 KB
Binary file not shown.

animations/benchmark-viewer.webp

588 KB
Loading

openadapt_evals/benchmarks/viewer.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ def generate_benchmark_viewer(
261261
benchmark_dir: Path,
262262
output_path: Path | None = None,
263263
embed_screenshots: bool = False,
264+
compact: bool = False,
264265
) -> Path:
265266
"""Generate HTML viewer for benchmark results.
266267
@@ -270,6 +271,9 @@ def generate_benchmark_viewer(
270271
output_path: Path for output HTML file. Defaults to benchmark_dir/benchmark.html.
271272
embed_screenshots: If True, embed screenshots as base64 data URLs.
272273
This creates a larger but fully standalone HTML file.
274+
compact: If True, hide the navigation header, summary panel, filter bar,
275+
keyboard hint, and log panel to maximize space for the task list and
276+
screenshots. Useful when generating screenshots for animations.
273277
274278
Returns:
275279
Path to generated HTML file.
@@ -294,6 +298,7 @@ def generate_benchmark_viewer(
294298
domain_stats=domain_stats,
295299
benchmark_dir=benchmark_dir,
296300
embed_screenshots=embed_screenshots,
301+
compact=compact,
297302
)
298303

299304
# Write output
@@ -312,6 +317,7 @@ def _generate_benchmark_viewer_html(
312317
domain_stats: dict[str, dict[str, int]],
313318
benchmark_dir: Path,
314319
embed_screenshots: bool = False,
320+
compact: bool = False,
315321
) -> str:
316322
"""Generate the HTML content for benchmark viewer.
317323
@@ -322,6 +328,7 @@ def _generate_benchmark_viewer_html(
322328
domain_stats: Per-domain statistics.
323329
benchmark_dir: Base directory for resolving relative paths.
324330
embed_screenshots: If True, embed screenshots as base64.
331+
compact: If True, hide header, summary, filter bar, keyboard hint, and log panel.
325332
326333
Returns:
327334
HTML string.
@@ -367,6 +374,8 @@ def _generate_benchmark_viewer_html(
367374
num_success = sum(1 for t in tasks if t.get("execution", {}).get("success", False))
368375
success_rate = (num_success / num_tasks * 100) if num_tasks > 0 else 0
369376

377+
body_class = ' class="compact"' if compact else ''
378+
370379
html = f'''<!DOCTYPE html>
371380
<html lang="en">
372381
<head>
@@ -1116,9 +1125,23 @@ def _generate_benchmark_viewer_html(
11161125
11171126
/* Keyboard Shortcuts */
11181127
{keyboard_shortcuts_css}
1128+
1129+
/* Compact mode: hide chrome to maximize screenshot area */
1130+
body.compact .unified-header {{ display: none; }}
1131+
body.compact .summary-panel {{ display: none; }}
1132+
body.compact .filter-bar {{ display: none; }}
1133+
body.compact .keyboard-hint {{ display: none; }}
1134+
body.compact .container {{ padding: 8px; }}
1135+
body.compact .main-content {{ gap: 8px; }}
1136+
body.compact .task-list {{ max-height: calc(100vh - 40px); }}
1137+
body.compact .task-detail-header {{ padding: 8px 12px; }}
1138+
body.compact .task-detail-header h2 {{ font-size: 0.85rem; margin-bottom: 4px; }}
1139+
body.compact .task-detail-instruction {{ padding: 6px; margin-top: 4px; font-size: 0.8rem; }}
1140+
body.compact .step-viewer {{ padding: 8px; gap: 8px; }}
1141+
body.compact .log-panel {{ display: none; }}
11191142
</style>
11201143
</head>
1121-
<body>
1144+
<body{body_class}>
11221145
{shared_header_html}
11231146
11241147
<div class="container">
Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
"""Generate an animated WebP of the benchmark viewer for README.
2+
3+
This script:
4+
1. Generates a compact benchmark viewer HTML (with embedded screenshots).
5+
2. Uses Playwright to capture frames showing the overview, task selection,
6+
and step-by-step screenshot replay.
7+
3. Assembles frames into a high-quality lossy animated WebP (quality=90).
8+
9+
Usage:
10+
uv run python scripts/generate_viewer_animation.py \
11+
--benchmark-dir benchmark_results/phase0_multi_domain_v3 \
12+
--output animations/benchmark-viewer.webp
13+
14+
Requirements:
15+
pip install playwright pillow
16+
python -m playwright install chromium
17+
"""
18+
19+
from __future__ import annotations
20+
21+
import argparse
22+
import logging
23+
import sys
24+
import tempfile
25+
import time
26+
from pathlib import Path
27+
28+
from PIL import Image
29+
from playwright.sync_api import sync_playwright
30+
31+
# Add project root to path for imports
32+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
33+
34+
from openadapt_evals.benchmarks.viewer import generate_benchmark_viewer
35+
36+
logger = logging.getLogger(__name__)
37+
38+
# Animation settings
39+
VIEWPORT_WIDTH = 1280
40+
VIEWPORT_HEIGHT = 800
41+
FRAME_DURATION_MS = 2500 # 2.5s per frame
42+
STEP_DURATION_MS = 1500 # 1.5s per step screenshot
43+
TRANSITION_PAUSE_MS = 800 # brief pause for transitions
44+
45+
46+
def capture_frames(
    html_path: Path,
    benchmark_dir: Path,
    output_dir: Path,
    viewport_width: int = VIEWPORT_WIDTH,
    viewport_height: int = VIEWPORT_HEIGHT,
) -> list[Path]:
    """Capture animation frames from the viewer using Playwright.

    The viewer is opened in headless Chromium. One overview frame is taken,
    then for each of the first (at most) five ``.task-item`` entries the task
    is clicked, a detail frame is taken, and the "Next" button is used to
    advance to a middle and a final step, each of which is captured as well.

    Args:
        html_path: Path to the generated HTML viewer.
        benchmark_dir: Path to benchmark results. Not read by this function;
            kept so existing callers that pass it keep working.
        output_dir: Existing directory in which frame PNGs are written as
            ``frame_000.png``, ``frame_001.png``, ...
        viewport_width: Browser viewport width.
        viewport_height: Browser viewport height.

    Returns:
        List of paths to captured frame PNGs, in capture order.
    """
    frames: list[Path] = []

    def save_frame(page, label: str = "") -> Path:
        # The next frame index is simply the number of frames saved so far.
        path = output_dir / f"frame_{len(frames):03d}.png"
        page.screenshot(path=str(path))
        logger.info(f"Frame {len(frames)}: {label}")
        frames.append(path)
        return path

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page(
                viewport={"width": viewport_width, "height": viewport_height},
            )

            # as_uri() percent-encodes spaces, '#', '?' etc. and produces a
            # valid file URL on Windows too, unlike "file://" + str(path).
            page.goto(html_path.resolve().as_uri())
            page.wait_for_load_state("networkidle")
            # Use Playwright's own wait instead of time.sleep so the sync API
            # keeps servicing browser events while pausing.
            page.wait_for_timeout(500)

            # Frame 1: overview (task list visible, no task selected).
            save_frame(page, "overview - task list")

            task_items = page.locator(".task-item")
            task_count = task_items.count()
            logger.info(f"Found {task_count} tasks in viewer")

            tasks_to_show = min(task_count, 5)

            for task_idx in range(tasks_to_show):
                task_items.nth(task_idx).click()
                page.wait_for_timeout(600)

                # Task selected: detail header + first screenshot (step 0).
                save_frame(page, f"task {task_idx} - detail view")

                total_steps = _parse_total_steps(
                    page.locator("#step-progress").text_content()
                )
                next_btn = page.locator("button:has-text('Next')")

                # The detail view starts on step 0; click "Next" only as many
                # times as needed to get from the current step to the target.
                current_step = 0
                for step in _select_steps(total_steps):
                    for _ in range(step - current_step):
                        next_btn.click()
                        page.wait_for_timeout(300)
                    current_step = step

                    save_frame(page, f"task {task_idx} - step {step}/{total_steps}")
        finally:
            # Always release the browser, even if a capture step failed.
            browser.close()

    return frames


def _parse_total_steps(progress_text: str | None) -> int:
    """Return the total from a "<current> / <total>" string, or 0 if unparseable."""
    parts = (progress_text or "0 / 0").split("/")
    if len(parts) != 2:
        return 0
    try:
        return int(parts[1].strip())
    except ValueError:
        return 0


def _select_steps(total_steps: int) -> list[int]:
    """Return the step indices after step 0 to capture: the middle and the last."""
    if total_steps <= 1:
        return []
    last = total_steps - 1
    if total_steps == 2:
        return [last]
    # For total_steps >= 3 the middle index is always >= 1 and < last.
    return [total_steps // 2, last]
138+
139+
140+
def assemble_animation(
    frame_paths: list[Path],
    output_path: Path,
    frame_duration_ms: int = FRAME_DURATION_MS,
) -> None:
    """Assemble frames into an animated WebP.

    Args:
        frame_paths: List of paths to frame PNG files, in display order.
        output_path: Output path for the animated WebP. Missing parent
            directories are created.
        frame_duration_ms: Duration per frame in milliseconds.

    Raises:
        ValueError: If ``frame_paths`` is empty.
    """
    if not frame_paths:
        raise ValueError("No frames to assemble")

    output_path.parent.mkdir(parents=True, exist_ok=True)

    images = []
    try:
        for frame_path in frame_paths:
            images.append(Image.open(frame_path))

        # Save as animated WebP using lossy encoding at quality=90. This is a
        # deliberate trade-off: it keeps the file small enough for a README
        # while avoiding the 256-color palette limit of GIF.
        images[0].save(
            str(output_path),
            format="WEBP",
            save_all=True,
            append_images=images[1:],
            duration=frame_duration_ms,
            loop=0,  # 0 = loop forever
            quality=90,
        )
    finally:
        # Image.open keeps the underlying file open; release every handle
        # whether or not encoding succeeded.
        for image in images:
            image.close()

    size_kb = output_path.stat().st_size / 1024
    logger.info(
        f"Generated animation: {output_path} "
        f"({len(images)} frames, {size_kb:.0f} KB)"
    )
178+
179+
180+
def main() -> None:
    """Generate benchmark viewer animation.

    Parses command-line arguments, renders a compact viewer with embedded
    screenshots into a temporary directory, captures frames from it with
    ``capture_frames``, and writes the animated WebP with
    ``assemble_animation``. Exits with status 1 if the benchmark directory
    does not exist or is not a directory.
    """
    parser = argparse.ArgumentParser(
        description="Generate an animated WebP of the benchmark viewer"
    )
    parser.add_argument(
        "--benchmark-dir",
        type=Path,
        default=Path("benchmark_results/phase0_multi_domain_v3"),
        help="Path to benchmark results directory",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=Path("animations/benchmark-viewer.webp"),
        help="Output path for animated WebP",
    )
    # Help texts are derived from the module constants so the documented
    # defaults cannot drift from the real ones.
    parser.add_argument(
        "--frame-duration",
        type=int,
        default=FRAME_DURATION_MS,
        help=f"Duration per frame in ms (default: {FRAME_DURATION_MS})",
    )
    parser.add_argument(
        "--width",
        type=int,
        default=VIEWPORT_WIDTH,
        help=f"Viewport width (default: {VIEWPORT_WIDTH})",
    )
    parser.add_argument(
        "--height",
        type=int,
        default=VIEWPORT_HEIGHT,
        help=f"Viewport height (default: {VIEWPORT_HEIGHT})",
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Enable verbose logging",
    )
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
    )

    # Resolve benchmark dir. is_dir() (rather than exists()) also rejects a
    # path that points at a regular file.
    benchmark_dir = args.benchmark_dir.resolve()
    if not benchmark_dir.is_dir():
        logger.error(f"Benchmark directory not found: {benchmark_dir}")
        sys.exit(1)

    # Intermediate HTML and PNG frames live in a temp dir that is removed
    # automatically; only the final WebP is written to the requested output.
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir_path = Path(tmpdir)

        # Step 1: Generate compact viewer with embedded screenshots
        logger.info("Generating compact viewer HTML...")
        html_path = tmpdir_path / "viewer.html"
        generate_benchmark_viewer(
            benchmark_dir=benchmark_dir,
            output_path=html_path,
            embed_screenshots=True,
            compact=True,
        )
        logger.info(f"Viewer HTML: {html_path} ({html_path.stat().st_size / 1024:.0f} KB)")

        # Step 2: Capture frames
        logger.info("Capturing frames with Playwright...")
        frames_dir = tmpdir_path / "frames"
        frames_dir.mkdir()
        frame_paths = capture_frames(
            html_path, benchmark_dir, frames_dir,
            viewport_width=args.width,
            viewport_height=args.height,
        )
        logger.info(f"Captured {len(frame_paths)} frames")

        # Step 3: Assemble animation (must happen while the frames still exist)
        logger.info("Assembling animated WebP...")
        output_path = args.output.resolve()
        assemble_animation(frame_paths, output_path, args.frame_duration)

    logger.info("Done!")
264+
265+
266+
if __name__ == "__main__":
267+
main()

0 commit comments

Comments
 (0)