-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathtest_minimal_viewer.py
More file actions
128 lines (100 loc) · 4.33 KB
/
test_minimal_viewer.py
File metadata and controls
128 lines (100 loc) · 4.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/usr/bin/env python3
"""Test script for minimal benchmark viewer.
Tests:
1. Viewer HTML loads correctly
2. Generator works with real data
3. API endpoints would work (structure check)
"""
import json
from pathlib import Path
def test_viewer_html():
"""Test that viewer HTML file exists and is valid."""
viewer_path = Path(__file__).parent / "viewers" / "benchmark" / "minimal_viewer.html"
assert viewer_path.exists(), f"Viewer not found: {viewer_path}"
html = viewer_path.read_text()
# Check for key components
assert "benchmarkViewer()" in html, "Alpine.js component missing"
assert "/api/benchmark/runs" in html, "API endpoint missing"
assert "window.BENCHMARK_DATA" in html, "Data placeholder missing"
assert "alpinejs" in html, "Alpine.js script missing"
# Check line count
line_count = len(html.splitlines())
print(f"✓ Viewer HTML: {line_count} lines")
assert line_count < 650, f"Viewer too large: {line_count} lines (target: <650)"
print("✓ Viewer HTML structure valid")
def test_generator():
"""Test that generator works with real benchmark data."""
from viewers.benchmark.generator import generate_from_benchmark_results
# Find a real benchmark run
results_dir = Path(__file__).parent.parent / "openadapt-ml" / "benchmark_results"
if not results_dir.exists():
print("⚠ No benchmark_results directory, skipping generator test")
return
# Find first run
runs = [d for d in results_dir.iterdir() if d.is_dir() and (d / "summary.json").exists()]
if not runs:
print("⚠ No benchmark runs found, skipping generator test")
return
run_name = runs[0].name
output_path = Path("/tmp/test_minimal_viewer.html")
# Generate viewer
result = generate_from_benchmark_results(results_dir, run_name, output_path)
assert result.exists(), "Generated HTML not found"
# Verify embedded data
html = result.read_text()
assert "window.BENCHMARK_DATA = {" in html, "Data not embedded"
assert run_name in html, "Run name not embedded"
print(f"✓ Generator works with run: {run_name}")
print(f" Output: {output_path}")
def test_data_structure():
"""Test that real benchmark data matches expected structure."""
results_dir = Path(__file__).parent.parent / "openadapt-ml" / "benchmark_results"
if not results_dir.exists():
print("⚠ No benchmark_results directory, skipping data structure test")
return
# Find first run
runs = [d for d in results_dir.iterdir() if d.is_dir() and (d / "summary.json").exists()]
if not runs:
print("⚠ No benchmark runs found, skipping data structure test")
return
run_dir = runs[0]
# Check summary.json
summary_path = run_dir / "summary.json"
summary = json.loads(summary_path.read_text())
required_fields = ["run_name", "num_tasks", "success_rate", "avg_steps", "tasks"]
for field in required_fields:
assert field in summary, f"Missing field in summary: {field}"
# Check task structure
if summary["tasks"]:
task = summary["tasks"][0]
task_required = ["task_id", "success", "num_steps"]
for field in task_required:
assert field in task, f"Missing field in task: {field}"
# Check execution.json
task_id = summary["tasks"][0]["task_id"]
execution_path = run_dir / "tasks" / task_id / "execution.json"
if execution_path.exists():
execution = json.loads(execution_path.read_text())
assert "steps" in execution, "Missing steps in execution"
if execution["steps"]:
step = execution["steps"][0]
assert "action" in step, "Missing action in step"
assert "screenshot_path" in step, "Missing screenshot_path in step"
print(f"✓ Data structure valid for run: {run_dir.name}")
print(f" Tasks: {summary['num_tasks']}")
print(f" Success rate: {summary['success_rate']:.1%}")
if __name__ == "__main__":
print("\n=== Testing Minimal Benchmark Viewer ===\n")
try:
test_viewer_html()
test_generator()
test_data_structure()
print("\n✅ All tests passed!")
except AssertionError as e:
print(f"\n❌ Test failed: {e}")
exit(1)
except Exception as e:
print(f"\n❌ Error: {e}")
import traceback
traceback.print_exc()
exit(1)