Skip to content

Commit cc862a1

Browse files
committed
feat(engine): complete stage 1 completion and stage 2 self-healing engine runs
1 parent 70dbc56 commit cc862a1

17 files changed

Lines changed: 1285 additions & 34 deletions
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import asyncio
2+
import json
3+
import typer
4+
from pathlib import Path
5+
from scrapewizard.engine.recorder import InteractiveRecorder
6+
from scrapewizard.engine.sandbox import SandboxRunner
7+
8+
# Typer application object; the `record` and `test` commands below register on it.
app = typer.Typer(help="Self-healing test automation engine commands")
9+
10+
def async_command(f):
    """Adapt a coroutine function so it can be invoked as a plain callable.

    The returned wrapper forwards its positional and keyword arguments to
    ``f``, drives the resulting coroutine to completion with ``asyncio.run``
    and returns whatever the coroutine returned.
    """
    from functools import wraps

    def run_blocking(*args, **kwargs):
        coroutine = f(*args, **kwargs)
        return asyncio.run(coroutine)

    # Copy f's metadata (name, docstring, __wrapped__) onto the wrapper so
    # that introspection of the wrapper still describes the original function.
    return wraps(f)(run_blocking)
17+
18+
@app.command(name="record")
@async_command
async def record(
    url: str = typer.Option(
        ..., "--url", "-u",
        help="Target URL to start recording from",
    ),
    output: str = typer.Option(
        "flow.json", "--output", "-o",
        help="Path to save the generated flow.json",
    ),
    screenshots: str = typer.Option(
        "screenshots", "--screenshots", "-s",
        help="Directory to save crop screenshots",
    ),
):
    """
    Open a headed browser to record user interactions on a page.
    Saves the flow steps and element fingerprints to a flow.json file.
    """
    # NOTE: the docstring above is what Typer shows as this command's help
    # text, so it is user-facing output rather than internal documentation.
    typer.echo(f"Starting interactive recording on {url}...")

    # Recording is always headed: the user has to interact with the browser.
    session = InteractiveRecorder(
        output_path=output,
        screenshots_dir=screenshots,
        headless=False,
    )
    await session.start(url)

    typer.echo(f"Successfully saved flow recording to {output}")
33+
34+
def _echo_problem_list(heading, entries):
    """Print a counted heading followed by one bullet line per entry."""
    typer.echo(f" {heading} ({len(entries)}):")
    for entry in entries:
        typer.echo(f" - {entry}")


def _echo_step_report(number, step):
    """Print the summary line and any recorded problems for one executed step."""
    marker = "❌" if step.status != "passed" else "✅"
    typer.echo(f" {marker} Step {number}: {step.step_name} - {step.status.upper()} ({step.duration_ms} ms)")

    if step.error_message:
        typer.echo(f" Error: {step.error_message}")
    if step.console_errors:
        _echo_problem_list("Console Errors", step.console_errors)
    if step.network_errors:
        _echo_problem_list("Network Failures", step.network_errors)
    if step.a11y_violations:
        typer.echo(f" A11y Violations ({len(step.a11y_violations)}):")
        for violation in step.a11y_violations:
            typer.echo(f" - [{violation['impact']}] {violation['id']}: {violation['help']}")


@app.command(name="test")
@async_command
async def test(
    flow_path: str = typer.Argument(
        ...,
        help="Path to the flow.json file to execute",
    ),
    artifacts: str = typer.Option(
        None, "--artifacts", "-a",
        help="Directory to save run artifacts",
    ),
    headless: bool = typer.Option(
        True, "--headless/--headed",
        help="Run the browser in headless or headed mode",
    ),
):
    """
    Run an automated headless sandbox execution of the recorded flow.json.
    Validates console/network errors, visual diffs, and accessibility violations.
    """
    # NOTE: the docstring above is what Typer shows as this command's help text.
    flow_file = Path(flow_path)
    if not flow_file.exists():
        typer.echo(f"Error: flow file not found at {flow_path}", err=True)
        raise typer.Exit(code=1)

    # Parse the file up front purely as validation, so a malformed flow is
    # reported as a clean CLI error. The parsed data itself is not used here.
    try:
        with flow_file.open("r", encoding="utf-8") as handle:
            json.load(handle)
    except Exception as exc:
        typer.echo(f"Error: failed to parse JSON in {flow_path}: {exc}", err=True)
        raise typer.Exit(code=1)

    # The generator is given the path and builds the test definition itself.
    from scrapewizard.engine.test_generator import TestGenerator
    test_def = TestGenerator(flow_path).generate()

    typer.echo(f"Running sandbox execution for {flow_path}...")
    sandbox = SandboxRunner(artifacts_dir=artifacts, headless=headless)
    result = await sandbox.run(test_def)

    # Overall summary first, then one report per step (numbered from 1).
    typer.echo(f"\nExecution finished in {result.duration_ms} ms. Status: {result.status.upper()}")
    typer.echo(f"Artifacts saved to: {result.artifacts_dir}\n")
    for number, step in enumerate(result.step_results, start=1):
        _echo_step_report(number, step)

    # The process exit code mirrors the run outcome: 0 on pass, 1 otherwise.
    succeeded = result.status == "passed"
    typer.echo("\n✅ Run PASSED." if succeeded else "\n❌ Run FAILED.")
    raise typer.Exit(code=0 if succeeded else 1)

scrapewizard/cli/main.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import typer
2-
from scrapewizard.cli.commands import setup, scrape, utils
2+
from scrapewizard.cli.commands import setup, scrape, utils, engine
33
from scrapewizard.core.logging import Logger
44

55
app = typer.Typer(
@@ -16,6 +16,8 @@
1616
app.command()(utils.clean)
1717
app.command()(utils.doctor)
1818
app.command()(utils.resume)
19+
app.command(name="record")(engine.record)
20+
app.command(name="test")(engine.test)
1921

2022
VERSION = "1.2.0"
2123

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# ScrapeWizard Step Checks Package
2+
from .console_network import ConsoleNetworkTracker
3+
from .visual import perform_visual_check
4+
from .a11y import perform_a11y_check

scrapewizard/engine/checks/a11y.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import os
2+
from pathlib import Path
3+
from typing import List, Dict, Any
4+
from scrapewizard.core.logging import log
5+
6+
# Location of the bundled axe-core build, resolved next to this module.
AXE_SCRIPT_PATH = Path(__file__).parent / "axe.min.js"

# Script source read once at import time and kept in memory; stays None
# when the bundled file is not present.
AXE_SCRIPT_CONTENT = (
    AXE_SCRIPT_PATH.read_text(encoding="utf-8")
    if AXE_SCRIPT_PATH.exists()
    else None
)
13+
14+
async def perform_a11y_check(page) -> List[Dict[str, Any]]:
    """
    Run an axe-core audit on a Playwright page and summarise its violations.

    The axe-core source is evaluated in the page, then ``axe.run()`` is
    awaited in the page context. Each reported violation is reduced to a dict
    with the keys ``id``, ``impact``, ``description``, ``help``, ``help_url``
    and ``nodes``; every node carries ``html``, ``target`` and
    ``failure_summary``.

    Returns an empty list when the axe-core bundle cannot be found, or when
    injecting or running it raises; both cases are logged as warnings.
    """
    global AXE_SCRIPT_CONTENT

    if not AXE_SCRIPT_CONTENT:
        # Nothing cached yet: try to load the bundle now.
        if not AXE_SCRIPT_PATH.exists():
            log("axe.min.js not found, skipping accessibility check", level="warning")
            return []
        AXE_SCRIPT_CONTENT = AXE_SCRIPT_PATH.read_text(encoding="utf-8")

    try:
        # Make `axe` available in the page.
        await page.evaluate(AXE_SCRIPT_CONTENT)

        # axe.run() returns a promise, hence the async arrow function.
        report = await page.evaluate("async () => { return await axe.run(); }")

        return [
            {
                "id": item.get("id"),
                "impact": item.get("impact"),
                "description": item.get("description"),
                "help": item.get("help"),
                "help_url": item.get("helpUrl"),
                "nodes": [
                    {
                        "html": element.get("html"),
                        "target": element.get("target"),
                        "failure_summary": element.get("failureSummary"),
                    }
                    for element in item.get("nodes", [])
                ],
            }
            for item in report.get("violations", [])
        ]
    except Exception as exc:
        # Best effort: a failed audit is reported as "no violations".
        log(f"Accessibility validation failed: {exc}", level="warning")
        return []

scrapewizard/engine/checks/axe.min.js

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
class ConsoleNetworkTracker:
    """
    Collects problems reported by a Playwright page during a test execution
    block: console messages of type "error" and responses whose status code
    is 400 or higher.
    """

    def __init__(self, page):
        self.page = page
        # Formatted entries accumulate here until flush() is called.
        self.console_errors = []
        self.network_errors = []
        self._setup_listeners()

    def _setup_listeners(self):
        """Subscribe to the page's "console" and "response" events."""
        self.page.on("console", self._record_console)
        self.page.on("response", self._record_response)

    def _record_console(self, msg):
        """Store an error-level console message as "[url:line] text"."""
        if msg.type != "error":
            return
        where = msg.location or {}
        source_url = where.get("url") or "unknown"
        line_number = where.get("lineNumber") or 0
        self.console_errors.append(f"[{source_url}:{line_number}] {msg.text}")

    def _record_response(self, response):
        """Store a response with status >= 400 as "[status] url"."""
        if response.status < 400:
            return
        self.network_errors.append(f"[{response.status}] {response.url}")

    def flush(self):
        """Return the collected errors and clear the tracking buffers."""
        snapshot = (self.console_errors[:], self.network_errors[:])
        # Empty the lists in place so existing references see the reset too.
        del self.console_errors[:]
        del self.network_errors[:]
        return snapshot
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import os
2+
from pathlib import Path
3+
from typing import Tuple, Optional
4+
from PIL import Image, ImageChops
5+
6+
def perform_visual_check(
    current_screenshot_path: str,
    step_name: str,
    baseline_dir: str,
    diff_dir: str,
    threshold: float = 0.05
) -> Tuple[float, Optional[str]]:
    """
    Compare a step's screenshot with its stored baseline image.

    The baseline is looked up at ``<baseline_dir>/<step_name>.png``.

    * If the current screenshot does not exist there is nothing to compare
      and ``(0.0, None)`` is returned.
    * If no baseline exists yet, the current screenshot is saved as the new
      baseline (best effort) and ``(0.0, None)`` is returned.
    * If either image cannot be opened or decoded, ``(0.0, None)`` is returned.
    * Otherwise the score is the fraction of pixels whose RGB value differs
      from the baseline in at least one channel. When the sizes differ, the
      current screenshot is resized to the baseline's size first.

    Args:
        current_screenshot_path: Path of the screenshot taken in this run.
        step_name: Step identifier used to build the baseline and diff names.
        baseline_dir: Directory holding the baseline images.
        diff_dir: Directory that receives ``diff_<step_name>.png``.
        threshold: Score above which a difference image is written.

    Returns:
        ``(diff_score, diff_image_path)``. ``diff_image_path`` is ``None``
        unless the score exceeded ``threshold`` and the difference image was
        written successfully.
    """
    current_path = Path(current_screenshot_path)
    baseline_path = Path(baseline_dir) / f"{step_name}.png"

    if not current_path.exists():
        return 0.0, None

    if not baseline_path.exists():
        # First run for this step: adopt the current screenshot as baseline.
        baseline_path.parent.mkdir(parents=True, exist_ok=True)
        try:
            # Context manager releases the file handle after the copy.
            with Image.open(current_path) as current_image:
                current_image.save(baseline_path)
        except Exception:
            # Best effort by design: a baseline that cannot be written must
            # not fail the step; the next run will try again.
            pass
        return 0.0, None

    try:
        # convert() returns a fully loaded copy, so the underlying files can
        # be closed as soon as each `with` block ends.
        with Image.open(baseline_path) as baseline_image:
            baseline = baseline_image.convert("RGB")
        with Image.open(current_path) as current_image:
            current = current_image.convert("RGB")
    except Exception:
        # Unreadable or corrupt image: report "no difference" rather than fail.
        return 0.0, None

    if baseline.size != current.size:
        current = current.resize(baseline.size)

    diff = ImageChops.difference(baseline, current)
    if diff.getbbox() is None:
        # Images are pixel-for-pixel identical.
        return 0.0, None

    # Reduce the three per-channel differences to their per-pixel maximum.
    # A pixel is 0 in the result only if it matches in every channel.
    # (A weighted grayscale conversion would round small single-channel
    # differences down to 0 and undercount differing pixels.)
    red, green, blue = diff.split()
    strongest = ImageChops.lighter(ImageChops.lighter(red, green), blue)

    width, height = strongest.size
    total_pixels = width * height
    identical_pixels = strongest.histogram()[0]
    diff_score = (total_pixels - identical_pixels) / total_pixels

    diff_image_path = None
    if diff_score > threshold:
        diff_path = Path(diff_dir) / f"diff_{step_name}.png"
        diff_path.parent.mkdir(parents=True, exist_ok=True)
        try:
            diff.save(diff_path)
            diff_image_path = str(diff_path)
        except Exception:
            # Best effort: the score is still returned if the diff image
            # cannot be written; the path simply stays None.
            pass

    return diff_score, diff_image_path

0 commit comments

Comments
 (0)