Add AI-driven PyOsmo model generator and refinement scripts.

OPpuolitaival · OPpuolitaival · commit a5a62f79ee5f · 2026-02-14T23:37:52.000+02:00
- Introduce `generate_model.py` for automating PyOsmo model creation using Claude and Playwright.
- Add `refine_model.py` to optimize generated models based on test execution history.
- Include prompt templates for Claude agent in `prompt_template.py`.
- Provide example outputs and usage documentation in `README.md` and `example_output/`.
- Extend `history.py` with JSON export utilities to facilitate model refinement.
diff --git a/examples/ai_web_agent/README.md b/examples/ai_web_agent/README.md
@@ -0,0 +1,69 @@
+# AI Web Agent - PyOsmo Model Generator
+
+Generate PyOsmo model-based tests for any web application using Claude + Playwright.
+
+## How it works
+
+1. **`generate_model.py`** — A Claude agent explores a web page via Playwright MCP, discovers interactive elements, and generates a PyOsmo model with `step_*`/`guard_*` methods.
+
+2. **`refine_model.py`** — Runs the generated model, collects test history as JSON, and sends it to Claude for analysis. The agent fixes errors, improves coverage, and adds missing steps.
+
+3. **`prompt_template.py`** — System prompts that teach the agent PyOsmo patterns.
+
+## Setup
+
+```bash
+# Install dependencies
+pip install claude-agent-sdk playwright pyosmo
+
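+# Install Playwright browsers
+playwright install chromium
+
+# Node.js (for `npx`) is also required: generate_model.py starts the
+# Playwright MCP server with `npx @playwright/mcp@latest`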
+
+# Set your API key
+export ANTHROPIC_API_KEY=your-key-here
+```
+
+## Usage
+
+### Generate a model
+
+```bash
+python generate_model.py https://todomvc.com/examples/react/dist/ -o todo_model.py
+```
+
+### Refine a model
+
+```bash
+# Single refinement pass
+python refine_model.py todo_model.py --url https://todomvc.com/examples/react/dist/
+
+# Multiple iterations
+python refine_model.py todo_model.py --url https://todomvc.com/examples/react/dist/ --iterations 3
+```
+
+### Run the generated model directly
+
+```bash
+python todo_model.py
+```
+
+## Example output
+
+See `example_output/todo_app_model.py` for a sample of what the agent generates for a TodoMVC application.
+
+## How the refinement loop works
+
+```
+┌─────────────┐     ┌──────────────┐     ┌──────────────┐
+│  Generate   │────>│  Run model   │────>│  Collect     │
+│  model      │     │  with PyOsmo │     │  history JSON│
+└─────────────┘     └──────────────┘     └──────┬───────┘
+                                                │
+                    ┌──────────────┐            │
+                    │  Write back  │<───────────┘
+                    │  refined     │  Claude analyzes
+                    │  model       │  errors & coverage
+                    └──────────────┘
+```
+
+The history JSON includes statistics, step frequencies, transition pairs, and per-test error details — giving the agent everything it needs to diagnose and fix issues.
diff --git a/examples/ai_web_agent/example_output/todo_app_model.py b/examples/ai_web_agent/example_output/todo_app_model.py
@@ -0,0 +1,113 @@
+"""Example PyOsmo model for a Todo application.
+
+This is a sample of what the AI agent generates. It models a typical
+TodoMVC-style application with add, complete, delete, and filter actions.
+"""
+
+from playwright.sync_api import Page
+
+from pyosmo import Osmo
+from pyosmo.end_conditions import Length
+
+
+class TodoAppModel:
+    """Model-based test for a Todo web application."""
+
+    def __init__(self, page: Page, url: str):
+        self.page = page
+        self.url = url
+        self.todo_count = 0
+        self.completed_count = 0
+
+    def before_test(self):
+        """Navigate to app and reset state."""
+        self.page.goto(self.url)
+        self.page.wait_for_selector(".new-todo")
+        self.todo_count = 0
+        self.completed_count = 0
+
+    # --- Add a todo item ---
+
+    def step_add_todo(self):
+        input_field = self.page.locator(".new-todo")
+        input_field.fill(f"Task {self.todo_count + 1}")
+        input_field.press("Enter")
+        self.todo_count += 1
+
+    def guard_add_todo(self):
+        return self.todo_count < 10
+
+    def weight_add_todo(self):
+        return 5  # Adding items is the most common action
+
+    # --- Toggle a todo as complete ---
+
+    def step_toggle_todo(self):
+        items = self.page.locator(".todo-list li:not(.completed) .toggle")
+        items.first.click()
+        self.completed_count += 1
+
+    def guard_toggle_todo(self):
+        active_count = self.todo_count - self.completed_count
+        return active_count > 0
+
+    # --- Delete a todo item ---
+
+    def step_delete_todo(self):
+        item = self.page.locator(".todo-list li").first
+        item.hover()
+        item.locator(".destroy").click()
+        self.todo_count -= 1
+
+    def guard_delete_todo(self):
+        return self.todo_count > 0
+
+    # --- Filter: show all ---
+
+    def step_filter_all(self):
+        self.page.click('a[href="#/"]')
+
+    def guard_filter_all(self):
+        return self.todo_count > 0
+
+    # --- Filter: show active ---
+
+    def step_filter_active(self):
+        self.page.click('a[href="#/active"]')
+
+    def guard_filter_active(self):
+        return self.todo_count > 0
+
+    # --- Filter: show completed ---
+
+    def step_filter_completed(self):
+        self.page.click('a[href="#/completed"]')
+
+    def guard_filter_completed(self):
+        return self.completed_count > 0
+
+    # --- Assertions (run after every step) ---
+
+    def after(self):
+        """Verify todo count display matches model state."""
+        active_count = self.todo_count - self.completed_count
+        if active_count > 0:
+            count_text = self.page.locator(".todo-count").text_content()
+            assert str(active_count) in count_text
+
+
+if __name__ == "__main__":
+    from playwright.sync_api import sync_playwright
+
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=False)
+        page = browser.new_page()
+
+        model = TodoAppModel(page=page, url="https://todomvc.com/examples/react/dist/")
+
+        osmo = Osmo(model)
+        osmo.test_end_condition = Length(30)
+        osmo.generate()
+
+        print(osmo.history.to_json())
+        browser.close()
diff --git a/examples/ai_web_agent/generate_model.py b/examples/ai_web_agent/generate_model.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+"""Generate a PyOsmo model for a web application using Claude + Playwright.
+
+Usage:
+    python generate_model.py <URL> [--output model.py]
+
+Requires:
+    pip install claude-agent-sdk
+    npx @anthropic-ai/claude-code mcp add playwright -- npx @playwright/mcp@latest
+"""
+
+import argparse
+import asyncio
+import sys
+from pathlib import Path
+
+from prompt_template import PYOSMO_MODEL_REFERENCE
+
+
+async def generate_model(url: str, output_path: str) -> None:
+    try:
+        from claude_agent_sdk import Agent, AgentConfig, MCPServer
+    except ImportError:
+        print("Error: claude-agent-sdk is required. Install with: pip install claude-agent-sdk")
+        sys.exit(1)
+
+    playwright_mcp = MCPServer(
+        name="playwright",
+        command="npx",
+        args=["@playwright/mcp@latest"],
+    )
+
+    agent = Agent(
+        model="claude-sonnet-4-5-20250929",
+        config=AgentConfig(
+            system_prompt=PYOSMO_MODEL_REFERENCE,
+            mcp_servers=[playwright_mcp],
+        ),
+    )
+
+    user_prompt = f"""\
+Explore the web application at {url} and generate a PyOsmo model for it.
+
+Steps:
+1. Navigate to {url} and observe the page structure
+2. Click around to discover interactive elements, forms, navigation, and states
+3. Generate a PyOsmo model class with step_*/guard_* methods using Playwright selectors
+4. Include state tracking and assertions where appropriate
+5. Output ONLY the complete Python file content, no extra explanation
+
+Save the model to: {output_path}
+"""
+
+    print(f"Exploring {url} and generating model...")
+    result = await agent.run(user_prompt)
+    print(f"Agent completed. Model saved to {output_path}")
+    print(f"Result: {result}")
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Generate a PyOsmo model from a web application")
+    parser.add_argument("url", help="URL of the web application to model")
+    parser.add_argument("--output", "-o", default="generated_model.py", help="Output file path (default: generated_model.py)")
+    args = parser.parse_args()
+
+    asyncio.run(generate_model(args.url, args.output))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/ai_web_agent/prompt_template.py b/examples/ai_web_agent/prompt_template.py
@@ -0,0 +1,125 @@
+"""System prompt template that teaches the Claude agent how to build PyOsmo models."""
+
+# System prompt for the model-generation agent (generate_model.py passes it as
+# `system_prompt`). It describes the step_*/guard_*/weight_* naming
+# convention, the lifecycle hooks, exploration guidelines, and the expected
+# shape of the generated file. The text is sent to the agent verbatim, so any
+# edit here changes the agent's behavior.
+PYOSMO_MODEL_REFERENCE = """\
+You are an expert test engineer. Your job is to explore a web application and
+generate a PyOsmo model-based testing model that exercises the application.
+
+## PyOsmo Model Structure
+
+A PyOsmo model is a Python class whose methods describe **steps** (actions),
+**guards** (preconditions), and optional **weights** (probabilities).
+
+### Naming convention (preferred)
+
+```python
+class WebAppModel:
+    def before_test(self):
+        \"\"\"Reset state before each test.\"\"\"
+        self.page.goto(self.url)
+
+    # --- steps ---
+    def step_click_login(self):
+        self.page.click("#login-btn")
+
+    # --- guards: return True when the step is allowed ---
+    def guard_click_login(self):
+        return self.page.is_visible("#login-btn")
+
+    # --- weights (optional, default=1) ---
+    def weight_click_login(self):
+        return 5  # 5× more likely than default
+```
+
+### Lifecycle hooks
+- `before_suite()` – once before all tests
+- `before_test()` – before each test case (reset state here)
+- `before()` / `after()` – before/after every step
+- `after_test()` – after each test case
+- `after_suite()` – once after all tests
+
+### Guards
+Every `step_X` can have a matching `guard_X` that returns True/False.
+If the guard returns False the step is **not available** for selection.
+At least one step must always be available.
+
+### State tracking
+Use `self.*` attributes to track application state (logged_in, item_count, current_page, etc).
+Update state in steps and check it in guards. This is how you model valid sequences.
+
+## Guidelines for exploring and modeling a web page
+
+1. **Navigate** to the target URL. Observe the page structure: links, buttons, forms, navigation.
+2. **Identify actions** a user can take – clicking links, filling forms, toggling elements, navigating.
+3. **Map each action to a `step_*` method** using Playwright selectors.
+4. **Add guards** for actions that are only valid in certain states (e.g., can only logout when logged in).
+5. **Track state** with `self.*` variables – update in steps, check in guards.
+6. **Add `before_test`** to navigate to the starting URL and reset state.
+7. **Keep the model focused** – 5-15 steps is a good range for a first model.
+8. **Use robust selectors** – prefer `data-testid`, `role`, or visible text over brittle CSS paths.
+9. **Add assertions** where possible – e.g., after clicking "Add to cart", assert the cart count increased.
+
+## Output format
+
+Generate a single Python file that:
+- Imports `from playwright.sync_api import Page`
+- Defines a model class with a constructor accepting `page: Page` and `url: str`
+- Has `before_test` reset to the starting URL
+- Has `step_*` / `guard_*` methods for each discovered action
+- Includes a `if __name__ == '__main__'` block that runs the model with PyOsmo
+
+Example `__main__` block:
+
+```python
+if __name__ == "__main__":
+    from playwright.sync_api import sync_playwright
+    from pyosmo import Osmo
+    from pyosmo.end_conditions import Length
+
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=False)
+        page = browser.new_page()
+        model = WebAppModel(page=page, url="https://example.com")
+        osmo = Osmo(model)
+        osmo.test_end_condition = Length(20)
+        osmo.generate()
+        # Print results as JSON for analysis
+        print(osmo.history.to_json())
+        browser.close()
+```
+"""
+
+# Prompt for the refinement pass: tells the agent how to read the history JSON
+# (statistics, step_pairs, per-test errors) and in which order to apply fixes.
+# Presumably consumed by refine_model.py, whose source is not visible here.
+# The text is sent to the agent verbatim, so any edit changes its behavior.
+REFINEMENT_PROMPT = """\
+You are an expert test engineer refining a PyOsmo model based on test execution results.
+
+You will receive:
+1. The current model source code
+2. A JSON history from the last test run (produced by `history.to_json()`)
+
+## How to read the history JSON
+
+- `statistics.error_count` – total errors across all tests
+- `statistics.step_frequency` – how often each step ran
+- `step_pairs` – which step transitions occurred (and how often)
+- `test_cases[].errors[]` – specific errors with step name and message
+
+## Refinement strategy
+
+1. **Fix errors first**: look at `test_cases[].errors[]`. Common causes:
+   - Selector changed or element not found → update the selector
+   - Guard too permissive → tighten the guard condition
+   - Missing wait → add `page.wait_for_selector()` before interacting
+   - State tracking wrong → fix state updates
+
+2. **Improve coverage**: look at `statistics.step_frequency`.
+   - Steps with 0 executions → guard may be too restrictive
+   - Steps dominating → lower their weight or add more variety
+
+3. **Check transitions**: look at `step_pairs`.
+   - Missing expected transitions → guards may block valid paths
+   - Unexpected transitions → may need new guards
+
+4. **Add new steps**: if the test explored pages with actions not yet modeled,
+   add new `step_*`/`guard_*` methods.
+
+Output the **complete updated model file**.
+"""
diff --git a/examples/ai_web_agent/refine_model.py b/examples/ai_web_agent/refine_model.py
diff --git a/pyosmo/history/history.py b/pyosmo/history/history.py