Skip to content

Commit b178aec

Browse files
committed
feat(studio): add FastAPI backend + React frontend (Part A + B)
Backend (studio/backend/): - db.py: SQLite init via SQLModel + lifespan context manager - models.py: Setting, Test, Step, Run, StepResult tables - deps.py: get_session dependency + artifact/baseline path constants - routes_settings.py: GET/PUT /settings for LLM config - routes_tests.py: full CRUD, async record endpoint, pytest export - routes_runs.py: trigger run, list/get runs, dashboard stats, WebSocket live feed - run_executor.py: SandboxRunner wrapper + WSConnectionHub for live updates - main.py: FastAPI app with lifespan, CORS, static mounts Frontend (studio/frontend/src/): - App.tsx: react-router-dom v6 routing shell - main.tsx: React 18 entry with TanStack Query provider - lib/api.ts: typed fetch client for all backend endpoints - pages/: Dashboard, Settings, Tests, NewTest, TestDetail, RunDetail, RunHistory CLI: - scrapewizard start: launches uvicorn on 127.0.0.1:8000 Tests: - tests/integration/test_studio_backend.py: 4 HTTP integration tests (all pass) Fixes: - Replace datetime.utcnow() with datetime.now(timezone.utc) (py3.12 compat) - Fix /studio gitignore rule that was blocking studio/ from being tracked - Add *.db / *.db-shm / *.db-wal to .gitignore (runtime artifacts) - record_test endpoint is async def (fixes asyncio.create_task in threadpool) All 43 tests pass.
1 parent 77424a1 commit b178aec

26 files changed

Lines changed: 2815 additions & 171 deletions

.gitignore

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ cookies.json
3636
storage_state.json
3737
.learn.md
3838

39+
# SQLite databases (runtime artifacts — never commit)
40+
*.db
41+
*.db-shm
42+
*.db-wal
43+
3944
# Testing
4045
.pytest_cache/
4146
.coverage
@@ -51,6 +56,8 @@ htmlcov/
5156
# OS
5257
.DS_Store
5358
# ScrapeWizard Studio
54-
/studio
5559
.learn.md
5660

61+
# Frontend build output (generated — not committed)
62+
studio/frontend/dist/
63+
studio/frontend/node_modules/

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,8 @@ The CLI scraper documented above remains the current, working product.
152152

153153
- **[PLATFORM_PLAN.md](PLATFORM_PLAN.md)** — the full roadmap and architecture (source of truth)
154154
- **[BUILD_GUIDE.md](BUILD_GUIDE.md)** — step-by-step how-to for building each stage
155+
- **[FRONTEND_PLAN.md](FRONTEND_PLAN.md)** — detailed spec for the application (the GUI/portal)
156+
- **[APP_BUILD_STEPS.md](APP_BUILD_STEPS.md)** — step-by-step build order: backend API + SQLite, then frontend slices
155157

156158
## License
157159
MIT

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ dependencies = [
3030
"pandas>=2.0.0",
3131
"openpyxl>=3.1.0",
3232
"python-json-logger>=2.0.0",
33-
"yaspin>=2.0.0"
33+
"yaspin>=2.0.0",
34+
"sqlmodel"
3435
]
3536

3637
[project.scripts]

scrapewizard/cli/commands/utils.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,5 +123,40 @@ def resume(project_id: str = typer.Argument(..., help="The ID of the project to
123123
orchestrator = Orchestrator(project_dir)
124124
orchestrator.run()
125125
except Exception as e:
126-
rprint(f"[red]Resume failed: {e}[/red]")
126+
log(f"Resume failed: {e}", level="error")
127127
raise typer.Exit(code=1)
128+
129+
def start_studio(
    port: int = typer.Option(8000, "--port", "-p", help="Port to run the studio server on"),
    open_browser: bool = typer.Option(True, "--open/--no-open", help="Automatically open ScrapeWizard Studio in the default browser")
) -> None:
    """Start ScrapeWizard Studio web interface."""
    # NOTE: the docstring above is what Typer shows as the command's help text,
    # so implementation notes live in comments instead.
    #
    # port:         TCP port on 127.0.0.1 that uvicorn binds to.
    # open_browser: when true, a daemon thread opens the Studio URL shortly
    #               after the server begins starting.
    #
    # Heavy/optional imports stay local so the rest of the CLI does not pay
    # for (or depend on) the Studio stack.
    import uvicorn
    import webbrowser
    import threading
    import time
    from studio.backend.db import init_db

    rprint("[bold cyan]Starting ScrapeWizard Studio...[/bold cyan]")

    # Initialize DB before booting so database problems surface before the
    # server starts accepting requests.
    init_db()

    url = f"http://127.0.0.1:{port}"
    rprint(f"• Studio is running at: [bold green]{url}[/bold green]")
    rprint("• Press [bold yellow]Ctrl+C[/bold yellow] to stop the server.")

    if open_browser:
        def _open_when_ready() -> None:
            # Give uvicorn a moment to bind before pointing a browser at it.
            time.sleep(1.5)
            try:
                # webbrowser.open() reports failure by returning False.
                opened = webbrowser.open(url)
            except Exception:
                # Opening a browser is best-effort and must never take the
                # server down; fall through to the hint below.
                opened = False
            if not opened:
                rprint(f"[yellow]Could not open a browser automatically; visit {url} manually.[/yellow]")

        threading.Thread(target=_open_when_ready, daemon=True).start()

    try:
        from studio.backend.main import app as fastapi_app
        uvicorn.run(fastapi_app, host="127.0.0.1", port=port, log_level="error")
    except KeyboardInterrupt:
        # Depending on the uvicorn version, Ctrl+C either surfaces here or is
        # handled inside uvicorn.run(), which then returns normally.
        pass

    # Printed on both shutdown paths. A failed startup exits via SystemExit
    # and therefore skips this line.
    rprint("\n[yellow]Studio server stopped.[/yellow]")

scrapewizard/cli/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
app.command()(utils.clean)
2828
app.command()(utils.doctor)
2929
app.command()(utils.resume)
30+
app.command(name="start")(utils.start_studio)
3031
app.command(name="record")(engine.record)
3132
app.command(name="test")(engine.test)
3233

studio/backend/db.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import os
2+
from pathlib import Path
3+
from sqlmodel import SQLModel, create_engine, Session
4+
from scrapewizard.core.config import ConfigManager
5+
6+
# Studio keeps its SQLite file next to the rest of the ScrapeWizard
# configuration (~/.scrapewizard/). The directory is created at import time
# so the engine below can always open the file.
DB_DIR = ConfigManager.CONFIG_DIR
DB_DIR.mkdir(parents=True, exist_ok=True)
DB_PATH = DB_DIR.joinpath("studio.db")

# One shared engine for the whole process.
database_url = f"sqlite:///{DB_PATH}"
# check_same_thread=False lets a connection be used from a thread other than
# the one that created it.
_sqlite_connect_args = {"check_same_thread": False}
engine = create_engine(database_url, connect_args=_sqlite_connect_args)
17+
18+
def init_db():
    """Create all SQLModel tables in SQLite if they do not exist.

    Also seeds the ``schema_version`` row in the ``Setting`` table the first
    time the database is initialized. Safe to call repeatedly.
    """
    # The table classes must be imported BEFORE create_all(): importing the
    # module is what registers them on SQLModel.metadata. If nothing else has
    # imported the models yet, create_all() would otherwise create no tables
    # and the Setting lookup below would fail on a fresh database.
    from studio.backend.models import Setting

    SQLModel.metadata.create_all(engine)

    # Store schema version if not set
    with Session(engine) as session:
        if session.get(Setting, "schema_version") is None:
            session.add(Setting(key="schema_version", value="1.0.0"))
            session.commit()
30+
31+
def get_session():
    """Yield one database session for use as a FastAPI dependency.

    The session is bound to the module-level engine and is closed when the
    generator is finalized, i.e. after the consuming handler is done with it.
    """
    with Session(engine) as db_session:
        yield db_session

studio/backend/deps.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from pathlib import Path
2+
from scrapewizard.core.config import ConfigManager
3+
4+
# Re-export get_session for convenience
5+
from studio.backend.db import get_session
6+
7+
# Both storage roots hang off the shared ScrapeWizard config directory, the
# same base the Studio database uses, so a relocated config directory moves
# everything together instead of leaving these two behind.
# NOTE(review): presumably ConfigManager.CONFIG_DIR is ~/.scrapewizard, which
# keeps these paths unchanged — confirm before merging.

# Root path for all studio execution screenshots and diff outputs
STUDIO_ARTIFACTS_DIR = ConfigManager.CONFIG_DIR / "artifacts"
STUDIO_ARTIFACTS_DIR.mkdir(parents=True, exist_ok=True)

# Shared stable path for visual test baselines
STUDIO_BASELINES_DIR = ConfigManager.CONFIG_DIR / "baselines"
STUDIO_BASELINES_DIR.mkdir(parents=True, exist_ok=True)

studio/backend/main.py

Lines changed: 39 additions & 168 deletions
Original file line numberDiff line numberDiff line change
@@ -1,181 +1,52 @@
1-
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, BackgroundTasks, HTTPException
2-
from fastapi.middleware.cors import CORSMiddleware
3-
import uvicorn
4-
import asyncio
5-
import sys
61
import os
7-
import json
8-
from typing import List, Optional
2+
import sys
3+
from pathlib import Path
4+
from fastapi import FastAPI
5+
from fastapi.middleware.cors import CORSMiddleware
6+
from fastapi.staticfiles import StaticFiles
97

10-
# Add root to sys.path to access scrapewizard core
8+
# Add project root to sys.path to resolve scrapewizard imports correctly
119
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
1210

1311
from scrapewizard.core.logging import log
14-
from studio.backend.state import StudioState, StudioProject, FieldDefinition
15-
from studio.backend.browser_manager import StudioBrowserManager
12+
from studio.backend.db import init_db
13+
from studio.backend.deps import STUDIO_ARTIFACTS_DIR
14+
from studio.backend.routes_settings import router as settings_router
15+
from studio.backend.routes_tests import router as tests_router
16+
from studio.backend.routes_runs import router as runs_router
1617

17-
app = FastAPI(title="ScrapeWizard Studio Backend")
18-
browser_manager = StudioBrowserManager()
18+
from contextlib import asynccontextmanager
1919

20-
app.add_middleware(
21-
CORSMiddleware,
22-
# Local-only: portal dev server origins. Do not widen — this API can drive a browser.
23-
allow_origins=["http://localhost:5173", "http://127.0.0.1:5173"],
24-
allow_methods=["*"],
25-
allow_headers=["*"],
26-
)
20+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup work for the FastAPI application.

    Initializes the SQLite database before the app starts serving. There is
    no teardown step after the ``yield``.
    """
    log("Initializing SQLite database on startup...")
    init_db()

    # Control returns to FastAPI here; the app serves until shutdown.
    yield
2725

28-
active_sessions = {}
26+
app = FastAPI(title="ScrapeWizard Studio Backend", version="1.2.0", lifespan=lifespan)

# Allow the frontend dev server (a different origin from this API) to call
# the backend from the browser. Keep this list local-only and do not widen it.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:5173", "http://127.0.0.1:5173"],
    allow_methods=["*"],
    allow_headers=["*"],
)
2927

3028
@app.get("/health")
31-
async def health():
32-
"""Health check endpoint for CLI and Electron."""
33-
return {"status": "ok", "version": "1.0.0", "engine": "ScrapeWizard-Studio"}
34-
35-
@app.get("/")
36-
async def root():
37-
return {"status": "online", "message": "ScrapeWizard Studio Orchestrator Active"}
38-
39-
@app.post("/session/start")
40-
async def start_session(url: str, background_tasks: BackgroundTasks):
41-
# Use simple validation for now, or import from shared.validators if needed
42-
if not url.startswith("http"):
43-
raise HTTPException(status_code=400, detail="Invalid URL")
44-
45-
project_id = f"proj_{len(active_sessions) + 1}"
46-
project = StudioProject(project_id=project_id, url=url, state=StudioState.NAVIGATION)
47-
active_sessions[project_id] = project
48-
49-
# Start browser in background
50-
await browser_manager.start(url)
51-
52-
return {"status": "started", "project_id": project_id, "state": project.state}
53-
54-
@app.post("/session/compile")
55-
async def compile_project(project_id: str):
56-
if project_id not in active_sessions:
57-
return {"error": "Project not found"}
58-
59-
project = active_sessions[project_id]
60-
# TODO: Invoke CodeGenerator with AET
61-
return {"status": "compiled", "project_id": project_id}
62-
63-
@app.get("/session/dom")
64-
async def get_dom_project(project_id: str):
65-
if project_id not in active_sessions:
66-
return {"error": "Project not found"}
67-
tree = await browser_manager.get_dom_tree()
68-
return {"project_id": project_id, "tree": tree}
69-
70-
@app.websocket("/cdp/ws")
71-
async def cdp_proxy(ws: WebSocket):
72-
"""Robust bi-directional CDP proxy for Studio Inspector."""
73-
await ws.accept()
74-
log("CDP WebSocket connection established", level="info")
75-
76-
# Use the existing browser_manager to get a session
77-
# or follow the user's "launch as you go" plan for the proxy.
78-
# Given the isolation, we'll try to get the existing session first.
79-
try:
80-
if not browser_manager.page:
81-
# If no page is active, start a default one (or wait for session/start)
82-
await browser_manager.start("about:blank")
83-
84-
cdp_session = await browser_manager.get_cdp_session()
85-
86-
# Browser -> client forwarding is handled by the cdp_session.on(...) event
87-
# subscriptions below (see create_forwarder).
88-
89-
async def client_to_browser():
90-
"""Forward messages from Studio Client (WS) -> Browser (CDP)."""
91-
try:
92-
while True:
93-
msg = await ws.receive_json()
94-
method = msg.get("method")
95-
params = msg.get("params", {})
96-
msg_id = msg.get("id")
97-
98-
if method == "Input.dispatchMouseEvent":
99-
# Params: type, x, y, button, etc.
100-
# browser_manager expects: event_type='mouse', params={action, x, y, ...}
101-
# We map CDP-like params to our simple manager
102-
etype = params.get("type")
103-
mapping = {
104-
"mousePressed": "down",
105-
"mouseReleased": "up",
106-
"mouseMoved": "move",
107-
"mouseWheel": "wheel"
108-
}
109-
if etype in mapping:
110-
await browser_manager.handle_input_event("mouse", {
111-
"action": mapping[etype],
112-
"x": params.get("x"),
113-
"y": params.get("y"),
114-
"deltaX": params.get("deltaX", 0),
115-
"deltaY": params.get("deltaY", 0),
116-
"button": params.get("button", "left")
117-
})
118-
elif method == "Input.dispatchKeyEvent":
119-
# TODO: Map keys if needed
120-
pass
121-
elif method:
122-
result = await cdp_session.send(method, params)
123-
if msg_id is not None:
124-
await ws.send_json({"id": msg_id, "result": result})
125-
except Exception as e:
126-
log(f"CDP Client -> Browser error: {e}", level="error")
127-
128-
# Hook into browser_manager's internal screencast logic
129-
async def handle_screencast_frame(data):
130-
# Forward the frame to the frontend
131-
try:
132-
await ws.send_json({"method": "Page.screencastFrame", "params": {"data": data}})
133-
except Exception:
134-
pass
135-
136-
browser_manager.on_frame = handle_screencast_frame
137-
138-
# Hook into browser_manager's inspector logic
139-
async def handle_selection(data_str: str):
140-
try:
141-
data = json.loads(data_str)
142-
if data['type'] == 'hover':
143-
await ws.send_json({"method": "Inspector.highlight", "params": data})
144-
elif data['type'] == 'select':
145-
await ws.send_json({"method": "Inspector.selected", "params": data})
146-
except Exception as e:
147-
log(f"Inspector error: {e}", level="error")
148-
149-
browser_manager.on_selection = handle_selection
150-
151-
# In Playwright, .on() can take a sync or async function.
152-
# We'll use a wrapper to ensure it forwards to the WS.
153-
def create_forwarder(event_name):
154-
def handler(params):
155-
# Create a task to send the message
156-
asyncio.create_task(ws.send_json({"method": event_name, "params": params}))
157-
return handler
158-
159-
# Standard events for Inspector
160-
cdp_session.on("Page.screencastFrame", create_forwarder("Page.screencastFrame"))
161-
cdp_session.on("Runtime.consoleAPICalled", create_forwarder("Runtime.consoleAPICalled"))
162-
cdp_session.on("Network.requestWillBeSent", create_forwarder("Network.requestWillBeSent"))
163-
cdp_session.on("DOM.documentUpdated", create_forwarder("DOM.documentUpdated"))
164-
cdp_session.on("Page.loadEventFired", create_forwarder("Page.loadEventFired"))
165-
166-
# Run the command loop
167-
await client_to_browser()
168-
169-
except WebSocketDisconnect:
170-
log("CDP WebSocket disconnected", level="info")
171-
except Exception as e:
172-
log(f"CDP Proxy failed: {e}", level="error")
173-
finally:
174-
try:
175-
await ws.close()
176-
except Exception:
177-
pass
29+
def health():
    """Health check endpoint containing engine version details."""
    payload = dict(status="ok", version="1.2.0", engine="ScrapeWizard-Studio")
    return payload
32+
33+
# API routers are registered before any static mount: routes are matched in
# registration order, and the catch-all "/" mount below would otherwise
# shadow them.
for _api_router in (settings_router, tests_router, runs_router):
    app.include_router(_api_router)

# Serve run screenshots and visual-diff crops straight from disk.
STUDIO_ARTIFACTS_DIR.mkdir(parents=True, exist_ok=True)
_artifacts_files = StaticFiles(directory=str(STUDIO_ARTIFACTS_DIR))
app.mount("/artifacts", _artifacts_files, name="artifacts")

# Serve the built React bundle (studio/frontend/dist) when it exists;
# otherwise run as an API-only server.
frontend_dist = Path(__file__).parent.joinpath("..", "frontend", "dist")
if not frontend_dist.exists():
    log("React frontend dist bundle not found; serving API endpoints only.", level="warning")
else:
    app.mount("/", StaticFiles(directory=str(frontend_dist), html=True), name="frontend")


if __name__ == "__main__":
    import uvicorn

    # Loopback only: this server is meant for local use and must not be
    # exposed on the network.
    uvicorn.run(app, host="127.0.0.1", port=8000)

0 commit comments

Comments
 (0)