|
1 | | -from fastapi import FastAPI, WebSocket, WebSocketDisconnect, BackgroundTasks, HTTPException |
2 | | -from fastapi.middleware.cors import CORSMiddleware |
3 | | -import uvicorn |
4 | | -import asyncio |
5 | | -import sys |
6 | 1 | import os |
7 | | -import json |
8 | | -from typing import List, Optional |
| 2 | +import sys |
| 3 | +from pathlib import Path |
| 4 | +from fastapi import FastAPI |
| 5 | +from fastapi.middleware.cors import CORSMiddleware |
| 6 | +from fastapi.staticfiles import StaticFiles |
9 | 7 |
|
10 | | -# Add root to sys.path to access scrapewizard core |
| 8 | +# Add project root to sys.path to resolve scrapewizard imports correctly |
11 | 9 | sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))) |
12 | 10 |
|
13 | 11 | from scrapewizard.core.logging import log |
14 | | -from studio.backend.state import StudioState, StudioProject, FieldDefinition |
15 | | -from studio.backend.browser_manager import StudioBrowserManager |
| 12 | +from studio.backend.db import init_db |
| 13 | +from studio.backend.deps import STUDIO_ARTIFACTS_DIR |
| 14 | +from studio.backend.routes_settings import router as settings_router |
| 15 | +from studio.backend.routes_tests import router as tests_router |
| 16 | +from studio.backend.routes_runs import router as runs_router |
16 | 17 |
|
17 | | -app = FastAPI(title="ScrapeWizard Studio Backend") |
18 | | -browser_manager = StudioBrowserManager() |
| 18 | +from contextlib import asynccontextmanager |
19 | 19 |
|
20 | | -app.add_middleware( |
21 | | - CORSMiddleware, |
22 | | - # Local-only: portal dev server origins. Do not widen — this API can drive a browser. |
23 | | - allow_origins=["http://localhost:5173", "http://127.0.0.1:5173"], |
24 | | - allow_methods=["*"], |
25 | | - allow_headers=["*"], |
26 | | -) |
| 20 | +@asynccontextmanager |
| 21 | +async def lifespan(app: FastAPI): |
| 22 | + log("Initializing SQLite database on startup...") |
| 23 | + init_db() |
| 24 | + yield |
27 | 25 |
|
28 | | -active_sessions = {} |
| 26 | +app = FastAPI(title="ScrapeWizard Studio Backend", version="1.2.0", lifespan=lifespan) |
29 | 27 |
|
30 | 28 | @app.get("/health") |
31 | | -async def health(): |
32 | | - """Health check endpoint for CLI and Electron.""" |
33 | | - return {"status": "ok", "version": "1.0.0", "engine": "ScrapeWizard-Studio"} |
34 | | - |
35 | | -@app.get("/") |
36 | | -async def root(): |
37 | | - return {"status": "online", "message": "ScrapeWizard Studio Orchestrator Active"} |
38 | | - |
39 | | -@app.post("/session/start") |
40 | | -async def start_session(url: str, background_tasks: BackgroundTasks): |
41 | | - # Use simple validation for now, or import from shared.validators if needed |
42 | | - if not url.startswith("http"): |
43 | | - raise HTTPException(status_code=400, detail="Invalid URL") |
44 | | - |
45 | | - project_id = f"proj_{len(active_sessions) + 1}" |
46 | | - project = StudioProject(project_id=project_id, url=url, state=StudioState.NAVIGATION) |
47 | | - active_sessions[project_id] = project |
48 | | - |
49 | | - # Start browser in background |
50 | | - await browser_manager.start(url) |
51 | | - |
52 | | - return {"status": "started", "project_id": project_id, "state": project.state} |
53 | | - |
54 | | -@app.post("/session/compile") |
55 | | -async def compile_project(project_id: str): |
56 | | - if project_id not in active_sessions: |
57 | | - return {"error": "Project not found"} |
58 | | - |
59 | | - project = active_sessions[project_id] |
60 | | - # TODO: Invoke CodeGenerator with AET |
61 | | - return {"status": "compiled", "project_id": project_id} |
62 | | - |
63 | | -@app.get("/session/dom") |
64 | | -async def get_dom_project(project_id: str): |
65 | | - if project_id not in active_sessions: |
66 | | - return {"error": "Project not found"} |
67 | | - tree = await browser_manager.get_dom_tree() |
68 | | - return {"project_id": project_id, "tree": tree} |
69 | | - |
70 | | -@app.websocket("/cdp/ws") |
71 | | -async def cdp_proxy(ws: WebSocket): |
72 | | - """Robust bi-directional CDP proxy for Studio Inspector.""" |
73 | | - await ws.accept() |
74 | | - log("CDP WebSocket connection established", level="info") |
75 | | - |
76 | | - # Use the existing browser_manager to get a session |
77 | | - # or follow the user's "launch as you go" plan for the proxy. |
78 | | - # Given the isolation, we'll try to get the existing session first. |
79 | | - try: |
80 | | - if not browser_manager.page: |
81 | | - # If no page is active, start a default one (or wait for session/start) |
82 | | - await browser_manager.start("about:blank") |
83 | | - |
84 | | - cdp_session = await browser_manager.get_cdp_session() |
85 | | - |
86 | | - # Browser -> client forwarding is handled by the cdp_session.on(...) event |
87 | | - # subscriptions below (see create_forwarder). |
88 | | - |
89 | | - async def client_to_browser(): |
90 | | - """Forward messages from Studio Client (WS) -> Browser (CDP).""" |
91 | | - try: |
92 | | - while True: |
93 | | - msg = await ws.receive_json() |
94 | | - method = msg.get("method") |
95 | | - params = msg.get("params", {}) |
96 | | - msg_id = msg.get("id") |
97 | | - |
98 | | - if method == "Input.dispatchMouseEvent": |
99 | | - # Params: type, x, y, button, etc. |
100 | | - # browser_manager expects: event_type='mouse', params={action, x, y, ...} |
101 | | - # We map CDP-like params to our simple manager |
102 | | - etype = params.get("type") |
103 | | - mapping = { |
104 | | - "mousePressed": "down", |
105 | | - "mouseReleased": "up", |
106 | | - "mouseMoved": "move", |
107 | | - "mouseWheel": "wheel" |
108 | | - } |
109 | | - if etype in mapping: |
110 | | - await browser_manager.handle_input_event("mouse", { |
111 | | - "action": mapping[etype], |
112 | | - "x": params.get("x"), |
113 | | - "y": params.get("y"), |
114 | | - "deltaX": params.get("deltaX", 0), |
115 | | - "deltaY": params.get("deltaY", 0), |
116 | | - "button": params.get("button", "left") |
117 | | - }) |
118 | | - elif method == "Input.dispatchKeyEvent": |
119 | | - # TODO: Map keys if needed |
120 | | - pass |
121 | | - elif method: |
122 | | - result = await cdp_session.send(method, params) |
123 | | - if msg_id is not None: |
124 | | - await ws.send_json({"id": msg_id, "result": result}) |
125 | | - except Exception as e: |
126 | | - log(f"CDP Client -> Browser error: {e}", level="error") |
127 | | - |
128 | | - # Hook into browser_manager's internal screencast logic |
129 | | - async def handle_screencast_frame(data): |
130 | | - # Forward the frame to the frontend |
131 | | - try: |
132 | | - await ws.send_json({"method": "Page.screencastFrame", "params": {"data": data}}) |
133 | | - except Exception: |
134 | | - pass |
135 | | - |
136 | | - browser_manager.on_frame = handle_screencast_frame |
137 | | - |
138 | | - # Hook into browser_manager's inspector logic |
139 | | - async def handle_selection(data_str: str): |
140 | | - try: |
141 | | - data = json.loads(data_str) |
142 | | - if data['type'] == 'hover': |
143 | | - await ws.send_json({"method": "Inspector.highlight", "params": data}) |
144 | | - elif data['type'] == 'select': |
145 | | - await ws.send_json({"method": "Inspector.selected", "params": data}) |
146 | | - except Exception as e: |
147 | | - log(f"Inspector error: {e}", level="error") |
148 | | - |
149 | | - browser_manager.on_selection = handle_selection |
150 | | - |
151 | | - # In Playwright, .on() can take a sync or async function. |
152 | | - # We'll use a wrapper to ensure it forwards to the WS. |
153 | | - def create_forwarder(event_name): |
154 | | - def handler(params): |
155 | | - # Create a task to send the message |
156 | | - asyncio.create_task(ws.send_json({"method": event_name, "params": params})) |
157 | | - return handler |
158 | | - |
159 | | - # Standard events for Inspector |
160 | | - cdp_session.on("Page.screencastFrame", create_forwarder("Page.screencastFrame")) |
161 | | - cdp_session.on("Runtime.consoleAPICalled", create_forwarder("Runtime.consoleAPICalled")) |
162 | | - cdp_session.on("Network.requestWillBeSent", create_forwarder("Network.requestWillBeSent")) |
163 | | - cdp_session.on("DOM.documentUpdated", create_forwarder("DOM.documentUpdated")) |
164 | | - cdp_session.on("Page.loadEventFired", create_forwarder("Page.loadEventFired")) |
165 | | - |
166 | | - # Run the command loop |
167 | | - await client_to_browser() |
168 | | - |
169 | | - except WebSocketDisconnect: |
170 | | - log("CDP WebSocket disconnected", level="info") |
171 | | - except Exception as e: |
172 | | - log(f"CDP Proxy failed: {e}", level="error") |
173 | | - finally: |
174 | | - try: |
175 | | - await ws.close() |
176 | | - except Exception: |
177 | | - pass |
| 29 | +def health(): |
| 30 | + """Health check endpoint containing engine version details.""" |
| 31 | + return {"status": "ok", "version": "1.2.0", "engine": "ScrapeWizard-Studio"} |
| 32 | + |
| 33 | +# Register Settings, Tests and Runs routers |
| 34 | +app.include_router(settings_router) |
| 35 | +app.include_router(tests_router) |
| 36 | +app.include_router(runs_router) |
| 37 | + |
| 38 | +# Mount artifacts folder to serve screenshots and visual diff crops |
| 39 | +STUDIO_ARTIFACTS_DIR.mkdir(parents=True, exist_ok=True) |
| 40 | +app.mount("/artifacts", StaticFiles(directory=str(STUDIO_ARTIFACTS_DIR)), name="artifacts") |
| 41 | + |
| 42 | +# Serve the built React frontend (production bundle) at "/"; mounted last so the API routes and /artifacts registered above are matched first
| 43 | +frontend_dist = Path(__file__).parent / ".." / "frontend" / "dist" |
| 44 | +if frontend_dist.exists(): |
| 45 | + app.mount("/", StaticFiles(directory=str(frontend_dist), html=True), name="frontend") |
| 46 | +else: |
| 47 | + log("React frontend dist bundle not found; serving API endpoints only.", level="warning") |
178 | 48 |
|
179 | 49 | if __name__ == "__main__": |
180 | | - # Local-only bind: this API can drive a real browser; never expose it on the network. |
| 50 | + import uvicorn |
| 51 | + # Bind to the loopback interface only, so the server is reachable from this machine but not from the network
181 | 52 | uvicorn.run(app, host="127.0.0.1", port=8000) |
0 commit comments