Skip to content

Commit b7695cc

Browse files
gHashTag and ona-agent committed
Add WebArena browser integration framework
Browser Bridge:
- BrowserBridge struct with stealth/fingerprint support
- JSON-RPC protocol for Playwright communication
- Human-like timing with φ-based delays
- Fingerprint evolution on detection

Task Loader:
- Parse WebArena JSON config format
- Category distribution analysis
- Task filtering by site

Integration Tests:
- 6 tests all passing (100%)
- Bridge connection, fingerprint evolution
- Task loading, simulation consistency
- Stealth advantage validation

Files:
- specs/tri/webarena_browser.vibee: Browser spec
- webarena_agent/src/browser_bridge.zig: Bridge impl
- webarena_agent/src/task_loader.zig: JSON loader
- webarena_agent/src/integration_test.zig: Test suite

Co-authored-by: Ona <no-reply@ona.com>
1 parent 583fffd commit b7695cc

4 files changed

Lines changed: 1530 additions & 0 deletions

File tree

specs/tri/webarena_browser.vibee

Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
# WebArena Browser Integration Specification
2+
# Real browser automation via Playwright bridge
3+
# φ² + 1/φ² = 3 = TRINITY
4+
5+
name: webarena_browser
6+
version: "1.0.0"
7+
language: zig
8+
module: webarena_browser
9+
10+
constants:
11+
PHI: 1.6180339887
12+
TRINITY: 3
13+
14+
# Browser settings
15+
VIEWPORT_WIDTH: 1280
16+
VIEWPORT_HEIGHT: 720
17+
MAX_STEPS: 30
18+
19+
# Timing (human-like, φ-based)
20+
MIN_DELAY_MS: 500
21+
MAX_DELAY_MS: 2000
22+
PHI_DELAY_FACTOR: 1.618
23+
24+
# Fingerprint targets
25+
FINGERPRINT_SIMILARITY_TARGET: 0.90
26+
DETECTION_THRESHOLD_STEALTH: 0.05
27+
DETECTION_THRESHOLD_BASELINE: 0.25
28+
29+
# Evolution parameters
30+
EVOLUTION_GENERATIONS: 20
31+
MUTATION_RATE: 0.0382 # ≈ 0.1/φ² (1/φ² ≈ 0.382); note 1/φ³ would be ≈ 0.236
32+
33+
types:
34+
# Browser action types
35+
ActionType:
36+
enum:
37+
- none
38+
- click
39+
- type_text
40+
- scroll
41+
- hover
42+
- goto
43+
- go_back
44+
- go_forward
45+
- press_key
46+
- select_option
47+
- stop
48+
49+
# Action to send to browser
50+
BrowserAction:
51+
fields:
52+
action_type: ActionType
53+
element_id: Option<Int>
54+
text: Option<String>
55+
url: Option<String>
56+
coords: Option<Tuple<Int, Int>>
57+
key: Option<String>
58+
59+
# DOM element from accessibility tree
60+
DOMElement:
61+
fields:
62+
id: Int
63+
tag: String
64+
role: String
65+
text: String
66+
bounds: Tuple<Int, Int, Int, Int>
67+
clickable: Bool
68+
focusable: Bool
69+
70+
# Browser state snapshot
71+
BrowserState:
72+
fields:
73+
url: String
74+
title: String
75+
accessibility_tree: String
76+
screenshot_base64: Option<String>
77+
elements: List<DOMElement>
78+
viewport: Tuple<Int, Int>
79+
80+
# Task configuration (from WebArena JSON)
81+
TaskConfig:
82+
fields:
83+
task_id: Int
84+
sites: List<String>
85+
intent: String
86+
start_url: String
87+
require_login: Bool
88+
eval_types: List<String>
89+
reference_answers: String
90+
91+
# Task execution result
92+
TaskResult:
93+
fields:
94+
task_id: Int
95+
success: Bool
96+
steps_taken: Int
97+
time_ms: Int
98+
final_answer: Option<String>
99+
error: Option<String>
100+
detected: Bool
101+
102+
# Fingerprint state
103+
FingerprintState:
104+
fields:
105+
similarity: Float
106+
canvas_noise: List<Float>
107+
webgl_vendor: String
108+
webgl_renderer: String
109+
audio_noise: Float
110+
navigator_props: Map<String, String>
111+
112+
# Bridge connection state
113+
BridgeState:
114+
fields:
115+
connected: Bool
116+
process_id: Option<Int>
117+
socket_path: Option<String>
118+
fingerprint: FingerprintState
119+
120+
behaviors:
121+
- name: connect_browser
122+
given: Bridge configuration with stealth flag
123+
when: Need to start browser automation
124+
then: Spawn Playwright process, establish JSON-RPC connection
125+
126+
- name: disconnect_browser
127+
given: Active browser connection
128+
when: Task complete or error
129+
then: Close connection, terminate Playwright process
130+
131+
- name: inject_fingerprint
132+
given: Connected browser with stealth enabled
133+
when: Before first navigation
134+
then: Inject canvas/webgl/audio spoofing scripts
135+
136+
- name: evolve_fingerprint
137+
given: Detection risk or periodic check
138+
when: Fingerprint similarity drops below target
139+
then: Run genetic evolution to improve similarity
140+
141+
- name: execute_action
142+
given: BrowserAction and connected browser
143+
when: Agent decides next action
144+
then: Send action via JSON-RPC, wait for response with φ-delay
145+
146+
- name: get_state
147+
given: Connected browser
148+
when: Need current page state for planning
149+
then: Query accessibility tree, screenshot, return BrowserState
150+
151+
- name: check_detection
152+
given: Current fingerprint state
153+
when: After each action
154+
then: Evaluate detection probability, trigger evolution if needed
155+
156+
- name: run_task
157+
given: TaskConfig and max steps
158+
when: Running WebArena task
159+
then: Execute actions until success/failure/max_steps
160+
161+
- name: evaluate_result
162+
given: Final state and reference answers
163+
when: Task execution complete
164+
then: Compare output with expected, return success/failure
165+
166+
functions:
167+
# Initialize browser bridge
168+
init_bridge:
169+
params:
170+
- stealth: Bool
171+
- seed: Int
172+
returns: BridgeState
173+
description: Create browser bridge with optional stealth
174+
175+
# Connect to Playwright
176+
connect:
177+
params:
178+
- bridge: BridgeState
179+
returns: Bool
180+
description: Establish connection to Playwright process
181+
182+
# Execute single action
183+
execute:
184+
params:
185+
- bridge: BridgeState
186+
- action: BrowserAction
187+
returns: Bool
188+
description: Send action to browser, return success
189+
190+
# Get current state
191+
get_state:
192+
params:
193+
- bridge: BridgeState
194+
returns: BrowserState
195+
description: Query browser for current page state
196+
197+
# Run complete task
198+
run_task:
199+
params:
200+
- bridge: BridgeState
201+
- config: TaskConfig
202+
- max_steps: Int
203+
returns: TaskResult
204+
description: Execute task from start to completion
205+
206+
# Batch run tasks
207+
run_batch:
208+
params:
209+
- bridge: BridgeState
210+
- configs: List<TaskConfig>
211+
- max_steps: Int
212+
returns: List<TaskResult>
213+
description: Run multiple tasks sequentially
214+
215+
test_cases:
216+
- name: bridge_connect
217+
input: "stealth=true, seed=42"
218+
expected: "connected=true, fingerprint.similarity > 0.80"
219+
220+
- name: action_execution
221+
input: "click action on element 42"
222+
expected: "action executed, step_count incremented"
223+
224+
- name: fingerprint_evolution
225+
input: "10 generations"
226+
expected: "similarity increased"
227+
228+
- name: stealth_reduces_detection
229+
input: "stealth vs baseline"
230+
expected: "stealth.detection < baseline.detection"
231+
232+
# Protocol specification for Playwright communication
233+
protocol:
234+
name: PlaywrightBridge
235+
transport: JSON-RPC over Unix socket
236+
237+
messages:
238+
- name: connect
239+
request: { headless: bool, viewport: { width: int, height: int } }
240+
response: { success: bool, session_id: string }
241+
242+
- name: navigate
243+
request: { url: string }
244+
response: { success: bool, final_url: string }
245+
246+
- name: click
247+
request: { element_id: int }
248+
response: { success: bool }
249+
250+
- name: type
251+
request: { element_id: int, text: string }
252+
response: { success: bool }
253+
254+
- name: get_state
255+
request: {}
256+
response: { url: string, title: string, accessibility_tree: string }
257+
258+
- name: inject_script
259+
request: { script: string }
260+
response: { success: bool, result: any }
261+
262+
- name: screenshot
263+
request: { format: string }
264+
response: { data: string }
265+
266+
- name: close
267+
request: {}
268+
response: { success: bool }

0 commit comments

Comments
 (0)