MarimerLLC
diff --git a/‎.env.example‎
Lines changed: 5 additions & 6 deletions b/‎.env.example‎
Lines changed: 5 additions & 6 deletions
diff --git a/‎CLAUDE.md‎
Lines changed: 7 additions & 5 deletions b/‎CLAUDE.md‎
Lines changed: 7 additions & 5 deletions
diff --git a/‎Directory.Build.props‎
Lines changed: 1 addition & 1 deletion b/‎Directory.Build.props‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎deploy/rockbot-seed/agent-trust.json‎
Lines changed: 1 addition & 1 deletion b/‎deploy/rockbot-seed/agent-trust.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎deploy/rockbot-seed/well-known-agents.json‎
Lines changed: 8 additions & 8 deletions b/‎deploy/rockbot-seed/well-known-agents.json‎
Lines changed: 8 additions & 8 deletions
diff --git a/‎docker-compose.yml‎
Lines changed: 22 additions & 7 deletions b/‎docker-compose.yml‎
Lines changed: 22 additions & 7 deletions
diff --git a/‎docs/capabilities.md‎
Lines changed: 22 additions & 8 deletions b/‎docs/capabilities.md‎
Lines changed: 22 additions & 8 deletions
diff --git a/‎docs/foragent-specification.md‎
Lines changed: 23 additions & 15 deletions b/‎docs/foragent-specification.md‎
Lines changed: 23 additions & 15 deletions
@@ -2,14 +2,13 @@
 #
 # These are consumed by docker-compose.yml. Foragent and the RockBot agent can
 # be pointed at different models — Foragent uses FORAGENT_LLM_* for its own
-# LLM-backed capabilities (extract-structured-data and beyond), the RockBot
-# agent uses LLM_* for its own reasoning.
+# LLM-backed capabilities (browser-task planner, form-schema enrichment), the
+# RockBot agent uses LLM_* for its own reasoning.
 #
-# Direct curl tests of capabilities that don't need an LLM (fetch-page-title)
-# still work without either set; anything LLM-backed will fail until Foragent's
-# config is populated.
+# Every Foragent capability needs the LLM wired; without it, the host fails
+# fast at startup.
 
-# ── Foragent's LLM (REQUIRED for extract-structured-data) ────────────────────
+# ── Foragent's LLM (REQUIRED — browser-task + form-schema enrichment) ────────
 # Azure AI Foundry / OpenAI-compatible endpoints are both fine.
 # Foundry endpoint shape: https://<resource>-<region>.cognitiveservices.azure.com/openai/v1/
 FORAGENT_LLM_ENDPOINT=https://your-resource-region.cognitiveservices.azure.com/openai/v1/
 
@@ -6,7 +6,7 @@
     <ImplicitUsings>enable</ImplicitUsings>
     <TreatWarningsAsErrors>true</TreatWarningsAsErrors>
     <EnforceCodeStyleInBuild>true</EnforceCodeStyleInBuild>
-    <Version>0.2.0-alpha.8</Version>
+    <Version>0.2.0-alpha.9</Version>
     <Authors>Marimer LLC</Authors>
     <Company>Marimer LLC</Company>
     <Copyright>Copyright (c) Marimer LLC</Copyright>
 
@@ -2,7 +2,7 @@
   {
     "agentId": "Foragent",
     "level": 4,
-    "approvedSkills": ["browser-task", "fetch-page-title", "extract-structured-data"],
+    "approvedSkills": ["browser-task", "learn-form-schema", "execute-form-batch"],
     "firstSeen": "2026-04-21T00:00:00+00:00",
     "lastInteraction": "2026-04-21T00:00:00+00:00",
     "interactionCount": 0
 
@@ -1,7 +1,7 @@
 [
   {
     "agentName": "Foragent",
-    "description": "Browser agent — navigates pages with Chromium and exposes task-level skills over HTTP A2A.",
+    "description": "Browser agent — navigates pages with Chromium and exposes task-level skills over HTTP A2A. All skills consume structured input on the A2A DataPart (invoke_agent 'data' parameter); the text 'message' is only for human-readable summaries.",
     "version": "1.0",
     "url": "http://foragent:8080",
     "protocolVersion": "1.0",
@@ -11,17 +11,17 @@
       {
         "id": "browser-task",
         "name": "Browser Task (generalist)",
-        "description": "Drive a browser with an LLM-in-the-loop planner to accomplish a free-form intent. Input JSON {\"intent\":\"...\",\"allowedHosts\":[\"host\",\"*.host\",\"*\"],\"url\":\"optional start\",\"credentialId\":\"optional\",\"maxSteps\":60,\"maxSeconds\":120}. allowedHosts is required and empty rejects. Returns a structured JSON result with status (done/failed/incomplete), summary, optional result, step count, and navigations."
+        "description": "Drive a browser with an LLM-in-the-loop planner to accomplish a free-form intent. PASS INPUT AS AN A2A DATA PART — populate invoke_agent's 'data' parameter with {\"intent\":\"...\",\"allowedHosts\":[\"host\",\"*.host\",\"*\"],\"url\":\"optional start\",\"credentialId\":\"optional\",\"maxSteps\":60,\"maxSeconds\":120}. 'intent' and 'allowedHosts' are required; an empty allowlist rejects the task. Use [\"*\"] explicitly when any host is acceptable. Returns a structured JSON result with status (done/failed/incomplete), summary, optional result, step count, and navigations."
       },
       {
-        "id": "fetch-page-title",
-        "name": "Fetch Page Title",
-        "description": "Navigate to a URL with a real browser and return the contents of its <title> element."
+        "id": "learn-form-schema",
+        "name": "Learn Form Schema",
+        "description": "Navigate to a web form, extract its structure (fields, types, options, validation), and persist it as a reusable skill. PASS INPUT AS AN A2A DATA PART — populate invoke_agent's 'data' parameter with {\"url\":\"https://...\",\"allowedHosts\":[\"host\"],\"formSelector\":\"optional\",\"credentialId\":\"optional\",\"skillName\":\"optional override\",\"intent\":\"optional prose\"}. 'url' and 'allowedHosts' are required. Returns the typed form schema plus the skill name it was persisted under."
       },
       {
-        "id": "extract-structured-data",
-        "name": "Extract Structured Data",
-        "description": "Navigate to a URL and extract data matching a natural-language description, returning JSON. Input the target URL and a description of what to extract."
+        "id": "execute-form-batch",
+        "name": "Execute Form Batch",
+        "description": "Submit a batch of rows against a learned form schema. PASS INPUT AS AN A2A DATA PART — populate invoke_agent's 'data' parameter with {\"schemaRef\":\"sites/host/forms/name\" OR \"schema\":{...FormSchema...},\"rows\":[{fieldName:value,...}],\"allowedHosts\":[\"host\"],\"credentialId\":\"optional\",\"mode\":\"abort-on-first\"|\"continue\",\"successIndicator\":\"optional CSS selector\"}. 'rows', 'allowedHosts', and exactly one of schemaRef/schema are required. Streams per-row progress. Default mode aborts on first failure."
       }
     ]
   }
 
@@ -5,7 +5,7 @@
 #   - foragent        — this project; exposes HTTP A2A on port 5210
 #   - rockbot-init    — seeds /data/agent with RockBot profile + well-known-agents.json
 #                       pointing at foragent
-#   - rockbot         — rockylhotka/rockbot-agent:0.9.11, configured to know Foragent
+#   - rockbot         — rockylhotka/rockbot-agent:0.9.15, configured to know Foragent
 #                       as an A2A peer it can delegate tasks to. 0.9.11 brings
 #                       the structured-data invoke_agent surface (PR #291) so
 #                       RockBot can consume Foragent's FormSchema JSON results
@@ -21,7 +21,7 @@
 #   curl -X POST http://localhost:5210/ \
 #     -H "X-Api-Key: rockbot-calls-foragent" \
 #     -H "Content-Type: application/json" \
-#     -d '{"jsonrpc":"2.0","id":1,"method":"message/send","params":{"message":{"role":"ROLE_USER","messageId":"m1","parts":[{"text":"https://example.com"}]},"metadata":{"skill":"fetch-page-title"}}}'
+#     -d '{"jsonrpc":"2.0","id":1,"method":"message/send","params":{"message":{"role":"ROLE_USER","messageId":"m1","parts":[{"text":"{\"intent\":\"fetch the page title\",\"url\":\"https://example.com\",\"allowedHosts\":[\"example.com\"]}"}]},"metadata":{"skill":"browser-task"}}}'
 # Note: the A2A v1-preview schema uses protobuf-style enum values (ROLE_USER, not "user")
 # and parts are bare {"text":"..."} objects — no "kind" field.
 #
@@ -47,12 +47,27 @@ services:
       timeout: 3s
       retries: 15
 
+  foragent-init:
+    # One-shot ownership fix for the foragent-data named volume. The Foragent
+    # Dockerfile chowns /data to the non-root `foragent` user at build time, but
+    # Docker mounts a fresh named volume root-owned and masks the chown —
+    # FileSkillStore then hits UnauthorizedAccessException on first boot. This
+    # init container runs as root before foragent starts and mirrors the
+    # rockbot-init pattern. The mkdir+chmod is idempotent, so re-running it on
+    # subsequent boots is harmless.
+    image: busybox:latest
+    user: root
+    command: ["sh", "-c", "mkdir -p /data/foragent/skills /data/foragent/memory && chmod -R 777 /data/foragent"]
+    volumes:
+      - foragent-data:/data/foragent
+
   foragent:
     build:
       context: .
     depends_on:
       rabbitmq:
         condition: service_healthy
+      foragent-init:
+        condition: service_completed_successfully
     ports:
       - "5210:8080"
     environment:
@@ -64,12 +79,12 @@ services:
       RabbitMq__VirtualHost: /
       Gateway__AgentName: Foragent
       Gateway__InternalAgentName: Foragent
-      Gateway__Description: "Browser agent — browser-task (generalist), learn-form-schema, execute-form-batch, fetch-page-title, extract-structured-data"
+      Gateway__Description: "Browser agent — browser-task (generalist), learn-form-schema, execute-form-batch"
       # RockBot will call Foragent with header X-Api-Key: rockbot-calls-foragent
       ApiKeys__rockbot-calls-foragent__AgentId: RockBot
       ApiKeys__rockbot-calls-foragent__DisplayName: RockBot
-      # LLM required for the extract-structured-data capability. Namespaced so
-      # Foragent can point at a different model than the RockBot side.
+      # LLM required for the browser-task planner and form-schema enrichment.
+      # Namespaced so Foragent can point at a different model than the RockBot side.
       ForagentLlm__Endpoint: ${FORAGENT_LLM_ENDPOINT:?FORAGENT_LLM_ENDPOINT is required}
       ForagentLlm__ModelId: ${FORAGENT_LLM_MODEL_ID:?FORAGENT_LLM_MODEL_ID is required}
       ForagentLlm__ApiKey: ${FORAGENT_LLM_API_KEY:?FORAGENT_LLM_API_KEY is required}
@@ -100,7 +115,7 @@ services:
       - foragent-data:/data/foragent
 
   rockbot-init:
-    image: rockylhotka/rockbot-agent:0.9.11
+    image: rockylhotka/rockbot-agent:0.9.15
     user: root
     entrypoint: ["/bin/sh", "-c"]
     command:
@@ -136,7 +151,7 @@ services:
       - ./deploy/rockbot-seed:/seed:ro
 
   rockbot:
-    image: rockylhotka/rockbot-agent:0.9.11
+    image: rockylhotka/rockbot-agent:0.9.15
     depends_on:
       rockbot-init:
         condition: service_completed_successfully
 
@@ -8,13 +8,22 @@ invoke capabilities by name; Foragent handles the browser mechanics.
 - `browser-task` — **generalist**, spec §5.2. LLM-in-the-loop planner that
   drives a real browser to accomplish a free-form intent. Shipped in
   step 6; step 7 added skills + memory priming (spec §5.6).
-- `fetch-page-title` — specialist. Inherited from step 1/2.
-- `extract-structured-data` — specialist. Inherited from step 3.
-
-The step-4 `post-to-site` capability was removed in step 7 — the
-generalist `browser-task` plus the seeded `sites/bsky.app/login` skill
-subsume its function, and the project is still pre-public so no consumer
-needed a deprecation path.
+- `learn-form-schema` — specialist (phase-1, spec §5.5). Introspects a
+  form and returns a typed `FormSchema` persisted at `sites/{host}/forms/{slug}`.
+- `execute-form-batch` — specialist (phase-3, spec §5.5). Submits rows
+  against a learned schema, streaming per-row progress over A2A.
+
+Three v0.1/v0.2 specialists have been removed as `browser-task` subsumes
+them. The project is pre-public so no deprecation path was required:
+
+- `post-to-site` — removed in step 7. `browser-task` + the seeded
+  `sites/bsky-app/login` skill cover the use case.
+- `fetch-page-title` — removed in step 9. Was a milestone-1 smoke
+  target; `browser-task` with a simple intent produces the same result.
+- `extract-structured-data` — removed in step 9. `browser-task` with a
+  "return JSON: {…}" intent produces the same result. Its typed input
+  shape also lacked the mandatory allowlist required by spec §7.1; the
+  generalist enforces that by design.
 
 ## `browser-task` input shape
 
@@ -60,7 +69,12 @@ A JSON object in a single text part:
 ```
 
 `incomplete` means the budget was exhausted before `done`/`fail` was
-called.
+called. For extraction-style tasks, instruct the planner to return JSON
+via the `result` field — e.g. intent `"Open https://shop.example/p/42
+and return {\"name\":..., \"price_usd\":...} as JSON in the result
+field."`. The planner is not schema-enforced the way
+`extract-structured-data` used to be, so keep the target shape explicit
+in the intent.
 
 ## `browser-task` tool surface
 
 
@@ -311,15 +311,15 @@ callers cheap — not to proliferate.
 | `browser-task` | Generalist | Given intent + optional URL, credential id, and allowed-hosts list, plan and drive the browser to fulfill the intent. Uses RockBot skills + memory as priming. Returns a result or a structured intermediate artifact (e.g. a learned form schema). |
 | `learn-form-schema` | Specialist (phase-1) | Given a URL and optional credential, introspect a form and return its schema — fields, types, dropdown dependencies, validation rules. Persists the schema as a skill (§5.6). Returns the schema to the caller for review. |
 | `execute-form-batch` | Specialist (phase-2) | Given a learned schema (by id or inline) and a batch of row data, submit the form once per row. Streams A2A progress updates. Handles partial failure. |
-| `fetch-page-title` | Specialist | Return the `<title>` of a URL. Inherited from milestone 2. |
-| `extract-structured-data` | Specialist | Extract structured data from a page matching a natural-language description. Inherited from milestone 3. |
 
-The v0.1 `post-to-site` capability ships in the main codebase as a
-regression test for credential handling. After step 7 it is removed
-from the advertised skill list; `browser-task` subsumes its function.
-
-The v0.1 `monitor-page` and `fill-form` capabilities fold into
-`browser-task` and do not ship as separate advertised skills.
+After step 9 the v0.2 surface is three skills. The v0.1 `post-to-site`
+capability was removed in step 7 once the seeded `bsky.app` skill +
+`browser-task` covered it; the v0.1 `fetch-page-title` and v0.1
+`extract-structured-data` specialists were removed in step 9 — the
+generalist subsumes both at the cost of 2–3× tokens per call, which
+is acceptable given zero deterministic high-volume callers today. The
+v0.1 `monitor-page` and `fill-form` capabilities fold into `browser-task`
+and do not ship as separate advertised skills either.
 
 ### 5.3 Capabilities explicitly out of scope (v1)
 
@@ -393,18 +393,21 @@ site knowledge, rather than building a Foragent-local store.
   `RockBot.Host.Abstractions` + `RockBot.Host.AgentMemoryExtensions.WithSkills()`).
   Stores site knowledge as markdown skills. Two origin categories:
   - **Human-authored skills** — operator-written primers for a site
-    (e.g. `sites/bsky.app/overview`). Treated as priming hints for the
+    (e.g. `sites/bsky-app/overview`). Treated as priming hints for the
     generalist planner.
   - **Agent-learned skills** — written by the generalist on successful
-    task completion (e.g. `sites/bsky.app/learned/login-flow`). Tagged
+    task completion (e.g. `sites/bsky-app/learned/login-flow`). Tagged
     with `metadata.source = "agent-learned"` and an importance score.
 - **`ILongTermMemory`** (file-backed, BM25 + semantic —
   `WithLongTermMemory()`). Declarative observations that don't fit the
   procedural skill shape: failed attempts, site-version notes, ambient
   facts.
 
 **Skill naming:** `sites/{host}/{phase-or-intent}` — e.g.
-`sites/bsky.app/login`, `sites/bsky.app/compose-post`. Hierarchical `/`
+`sites/bsky-app/login`, `sites/bsky-app/compose-post`. Host segments are
+sanitized (`.` → `-`) because RockBot 0.9's `FileSkillStore.ValidateName`
+rejects dots; `bsky.app` becomes `bsky-app`. Allowlists and memory
+categories keep the original dotted host. Hierarchical `/`
 nesting is supported by the store. `seeAlso` links cross-reference
 skills for the same site so retrieval surfaces a small knowledge
 cluster, not one skill at a time.
@@ -714,10 +717,15 @@ hard design questions until usage forces them.
    Resolve open question #6 (how to persist typed JSON alongside
    markdown skills) in the deliverable.
 
-9. **Deprecate subsumed specialists.** Review whether `fetch-page-title`
-   / `extract-structured-data` still pay their way or fold into
-   `browser-task` with equivalent cost. Land on the minimum advertised
-   capability set v0.2 actually needs.
+9. **Deprecate subsumed specialists.** Reviewed whether `fetch-page-title`
+   / `extract-structured-data` still paid their way vs. `browser-task` at
+   equivalent cost. Both removed: `fetch-page-title` was a milestone-1
+   smoke-test relic that `browser-task` subsumes trivially;
+   `extract-structured-data` was functionally equivalent to a `browser-task`
+   intent that asks for JSON in the `done.result` channel (cost delta
+   ~2–3× tokens per call, zero deterministic high-volume callers today),
+   and was out of spec on §7.1 mandatory allowlists. Advertised surface
+   lands at `browser-task` + `learn-form-schema` + `execute-form-batch`.
 
 Each milestone produces framework feedback. Capture it in
 `docs/framework-feedback.md` — some will be small ergonomic fixes; some
Original file line number	Diff line number	Diff line change
`@@ -2,7 +2,7 @@`
`2`	`2`	`{`
`3`	`3`	`"agentId": "Foragent",`
`4`	`4`	`"level": 4,`
`5`		`- "approvedSkills": ["browser-task", "fetch-page-title", "extract-structured-data"],`
	`5`	`+ "approvedSkills": ["browser-task", "learn-form-schema", "execute-form-batch"],`
`6`	`6`	`"firstSeen": "2026-04-21T00:00:00+00:00",`
`7`	`7`	`"lastInteraction": "2026-04-21T00:00:00+00:00",`
`8`	`8`	`"interactionCount": 0`
Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,7 @@`
`1`	`1`	`[`
`2`	`2`	`{`
`3`	`3`	`"agentName": "Foragent",`
`4`		`- "description": "Browser agent — navigates pages with Chromium and exposes task-level skills over HTTP A2A.",`
	`4`	`+ "description": "Browser agent — navigates pages with Chromium and exposes task-level skills over HTTP A2A. All skills consume structured input on the A2A DataPart (invoke_agent 'data' parameter); the text 'message' is only for human-readable summaries.",`
`5`	`5`	`"version": "1.0",`
`6`	`6`	`"url": "http://foragent:8080",`
`7`	`7`	`"protocolVersion": "1.0",`
`@@ -11,17 +11,17 @@`
`11`	`11`	`{`
`12`	`12`	`"id": "browser-task",`
`13`	`13`	`"name": "Browser Task (generalist)",`
`14`		`- "description": "Drive a browser with an LLM-in-the-loop planner to accomplish a free-form intent. Input JSON {\"intent\":\"...\",\"allowedHosts\":[\"host\",\"*.host\",\"*\"],\"url\":\"optional start\",\"credentialId\":\"optional\",\"maxSteps\":60,\"maxSeconds\":120}. allowedHosts is required and empty rejects. Returns a structured JSON result with status (done/failed/incomplete), summary, optional result, step count, and navigations."`
	`14`	+ "description": "Drive a browser with an LLM-in-the-loop planner to accomplish a free-form intent. PASS INPUT AS AN A2A DATA PART — populate invoke_agent's 'data' parameter with {\"intent\":\"...\",\"allowedHosts\":[\"host\",\"*.host\",\"*\"],\"url\":\"optional start\",\"credentialId\":\"optional\",\"maxSteps\":60,\"maxSeconds\":120}. 'intent' and 'allowedHosts' are required; an empty allowlist rejects the task. Use [\"*\"] explicitly when any host is acceptable. Returns a structured JSON result with status (done/failed/incomplete), summary, optional result, step count, and navigations."
`15`	`15`	`},`
`16`	`16`	`{`
`17`		`- "id": "fetch-page-title",`
`18`		`- "name": "Fetch Page Title",`
`19`		`- "description": "Navigate to a URL with a real browser and return the contents of its <title> element."`
	`17`	`+ "id": "learn-form-schema",`
	`18`	`+ "name": "Learn Form Schema",`
	`19`	+ "description": "Navigate to a web form, extract its structure (fields, types, options, validation), and persist it as a reusable skill. PASS INPUT AS AN A2A DATA PART — populate invoke_agent's 'data' parameter with {\"url\":\"https://...\",\"allowedHosts\":[\"host\"],\"formSelector\":\"optional\",\"credentialId\":\"optional\",\"skillName\":\"optional override\",\"intent\":\"optional prose\"}. 'url' and 'allowedHosts' are required. Returns the typed form schema plus the skill name it was persisted under."
`20`	`20`	`},`
`21`	`21`	`{`
`22`		`- "id": "extract-structured-data",`
`23`		`- "name": "Extract Structured Data",`
`24`		`- "description": "Navigate to a URL and extract data matching a natural-language description, returning JSON. Input the target URL and a description of what to extract."`
	`22`	`+ "id": "execute-form-batch",`
	`23`	`+ "name": "Execute Form Batch",`
	`24`	+ "description": "Submit a batch of rows against a learned form schema. PASS INPUT AS AN A2A DATA PART — populate invoke_agent's 'data' parameter with {\"schemaRef\":\"sites/host/forms/name\" OR \"schema\":{...FormSchema...},\"rows\":[{fieldName:value,...}],\"allowedHosts\":[\"host\"],\"credentialId\":\"optional\",\"mode\":\"abort-on-first\"|\"continue\",\"successIndicator\":\"optional CSS selector\"}. 'rows', 'allowedHosts', and exactly one of schemaRef/schema are required. Streams per-row progress. Default mode aborts on first failure."
`25`	`25`	`}`
`26`	`26`	`]`
`27`	`27`	`}`