longcipher
diff --git a/.github/prompts/pb-build.prompt.md b/.github/prompts/pb-build.prompt.md
Lines changed: 9 additions & 6 deletions
diff --git a/.github/prompts/pb-plan.prompt.md b/.github/prompts/pb-plan.prompt.md
Lines changed: 0 additions & 4 deletions
diff --git a/.github/prompts/pb-refine.prompt.md b/.github/prompts/pb-refine.prompt.md
Lines changed: 5 additions & 5 deletions
diff --git a/.opencode/skills/pb-build/SKILL.md b/.opencode/skills/pb-build/SKILL.md
Lines changed: 11 additions & 6 deletions
diff --git a/.opencode/skills/pb-build/references/implementer_prompt.md b/.opencode/skills/pb-build/references/implementer_prompt.md
Lines changed: 1 addition & 4 deletions
diff --git a/.opencode/skills/pb-init/SKILL.md b/.opencode/skills/pb-init/SKILL.md
Lines changed: 1 addition & 1 deletion
diff --git a/.opencode/skills/pb-plan/SKILL.md b/.opencode/skills/pb-plan/SKILL.md
Lines changed: 1 addition & 1 deletion
diff --git a/.opencode/skills/pb-refine/SKILL.md b/.opencode/skills/pb-refine/SKILL.md
Lines changed: 6 additions & 6 deletions
diff --git a/Cargo.toml b/Cargo.toml
Lines changed: 2 additions & 0 deletions
diff --git a/agent.toml b/agent.toml
Lines changed: 18 additions & 0 deletions
@@ -51,6 +51,7 @@ For each unfinished task, in order:
 
 1. **Extract** the full task block (Context, Steps, Verification).
 2. **Gather context** — read `design.md` and `AGENTS.md`.
+   - Record a pre-task workspace snapshot (`git status --porcelain` + tracked/untracked file lists) for safe rollback.
 3. **Spawn a fresh subagent** with the Implementer Prompt (below), filled in with the task content and project context.
    **Context Hygiene:** Do NOT pass the entire chat history. Pass ONLY:
    - The specific Task Description from `tasks.md`.
@@ -68,9 +69,12 @@ For each unfinished task, in order:
 If a subagent fails:
 
 1. **Analyze the diff:** Run `git diff` to see what the failed agent changed.
-2. **Revert the workspace:** Run `git checkout .` to reset to the last known-good state (Harness Reset).
-3. **Report** the failure — which task, what went wrong, specific error output.
-4. Prompt the user:
+2. **Compute task-local change set:** Compare against the pre-task snapshot to identify only files changed by this failed attempt.
+3. **Safe recovery (file-scoped):**
+   - If pre-task workspace was clean: restore only changed tracked files with `git restore --worktree --staged -- <files>` and remove only newly created files from this task.
+   - If pre-task workspace was dirty: do NOT run workspace-wide restore commands. Report file-level cleanup options and wait for user choice.
+4. **Report** the failure — which task, what went wrong, specific error output.
+5. Prompt the user:
    - **Retry** — new subagent, fresh context, pass previous error as a hint constraint. Maximum 2 retries per task.
    - **Skip** — mark as `⏭️ SKIPPED`, move to next task.
    - **Abort** — stop the build, report progress so far.
@@ -165,6 +169,7 @@ Update `tasks.md` in-place after each task using **precise edits** (target the s
 ### ALWAYS
 
 - Mark completed tasks in `tasks.md` immediately.
+- Capture a pre-task workspace snapshot before spawning subagents.
 - Self-review before submitting each task.
 - Run full test suite after each task.
 - Report failures with retry/skip/abort options.
@@ -182,13 +187,11 @@ Update `tasks.md` in-place after each task using **precise edits** (target the s
 4. **Grounding before action.** Verify workspace state before writing code.
 5. **Self-review catches over-engineering.** Audit before submit.
 6. **State lives on disk.** Checkboxes and code are the only persistent state.
-7. **Fail fast, recover cleanly.** Revert workspace before retry. Each attempt starts from a known-good state.
+7. **Fail fast, recover cleanly.** Use task-local rollback from the pre-task snapshot. Avoid workspace-wide resets in dirty trees.
 8. **Context hygiene.** Pass minimal, relevant context. Summarize — don't dump.
 
 ---
 
----
-
 ## IMPLEMENTER PROMPT TEMPLATE
 
 > This is the instruction template passed to each subagent. Fill in the `{{placeholders}}` with actual values per task.
 
@@ -205,8 +205,6 @@ Please review the design and tasks. When ready, run /pb-build <feature-name> to
 
 ---
 
----
-
 ## DESIGN TEMPLATE
 
 > Fill this template and write to `specs/<spec-dir>/design.md`.
@@ -359,8 +357,6 @@ Please review the design and tasks. When ready, run /pb-build <feature-name> to
 
 ---
 
----
-
 ## TASKS TEMPLATE
 
 > Fill this template and write to `specs/<spec-dir>/tasks.md`.
 
@@ -1,4 +1,3 @@
-````prompt
 # pb-refine — Design & Plan Refinement
 
 You are the **pb-refine** agent. Your job is to read user feedback on an existing spec (`design.md` and/or `tasks.md`) and update them accordingly. This closes the gap between one-shot planning and iterative refinement.
@@ -10,12 +9,14 @@ Run this when the user invokes `/pb-refine <feature-name>` with feedback or chan
 ## Step 1: Resolve Spec Directory & Load Existing Spec
 
 **Resolve `<feature-name>` → `<spec-dir>`:**
+
 1. List all directories under `specs/`.
 2. Find the directory whose name ends with `-<feature-name>` (e.g., `2026-02-15-01-add-websocket-auth` for feature-name `add-websocket-auth`).
 3. If exactly one match is found, use it as `<spec-dir>`. All `specs/<spec-dir>/` paths below refer to this resolved directory.
 4. If multiple matches exist, use the most recent one (latest date prefix).
 5. If no match is found, stop and report:
-   ```
+
+   ```text
    ❌ No spec directory found for feature "<feature-name>" in specs/.
       Run /pb-plan <requirement> first to generate the spec.
    ```
@@ -37,6 +38,7 @@ The user's feedback may include:
 - **General feedback** — "this approach won't work because..." or "we should use X instead of Y".
 
 Categorize the feedback into:
+
 1. **Design changes** — modifications to `design.md`.
 2. **Task changes** — modifications to `tasks.md`.
 3. **Both** — changes that affect design and cascade to tasks.
@@ -82,7 +84,7 @@ After making changes, verify:
 
 ## Step 6: Output Summary
 
-```
+```text
 🔄 Spec refined: specs/<spec-dir>/
 
 Changes to design.md:
@@ -131,5 +133,3 @@ Next steps:
 - **Feedback invalidates completed tasks:** Flag this in the summary as a warning. Do not automatically undo completed tasks.
 - **Feedback requires entirely new design:** Recommend the user run `/pb-plan <feature-name>` instead with the new requirements. Only use `/pb-refine` for incremental changes.
 - **Multiple conflicting feedback items:** Apply them in the order given. Note conflicts in the Revision History.
-
-````
@@ -1,6 +1,6 @@
 ---
 name: pb-build
-description: "Subagent-Driven Implementation"
+description: "Use when tasks.md is ready and you need sequential TDD implementation with recovery loops."
 ---
 
 # pb-build — Subagent-Driven Implementation
@@ -67,6 +67,7 @@ Extract the full task block from `tasks.md` — including Context, Steps, and Ve
 - Read `specs/<spec-dir>/design.md` for design context.
 - Read `AGENTS.md` (if it exists) for project conventions.
 - Identify files most relevant to this task.
+- Record a pre-task workspace snapshot (`git status --porcelain` + tracked/untracked file lists). This baseline is used for safe recovery if the task fails.
 
 #### 3c. Spawn Subagent
 
@@ -115,14 +116,17 @@ After the subagent succeeds, update `tasks.md`:
 If a subagent fails (tests don't pass, implementation blocked, etc.):
 
 1. **Analyze the diff:** Run `git diff` to see exactly what the failed agent changed. Understanding the attempted approach is essential before retrying.
-2. **Revert the workspace:** Run `git checkout .` to clean the workspace back to the last known-good state. This is the "Harness Reset" — it prevents broken code from one attempt polluting the next.
-3. **Report** the failure with details — which task, what went wrong, the specific error output.
-4. **Prompt the user** to choose:
+2. **Compute task-local change set:** Compare with the pre-task snapshot to identify only files changed by this failed attempt (tracked diffs + newly created untracked files).
+3. **Safe recovery (file-scoped):**
+   - If the pre-task workspace was clean: restore only the task-local changed tracked files with `git restore --worktree --staged -- <files>` and remove only the new files created by this task.
+   - If the pre-task workspace was dirty: **do not run any workspace-wide restore command**. Report file-level cleanup steps and ask the user before reverting anything.
+4. **Report** the failure with details — which task, what went wrong, the specific error output.
+5. **Prompt the user** to choose:
    - **Retry** — Spawn a new subagent with fresh context. Pass the previous failure's error message as a "Constraint" hint (e.g., "Previous attempt failed with 'circular import in auth.py'. Avoid importing types directly — use string annotations or TYPE_CHECKING block."). Maximum 2 retries per task.
    - **Skip** — Mark the task as skipped (`⏭️ SKIPPED`) and continue to the next task.
    - **Abort** — Stop the entire build. Report progress so far.
 
-> **Why revert before retry:** If the failed agent left partially-written code, a new agent may try to build on top of broken foundations. A clean revert ensures each retry starts from a known-good state — this is the core principle of an observable, resettable harness.
+> **Why file-scoped recovery before retry:** Failed attempts can leave broken partial edits, but global resets can wipe unrelated in-progress work. Task-local rollback preserves harness reliability without destroying user state.
 
 #### Design Change Requests (DCR)
 
@@ -223,6 +227,7 @@ While executing, display progress after each task:
 ### ALWAYS
 
 - **ALWAYS** mark completed tasks in `tasks.md` immediately after success.
+- **ALWAYS** capture a pre-task workspace snapshot before spawning a subagent.
 - **ALWAYS** self-review before submitting a task's work.
 - **ALWAYS** run the full test suite after each task to catch regressions.
 - **ALWAYS** report failures clearly with actionable options (retry/skip/abort).
@@ -240,7 +245,7 @@ While executing, display progress after each task:
 4. **Grounding before action.** Every subagent verifies workspace state before writing code — preventing path hallucination and stale assumptions.
 5. **Self-review catches over-engineering.** Every subagent audits its own work before submitting.
 6. **State lives on disk.** `tasks.md` checkboxes and committed code are the only persistent state.
-7. **Fail fast, recover cleanly.** Failures trigger workspace revert (`git checkout .`) before retry — ensuring each attempt starts from a known-good state.
+7. **Fail fast, recover cleanly.** Failures trigger task-local rollback using the pre-task snapshot. Never run workspace-wide reset commands in a dirty tree.
 8. **Context hygiene.** Only pass relevant, minimal context to subagents. Error logs from failed attempts are summarized as hints, not passed verbatim.
 
 ---
 
@@ -33,7 +33,7 @@ Execute the following steps in strict order. **You must output your reasoning fo
 3. **Check Dependencies:** Verify that any modules you plan to import actually exist. Check `pyproject.toml`, `package.json`, `Cargo.toml`, or equivalent before importing third-party libraries.
 4. **Confirm Test Infrastructure:** Verify the test directory exists and check how existing tests are structured (test runner, naming conventions, fixture patterns).
 
-> **Why this step is mandatory:** Long-running agents are prone to "path hallucination" — assuming files exist at locations they don't oratethat code has a structure it doesn't. This grounding step synchronizes your mental model with the actual workspace state.
+> **Why this step is mandatory:** Long-running agents are prone to "path hallucination" — assuming files exist at locations they don't or that code has a structure it doesn't. This grounding step synchronizes your mental model with the actual workspace state.
 
 ### 2. TDD Cycle
 
@@ -149,6 +149,3 @@ These rules act as your safety harness — they prevent common failure modes in
 4. **Quote Errors:** When a test or command fails, always quote the specific error message in your reasoning before attempting a fix.
 5. **One Fix at a Time:** When debugging a failure, make exactly one change, then re-run. Do not stack multiple speculative fixes.
 6. **Path Verification:** Never hardcode or assume file paths. Use `ls`, `find`, or file search to confirm paths before using them.
-
-````text
-```
@@ -1,6 +1,6 @@
 ---
 name: pb-init
-description: "Project State Initialization"
+description: "Use when onboarding a repo or after major structural changes to regenerate AGENTS.md project context."
 ---
 
 # pb-init — Project Initialization
 
@@ -1,6 +1,6 @@
 ---
 name: pb-plan
-description: "Design & Task Planning"
+description: "Use when converting a requirement into a design proposal and executable tasks before coding."
 ---
 
 # pb-plan — Design & Task Planning
 
@@ -1,9 +1,8 @@
 ---
 name: pb-refine
-description: "Design & Plan Refinement"
+description: "Use when feedback or a Design Change Request requires incremental updates to design.md and tasks.md."
 ---
 
-````skill
 # pb-refine — Design & Plan Refinement
 
 You are the **pb-refine** agent. Your job is to read user feedback on an existing spec (`design.md` and/or `tasks.md`) and update them accordingly. This closes the gap between one-shot planning and iterative refinement.
@@ -19,12 +18,14 @@ Execute the following steps in order.
 ### Step 1: Resolve Spec Directory & Load Existing Spec
 
 **Resolve `<feature-name>` → `<spec-dir>`:**
+
 1. List all directories under `specs/`.
 2. Find the directory whose name ends with `-<feature-name>` (e.g., `2026-02-15-01-add-websocket-auth` for feature-name `add-websocket-auth`).
 3. If exactly one match is found, use it as `<spec-dir>`. All `specs/<spec-dir>/` paths below refer to this resolved directory.
 4. If multiple matches exist, use the most recent one (latest date prefix).
 5. If no match is found, stop and report:
-   ```
+
+   ```text
    ❌ No spec directory found for feature "<feature-name>" in specs/.
       Run /pb-plan <requirement> first to generate the spec.
    ```
@@ -46,6 +47,7 @@ The user's feedback may include:
 - **General feedback** — "this approach won't work because..." or "we should use X instead of Y".
 
 Categorize the feedback into:
+
 1. **Design changes** — modifications to `design.md`.
 2. **Task changes** — modifications to `tasks.md`.
 3. **Both** — changes that affect design and cascade to tasks.
@@ -91,7 +93,7 @@ After making changes, verify:
 
 ### Step 6: Output Summary
 
-```
+```text
 🔄 Spec refined: specs/<spec-dir>/
 
 Changes to design.md:
@@ -140,5 +142,3 @@ Next steps:
 - **Feedback invalidates completed tasks:** Flag this in the summary as a warning. Do not automatically undo completed tasks.
 - **Feedback requires entirely new design:** Recommend the user run `/pb-plan <feature-name>` instead.
 - **Multiple conflicting feedback items:** Apply them in the order given. Note conflicts in the Revision History.
-
-````
@@ -14,6 +14,7 @@ resolver = "3"
 agent-skills = "0.2"
 async-trait = "0.1"
 futures-core = "0.3"
+futures-util = "0.3"
 genai = "=0.6.0-beta.1"
 rmcp = { version = "0.16", features = [
     "client",
@@ -31,6 +32,7 @@ tokio = { version = "1", features = [
     "time",
     "process",
 ] }
+tokio-stream = "0.1"
 tokio-util = "0.7"
 tracing = "0.1.44"
 tracing-subscriber = "0.3.22"
 
@@ -15,6 +15,9 @@ max_steps = 12
 # Turn timeout in milliseconds (default: 90000 = 90s).
 turn_timeout_ms = 90000
 
+# Optional model context window size, in tokens; used to compute ratio-based skills budgets.
+# model_context_tokens = 128000
+
 # ── LLM Settings (optional, reserved for v2) ────────────────────────
 
 # [llm]
@@ -25,6 +28,7 @@ turn_timeout_ms = 90000
 
 # [policy]
 # deny_tools = []
+# allow_tools = ["local/read_file"]
 
 # ── MCP Servers (optional) ───────────────────────────────────────────
 # Uncomment and configure to enable tool use via MCP.
@@ -35,3 +39,17 @@ turn_timeout_ms = 90000
 # command = "npx"
 # args = ["-y", "@modelcontextprotocol/server-filesystem", "."]
 # tool_timeout_ms = 15000
+
+# ── Skills Sources (optional) ───────────────────────────────────────
+# When configured, Bob loads skill directories and injects selected
+# skill instructions into each turn based on user input.
+
+# [skills]
+# max_selected = 3
+# token_budget_tokens = 1800
+# token_budget_ratio = 0.10
+#
+# [[skills.sources]]
+# type = "directory"
+# path = "./skills"
+# recursive = false