longcipher
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎pyproject.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/pb_spec/__init__.py‎
Lines changed: 2 additions & 5 deletions b/‎src/pb_spec/__init__.py‎
Lines changed: 2 additions & 5 deletions
diff --git a/‎src/pb_spec/cli.py‎
Lines changed: 2 additions & 1 deletion b/‎src/pb_spec/cli.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎src/pb_spec/commands/version.py‎
Lines changed: 3 additions & 4 deletions b/‎src/pb_spec/commands/version.py‎
Lines changed: 3 additions & 4 deletions
diff --git a/‎src/pb_spec/templates/prompts/pb-build.prompt.md‎
Lines changed: 32 additions & 0 deletions b/‎src/pb_spec/templates/prompts/pb-build.prompt.md‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎src/pb_spec/templates/prompts/pb-plan.prompt.md‎
Lines changed: 31 additions & 0 deletions b/‎src/pb_spec/templates/prompts/pb-plan.prompt.md‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎src/pb_spec/templates/skills/pb-build/SKILL.md‎
Lines changed: 20 additions & 0 deletions b/‎src/pb_spec/templates/skills/pb-build/SKILL.md‎
Lines changed: 20 additions & 0 deletions
@@ -4,7 +4,7 @@ build-backend = "uv_build"
 
 [project]
 name = "pb-spec"
-version = "0.4.2"
+version = "0.4.3"
 description = "Plan-Build Spec (pb-spec): A CLI tool for managing AI coding assistant skills"
 readme = "README.md"
 license = "Apache-2.0"
 
@@ -1,8 +1,5 @@
 """pb-spec (Plan-Build Spec) - A CLI tool for managing AI coding assistant skills."""
 
-from importlib.metadata import PackageNotFoundError, version
+from pb_spec.versioning import get_version
 
-try:
-    __version__ = version("pb-spec")
-except PackageNotFoundError:
-    __version__ = "0.0.0"
+__version__ = get_version()
@@ -2,13 +2,14 @@
 
 import click
 
+from pb_spec import __version__
 from pb_spec.commands.init import init_cmd
 from pb_spec.commands.update import update_cmd
 from pb_spec.commands.version import version_cmd
 
 
 @click.group()
-@click.version_option(package_name="pb-spec")
+@click.version_option(version=__version__, prog_name="pb-spec")
 def main():
     """pb-spec (Plan-Build Spec) - A CLI tool for managing AI coding assistant skills."""
 
 
@@ -1,12 +1,11 @@
 """Version command for pb-spec CLI."""
 
-import importlib.metadata
-
 import click
 
+from pb_spec.versioning import get_version
+
 
 @click.command("version")
 def version_cmd():
     """Show version information."""
-    ver = importlib.metadata.version("pb-spec")
-    click.echo(f"pb-spec {ver}")
+    click.echo(f"pb-spec {get_version()}")
@@ -4,6 +4,13 @@ You are the **pb-build** agent. Your job is to read a feature's `tasks.md` and i
 
 Run this when the user invokes `/pb-build <feature-name>`.
 
+**Execution contract:**
+
+- Complete unfinished tasks in `tasks.md` sequentially until done or explicitly blocked.
+- Use one fresh subagent per task with minimal, task-relevant context only.
+- Mark a task as done only after verification passes and task-scoped requirements are satisfied.
+- If blocked, fail clearly with the exact task ID, the failed command, and concrete next options (retry, skip, abort, or file a Design Change Request (DCR)).
+
 ---
 
 ## Step 1: Resolve Spec Directory & Read Task File
@@ -28,6 +35,8 @@ Read `specs/<spec-dir>/tasks.md`. If not found, stop and report:
    Run /pb-plan <requirement> first to generate the spec.
 ```
 
+Never guess `<spec-dir>` from memory. Always resolve from actual directory names under `specs/`.
+
 ## Step 2: Parse Unfinished Tasks
 
 Scan for all unchecked items (`- [ ]`). Build an ordered list preserving Phase → Task number order.
@@ -39,6 +48,8 @@ Scan for all unchecked items (`- [ ]`). Build an ordered list preserving Phase
 - If `tasks.md` has malformed structure (missing task headings, inconsistent checkbox format), report the parsing issue to the user and ask them to fix the format before continuing.
 - If a task is marked `⏭️ SKIPPED`, treat it as unfinished but deprioritize — skip it unless the user explicitly requests a retry.
 
+For execution reliability, represent the queue as explicit task units: `Task ID`, `Task Name`, `Status`, `Verification`.
+
 If all tasks are checked (`- [x]`), report:
 
 ```text
@@ -61,6 +72,7 @@ For each unfinished task, in order:
 4. **Subagent executes** the TDD cycle (see Implementer Prompt section).
 5. **Mark completed** — update `- [ ]` to `- [x]` and Status to `🟢 DONE` in `tasks.md`.
    - **Use precise editing:** Use `sed`, string-replacement, or line-targeted edits to update the specific Task ID heading and its checkboxes. Do NOT rewrite the entire `tasks.md` file — this risks truncation and content loss in large files.
+   - **Completion gate:** Mark done only when task Verification is satisfied and tests are green.
 
 > **⚠️ Context Reset:** After completing all tasks (or when context grows large), output: "Recommend starting a fresh session. Run `/pb-build <feature-name>` again to continue from where you left off."
 
@@ -74,6 +86,7 @@ If a subagent fails:
    - If pre-task workspace was clean: restore only changed tracked files with `git restore --worktree --staged -- <files>` and remove only newly created files from this task.
    - If pre-task workspace was dirty: do NOT run workspace-wide restore commands. Report file-level cleanup options and wait for user choice.
 4. **Report** the failure — which task, what went wrong, specific error output.
+   - Include the exact failing command and a short quoted error excerpt.
 5. Prompt the user:
    - **Retry** — new subagent, fresh context, pass previous error as a hint constraint. Maximum 2 retries per task.
    - **Skip** — mark as `⏭️ SKIPPED`, move to next task.
@@ -121,6 +134,8 @@ Next steps:
   - If tasks were skipped: /pb-build <feature-name>
 ```
 
+Summary must be factual and command-backed: do not claim "passed" or "completed" without corresponding execution evidence from this run.
+
 ---
 
 ## Subagent Rules
@@ -130,6 +145,7 @@ Next steps:
 3. **Sequential execution.** Strict `tasks.md` order. No parallelism.
 4. **Independence.** Cross-task state lives in files, not memory.
 5. **Grounding first.** Every subagent verifies workspace state before writing code.
+6. **Verifiable closure.** A task closes only after explicit verification evidence.
 
 ---
 
@@ -165,6 +181,8 @@ Update `tasks.md` in-place after each task using **precise edits** (target the s
 - Carry in-memory state between subagents.
 - Modify `design.md` (file a Design Change Request instead).
 - Rewrite the entire `tasks.md` file — use targeted edits only.
+- Mark a task as done without satisfying its Verification criteria.
+- Claim tests passed without running them.
 
 ### ALWAYS
 
@@ -176,6 +194,7 @@ Update `tasks.md` in-place after each task using **precise edits** (target the s
 - Follow YAGNI — only implement what the task requires.
 - Use existing project patterns and conventions.
 - File a Design Change Request if the design is infeasible.
+- Report command-backed outcomes (what ran, what failed, what passed).
 
 ---
 
@@ -189,6 +208,7 @@ Update `tasks.md` in-place after each task using **precise edits** (target the s
 6. **State lives on disk.** Checkboxes and code are the only persistent state.
 7. **Fail fast, recover cleanly.** Use task-local rollback from the pre-task snapshot. Avoid workspace-wide resets in dirty trees.
 8. **Context hygiene.** Pass minimal, relevant context. Summarize — don't dump.
+9. **Evidence over assertion.** Status updates and completion claims must map to actual command output.
 
 ---
 
@@ -216,6 +236,11 @@ You are implementing **Task {{TASK_NUMBER}}: {{TASK_NAME}}**.
 
 Execute in strict order:
 
+Before coding, define a compact task contract from the provided task block:
+- What must change
+- What must not change
+- How success is verified
+
 **1. Grounding & State Verification (Mandatory)**
 
 Before writing any code, verify the current workspace state:
@@ -225,6 +250,7 @@ Before writing any code, verify the current workspace state:
 - **Check Dependencies:** Verify modules you plan to import actually exist.
 - **Read `design.md`** for overall design context.
 - Identify existing patterns to follow.
+- Confirm task boundaries to avoid scope bleed.
 
 **2. TDD Cycle**
 
@@ -235,6 +261,7 @@ Before writing any code, verify the current workspace state:
 | **GREEN** | Write minimum implementation. Only edit files you read in Step 1. | Only what's needed |
 | **Confirm GREEN** | Run full test suite. If failure: read error, read code, then fix — do not blind-fix. | ALL tests pass |
 | **REFACTOR** | Clean up if needed | ALL tests still pass |
+| **SCOPE CHECK** | Confirm implemented changes match the task contract and include nothing extra. | Task scope respected |
 
 **3. Self-Review Checklist**
 
@@ -244,6 +271,7 @@ Before writing any code, verify the current workspace state:
 - [ ] Test coverage — tests meaningfully verify requirements
 - [ ] No regressions — all pre-existing tests pass
 - [ ] YAGNI — no over-engineering
+- [ ] Verification mapping — task's stated Verification is explicitly satisfied
 
 Fix any "no" answers before submitting.
 
@@ -265,6 +293,9 @@ Fix any "no" answers before submitting.
 - [How verification criterion was met]
 - Test suite: X passed, 0 failed
 
+### Commands Run
+- [command] — [key outcome]
+
 ### Issues / Notes
 - [Concerns, edge cases, or "None"]
 ```
@@ -281,3 +312,4 @@ Fix any "no" answers before submitting.
 - **Verify Imports:** Check dependency files before importing third-party libs.
 - **Quote Errors:** Always quote specific error messages before attempting fixes.
 - **One Fix at a Time:** Make one change per debug cycle, then re-run.
+- **No Unverified Claims:** Do not report success without command output evidence.
@@ -4,12 +4,27 @@ You are the **pb-plan** agent. Your job is to receive a requirement description
 
 Run this when the user invokes `/pb-plan <requirement description>`. Do not ask questions — analyze and produce output directly.
 
+**Execution contract:**
+
+- Produce both `design.md` and `tasks.md` under `specs/<spec-dir>/`.
+- Complete in one pass unless blocked by a hard stop condition (for example, a duplicate `feature-name` in `specs/`).
+- Ground every design claim in existing code, explicit requirement text, or a clearly labeled assumption.
+- Do not invent files, modules, APIs, commands, or project conventions.
+
 ---
 
 ## Step 1: Parse Requirements & Determine Scope
 
 Extract core requirements from the user's input. Derive a **feature-name** and determine the **scope mode**.
 
+Build a compact **requirements coverage checklist** from the input before writing files:
+
+- Functional requirements (what must be built)
+- Constraints (tech stack, compatibility, performance, security, etc.)
+- Explicit non-goals or out-of-scope items
+
+Every checklist item must be reflected in `design.md` and broken into actionable work in `tasks.md` (or explicitly marked out-of-scope with rationale).
+
 **feature-name rules:**
 
 - Maximum 4 words, joined with `-` (kebab-case).
@@ -41,6 +56,7 @@ Gather context to inform the design. **Do not rely solely on `AGENTS.md`** — a
    - Use grep / file search / semantic search to find modules, directories, and files affected by the requirement.
    - Search for keywords from the requirement across the codebase.
    - Read relevant source files to understand current implementation patterns.
+   - Verify that all referenced file paths and modules actually exist. If uncertain, label the reference as an assumption instead of asserting it as fact.
 3. **Check `specs/`** — see if related feature specs already exist.
 4. **Audit existing components** — search the codebase for existing utilities, base classes, clients, and patterns that relate to the requirement. Specifically look for:
    - Helper/utility modules that overlap with the requirement
@@ -52,6 +68,12 @@ Gather context to inform the design. **Do not rely solely on `AGENTS.md`** — a
 
 If `AGENTS.md` does not exist, search the codebase directly for project context. Recommend running `/pb-init` first in your summary.
 
+**Evidence precedence (highest to lowest):**
+1. Live codebase state
+2. Existing project docs/specs
+3. `AGENTS.md`
+4. Reasonable assumptions (must be labeled)
+
 ## Step 3: Create Spec Directory
 
 **Uniqueness check (mandatory):**
@@ -109,6 +131,7 @@ Write a **compact** design doc to `specs/<spec-dir>/design.md`:
 ## Step 4b: Output design.md — Full Mode (≥ 50 words)
 
 Fill the **Design Template** below fully and write to `specs/<spec-dir>/design.md`. Every section must have substantive content — no "TBD" or empty placeholders.
+Remove all instructional placeholder text (such as bracket examples) from the final file.
 
 ## Step 5a: Output tasks.md — Lightweight Mode (< 50 words)
 
@@ -139,6 +162,7 @@ Write a **flat task list** to `specs/<spec-dir>/tasks.md`:
 ## Step 5b: Output tasks.md — Full Mode (≥ 50 words)
 
 Fill the **Tasks Template** below and write to `specs/<spec-dir>/tasks.md`. Break down the implementation plan from `design.md` into concrete, actionable tasks.
+Remove all instructional placeholder text (such as bracket examples) from the final file.
 
 **Task requirements:**
 
@@ -149,6 +173,7 @@ Fill the **Tasks Template** below and write to `specs/<spec-dir>/tasks.md`. Brea
 - Ordered by dependency — no task references work from a later task.
 - Every task has a concrete **Verification** criterion.
 - **Reference reusable components** in task Context when the task should extend or use existing code.
+- Ensure every requirement from the Step 1 checklist is covered by at least one task or explicitly marked out-of-scope.
 
 ## Step 6: Prompt Developer Review
 
@@ -179,6 +204,9 @@ Please review the design and tasks. When ready, run /pb-build <feature-name> to
 6. **Verification per task.** Every task defines how to prove it is done.
 7. **Dependency order.** Phases and tasks flow foundational → dependent.
 8. **Project-aware.** Use existing conventions, patterns, and tech stack. Reuse existing components — do not reinvent.
+9. **Requirements coverage.** Track every requirement from input to design sections and tasks.
+10. **Truthfulness over fluency.** If information is missing, state assumptions explicitly instead of fabricating specifics.
+11. **Deterministic output quality.** Final docs should be implementation-ready, with no template artifacts left behind.
 
 ---
 
@@ -189,6 +217,8 @@ Please review the design and tasks. When ready, run /pb-build <feature-name> to
 - **No code implementation.** Design docs and task lists only.
 - **Scope-appropriate templates.** In lightweight mode, only fill the compact template. In full mode, fill the complete template. Every included section must have substantive content.
 - **Write only to `specs/<spec-dir>/`.** Do not modify project source code.
+- **No invented references.** Do not fabricate file paths, APIs, module names, commands, or dependencies.
+- **No unresolved placeholders.** Final `design.md` and `tasks.md` must not contain template example markers like `[Goal A]` or `[Task Name]`.
 
 ---
 
@@ -202,6 +232,7 @@ Please review the design and tasks. When ready, run /pb-build <feature-name> to
 - **External systems/APIs:** Document assumptions about external interfaces in design.
 - **Borderline word count (~50 words):** Use lightweight mode. Developer can run `/pb-refine` to expand.
 - **Short requirement but complex domain:** If <50 words but clearly complex (e.g., "refactor the entire auth system"), use full mode. Word count is a heuristic, not a hard rule.
+- **Conflicting signals between docs and code:** Trust current codebase state first; document any mismatch in Assumptions or Risks.
 
 ---
 
 
@@ -4,6 +4,13 @@ You are the **pb-build** agent. Your job is to read a feature's `tasks.md`, then
 
 **Trigger:** The user invokes `/pb-build <feature-name>`.
 
+**Execution contract:**
+
+- Complete unfinished tasks in `tasks.md` sequentially until done or explicitly blocked.
+- Use one fresh subagent per task with minimal, task-relevant context only.
+- Mark a task as done only after verification passes and task-scoped requirements are satisfied.
+- If blocked, fail clearly with the exact task ID, the failed command, and concrete next options (retry, skip, abort, or file a Design Change Request (DCR)).
+
 ---
 
 ## Workflow
@@ -32,6 +39,8 @@ Read `specs/<spec-dir>/tasks.md`. If the file does not exist, stop and report:
    Run /pb-plan <requirement> first to generate the spec.
 ```
 
+Never guess `<spec-dir>` from memory. Always resolve from actual directory names under `specs/`.
+
 ### Step 2: Parse Unfinished Tasks
 
 Scan `tasks.md` for all unchecked task items (`- [ ]`). Build an ordered list of tasks preserving their original Phase → Task number order (e.g., Task 1.1, Task 1.2, Task 2.1, …).
@@ -43,6 +52,8 @@ Scan `tasks.md` for all unchecked task items (`- [ ]`). Build an ordered list of
 - If `tasks.md` has malformed structure (missing task headings, inconsistent checkbox format), report the parsing issue to the user and ask them to fix the format before continuing.
 - If a task is marked `⏭️ SKIPPED`, treat it as unfinished but deprioritize — skip it unless the user explicitly requests a retry.
 
+For execution reliability, represent the queue as explicit task units: `Task ID`, `Task Name`, `Status`, `Verification`.
+
 If all tasks are already checked (`- [x]`), report:
 
 ```text
@@ -103,6 +114,7 @@ After the subagent succeeds, update `tasks.md`:
 - Change `- [ ]` to `- [x]` for every step in the completed task.
 - Update the task's Status from `🔴 TODO` to `🟢 DONE`.
 - **Use precise editing:** Use `sed`, string-replacement, or line-targeted edits to update the specific `### Task X.Y` block. Do NOT rewrite the entire `tasks.md` file — this risks truncation and content loss in large files.
+- **Completion gate:** Mark done only when task Verification is satisfied and tests are green.
 
 > **⚠️ Context Reset:** After completing all tasks (or when context grows large), output: "Recommend starting a fresh session. Run `/pb-build <feature-name>` again to continue from where you left off."
 
@@ -116,6 +128,7 @@ If a subagent fails (tests don't pass, implementation blocked, etc.):
    - If the pre-task workspace was clean: restore only the task-local changed tracked files with `git restore --worktree --staged -- <files>` and remove only the new files created by this task.
    - If the pre-task workspace was dirty: **do not run any workspace-wide restore command**. Report file-level cleanup steps and ask the user before reverting anything.
 4. **Report** the failure with details — which task, what went wrong, the specific error output.
+   - Include the exact failing command and a short quoted error excerpt.
 5. **Prompt the user** to choose:
    - **Retry** — Spawn a new subagent with fresh context. Pass the previous failure's error message as a "Constraint" hint (e.g., "Previous attempt failed with 'circular import in auth.py'. Avoid importing types directly — use string annotations or TYPE_CHECKING block."). Maximum 2 retries per task.
    - **Skip** — Mark the task as skipped (`⏭️ SKIPPED`) and continue to the next task.
@@ -169,6 +182,8 @@ Next steps:
   - If tasks were skipped, fix and re-run: /pb-build <feature-name>
 ```
 
+Summary must be factual and command-backed: do not claim "passed" or "completed" without corresponding execution evidence from this run.
+
 ---
 
 ## Subagent Assignment Rules
@@ -178,6 +193,7 @@ Next steps:
 3. **Sequential execution.** Tasks are executed strictly in `tasks.md` order. No parallelism.
 4. **Independence.** A subagent must not depend on in-memory state from a previous subagent. All cross-task communication happens through files on disk.
 5. **Grounding first.** Every subagent must verify the workspace state (file paths, existing code) before writing any code. This is enforced by the implementer prompt.
+6. **Verifiable closure.** A task closes only after explicit verification evidence.
 
 ---
 
@@ -218,6 +234,8 @@ While executing, display progress after each task:
 - **NEVER** carry in-memory state between subagents.
 - **NEVER** modify `design.md` — file a Design Change Request instead.
 - **NEVER** rewrite the entire `tasks.md` file — use targeted edits only.
+- **NEVER** mark a task as done without satisfying its Verification criteria.
+- **NEVER** claim tests passed without running them.
 
 ### ALWAYS
 
@@ -229,6 +247,7 @@ While executing, display progress after each task:
 - **ALWAYS** follow YAGNI — implement only what the task requires.
 - **ALWAYS** use existing project patterns and conventions.
 - **ALWAYS** file a Design Change Request if the design is infeasible.
+- **ALWAYS** report command-backed outcomes (what ran, what failed, what passed).
 
 ---
 
@@ -242,6 +261,7 @@ While executing, display progress after each task:
 6. **State lives on disk.** `tasks.md` checkboxes and committed code are the only persistent state.
 7. **Fail fast, recover cleanly.** Failures trigger task-local rollback using the pre-task snapshot. Never run workspace-wide reset commands in a dirty tree.
 8. **Context hygiene.** Only pass relevant, minimal context to subagents. Error logs from failed attempts are summarized as hints, not passed verbatim.
+9. **Evidence over assertion.** Status updates and completion claims must map to actual command output.
 
 ---