diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 98e77d35..6c092245 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -12,7 +12,7 @@ "name": "PACT", "source": "./pact-plugin", "description": "Orchestration harness that turns Claude Code into a coordinated team of specialist AI agents", - "version": "4.4.40", + "version": "4.4.41", "author": { "name": "Synaptic-Labs-AI" }, diff --git a/README.md b/README.md index 7acca236..2072bfed 100644 --- a/README.md +++ b/README.md @@ -605,7 +605,7 @@ When installed as a plugin, PACT lives in your plugin cache: │ └── cache/ │ └── pact-plugin/ │ └── PACT/ -│ └── 4.4.40/ # Plugin version +│ └── 4.4.41/ # Plugin version │ ├── agents/ │ ├── commands/ │ ├── skills/ diff --git a/pact-plugin/.claude-plugin/plugin.json b/pact-plugin/.claude-plugin/plugin.json index c76018b4..884b7817 100644 --- a/pact-plugin/.claude-plugin/plugin.json +++ b/pact-plugin/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "PACT", - "version": "4.4.40", + "version": "4.4.41", "description": "Orchestration harness that turns Claude Code into a coordinated team of specialist AI agents", "author": { "name": "Synaptic-Labs-AI", diff --git a/pact-plugin/README.md b/pact-plugin/README.md index 74354666..d823da02 100644 --- a/pact-plugin/README.md +++ b/pact-plugin/README.md @@ -1,6 +1,6 @@ # PACT — Orchestration Harness for Claude Code -> **Version**: 4.4.40 +> **Version**: 4.4.41 Turn a single Claude Code session into a managed team of specialist AI agents that prepare, design, build, and test your code systematically. diff --git a/pact-plugin/commands/comPACT.md b/pact-plugin/commands/comPACT.md index cef8f9c3..b55365c3 100644 --- a/pact-plugin/commands/comPACT.md +++ b/pact-plugin/commands/comPACT.md @@ -15,7 +15,7 @@ Dispatch concurrent specialists for this self-contained task: $ARGUMENTS Create a simpler Task hierarchy than full orchestrate: ``` -1. `TaskCreate`: Feature task "{verb} {feature}" (self-contained task) +1. `TaskCreate`: Feature task "{verb} {feature}" (self-contained task) — stamp metadata.variety.total (see below) 2. `TaskUpdate`: Feature task status = "in_progress" 3. Analyze: How many agents needed? 4. `TaskCreate`: Agent task(s) — direct children of feature @@ -29,6 +29,12 @@ Create a simpler Task hierarchy than full orchestrate: > Steps 8-10 are detailed in the [After Specialist Completes](#after-specialist-completes) section below (includes test verification and commit steps). +> **Feature-task variety stamp (step 1).** Stamp the feature-level variety total on the feature `TaskCreate`, so the wrap-up Orchestration Retrospective can compute feature-vs-dispatch divergence instead of `feature_variety_missing`: +> ```python +> TaskCreate(subject="{verb} {feature}", metadata={"variety": {"total": N}}) # N = feature-level total, 4-16 +> ``` +> Advisory, not enforced — the feature task has no teachback-gated sibling, so the dispatch-boundary gate cannot apply. The `.total` field is the load-bearing input `compute_variety_divergence` reads (the full D11 4-rationale block is fine too, but `.total` is the minimum). Mirrors what `orchestrate.md` already persists for its feature task. + **Example structure:** ``` [Feature] "Fix 3 backend bugs" (blockedBy: agent1, agent2, agent3) diff --git a/pact-plugin/commands/plan-mode.md b/pact-plugin/commands/plan-mode.md index 86c1545c..85307f91 100644 --- a/pact-plugin/commands/plan-mode.md +++ b/pact-plugin/commands/plan-mode.md @@ -208,7 +208,23 @@ A_id = TaskCreate( "Mission for Task B: the primary-work task assigned to you in your TaskList (the work task, NOT this TEACHBACK gate task), identified by its subject (the '{role}: {mission}' pattern). Claim it after this teachback is accepted." ) TaskUpdate(A_id, owner="{specialist-name}") -B_id = TaskCreate(subject="{specialist}: plan consultation for {feature}", description="") +B_id = TaskCreate( + subject="{specialist}: plan consultation for {feature}", + description="", + metadata={ + "variety": { + "novelty": N, + "novelty_rationale": "<1-sentence: why this score for THIS dispatch's novelty>", + "scope": N, + "scope_rationale": "<1-sentence: why this score for THIS dispatch's scope>", + "uncertainty": N, + "uncertainty_rationale": "<1-sentence: why this score for THIS dispatch's uncertainty>", + "risk": N, + "risk_rationale": "<1-sentence: why this score for THIS dispatch's risk>", + "total": N + } + } +) TaskUpdate(B_id, owner="{specialist-name}", addBlockedBy=[A_id]) TaskUpdate(A_id, addBlocks=[B_id]) ``` diff --git a/pact-plugin/commands/rePACT.md b/pact-plugin/commands/rePACT.md index 6a647675..3edfdb71 100644 --- a/pact-plugin/commands/rePACT.md +++ b/pact-plugin/commands/rePACT.md @@ -236,7 +236,23 @@ A_id = TaskCreate( "Mission for Task B: the primary-work task assigned to you in your TaskList (the work task, NOT this TEACHBACK gate task), identified by its subject (the '{role}: {mission}' pattern). Claim it after this teachback is accepted." ) TaskUpdate(A_id, owner="{scope-prefixed-name}") -B_id = TaskCreate(subject="{scope-prefixed-name}: implement {sub-task}", description="\n\nFIRST claim this task (TaskUpdate status=in_progress) before any implementation tool-use — it is pre-assigned to you but still pending; you flip it, not the lead.") +B_id = TaskCreate( + subject="{scope-prefixed-name}: implement {sub-task}", + description="\n\nFIRST claim this task (TaskUpdate status=in_progress) before any implementation tool-use — it is pre-assigned to you but still pending; you flip it, not the lead.", + metadata={ + "variety": { + "novelty": N, + "novelty_rationale": "<1-sentence: why this score for THIS dispatch's novelty>", + "scope": N, + "scope_rationale": "<1-sentence: why this score for THIS dispatch's scope>", + "uncertainty": N, + "uncertainty_rationale": "<1-sentence: why this score for THIS dispatch's uncertainty>", + "risk": N, + "risk_rationale": "<1-sentence: why this score for THIS dispatch's risk>", + "total": N + } + } +) TaskUpdate(B_id, owner="{scope-prefixed-name}", addBlockedBy=[A_id]) TaskUpdate(A_id, addBlocks=[B_id]) ``` diff --git a/pact-plugin/hooks/dispatch_gate.py b/pact-plugin/hooks/dispatch_gate.py index 475e3ec7..83bf883a 100644 --- a/pact-plugin/hooks/dispatch_gate.py +++ b/pact-plugin/hooks/dispatch_gate.py @@ -198,10 +198,13 @@ def _emit_load_failure_deny(stage: str, error: BaseException) -> NoReturn: # heuristic is muted. # Unknown values fall back to ``"warn"`` so a typo never disables the # gate's other rules. Default ``"warn"`` preserves Commit 2 behavior. +# The read is normalized with .strip().lower() BEFORE the membership check +# (``"DENY"`` / ``" deny "`` → deny; ``""`` / bogus → warn), parsing +# identically to handoff_ordering_gate.py's PACT_DISPATCH_VARIETY_MODE knob. _ALLOWED_INLINE_MISSION_MODES = frozenset({"warn", "deny", "shadow"}) INLINE_MISSION_MODE = os.environ.get( "PACT_DISPATCH_INLINE_MISSION_MODE", "warn", -) +).strip().lower() if INLINE_MISSION_MODE not in _ALLOWED_INLINE_MISSION_MODES: INLINE_MISSION_MODE = "warn" diff --git a/pact-plugin/hooks/handoff_ordering_gate.py b/pact-plugin/hooks/handoff_ordering_gate.py index 67ea592e..4ccb9a88 100644 --- a/pact-plugin/hooks/handoff_ordering_gate.py +++ b/pact-plugin/hooks/handoff_ordering_gate.py @@ -1,11 +1,18 @@ #!/usr/bin/env python3 """ Location: pact-plugin/hooks/handoff_ordering_gate.py -Summary: PreToolUse hook (matcher="TaskUpdate") that WARNS the lead when a +Summary: PreToolUse hook (matcher="TaskUpdate") with TWO independent branches: + (1) #956 completion-ordering nudge — WARNS the lead when a TaskUpdate(status="completed") lands on a HANDOFF-expecting task whose - metadata.handoff is not yet present on disk — the #956 write-after- - completion ordering mistake. Advisory only (additionalContext); NEVER + metadata.handoff is not yet present on disk. Advisory only; NEVER denies. + (2) #865 dispatch-variety gate — fires when a terminal dispatch-wiring + TaskUpdate (owner resolves to a pact-specialist agentType AND + addBlockedBy in the SAME tool_input) links + a Task B that carries no resolvable metadata.variety. Deterministic + STRONG-WARN by default; env-gated DENY opt-in via + PACT_DISPATCH_VARIETY_MODE. The deny path is the file's ONLY + fail-CLOSED exception — every other path fails OPEN. Used by: hooks.json PreToolUse hook (matcher="TaskUpdate") This is the NUDGE half of the #956 fix (defense-in-depth). The load-bearing @@ -47,10 +54,35 @@ # ─── stdlib first (used on the input-side fail-open BEFORE wrapped imports) ─ import json +import os import sys _SUPPRESS_OUTPUT = json.dumps({"suppressOutput": True}) +# ─── #865 dispatch-variety enforcement mode (env-knob) ───────────────────── +# Models dispatch_gate.py's PACT_DISPATCH_INLINE_MISSION_MODE: read once at +# import; an unknown value falls back to "warn" so a typo never silently +# disables (or, worse, silently DENIES on) the gate. Default "warn" is the +# non-negotiable consumer-blast-radius posture (#997): the gate ships +# deterministic-WARN; "deny" is an explicit per-consumer opt-in. +# warn → additionalContext advisory (the existing WARN mechanism) + exit 0 +# deny → permissionDecision:"deny" + exit 2 (the ONLY fail-CLOSED path in +# this file). SOURCE-PROVEN honor: the platform's PreToolUse deny +# branch returns before tool.call() with no tool_name carve-out, so +# a TaskUpdate-matcher deny IS honored — empirically un-exercised, so +# warn ships as the default and deny is opt-in. +# shadow → journal-only calibration; no additionalContext, no deny. +# The read is normalized with .strip().lower() BEFORE the membership check so a +# forgiving opt-in: "DENY" / " deny " / "Deny" → deny; "" / bogus / unknown → +# warn (the safe default). Strictly more forgiving — normalization can never +# enable an unintended mode (anything not in the set still falls back to warn). +_ALLOWED_VARIETY_MODES = frozenset({"warn", "deny", "shadow"}) +DISPATCH_VARIETY_MODE = os.environ.get( + "PACT_DISPATCH_VARIETY_MODE", "warn" +).strip().lower() +if DISPATCH_VARIETY_MODE not in _ALLOWED_VARIETY_MODES: + DISPATCH_VARIETY_MODE = "warn" + # Cap on the stdin read. Real PreToolUse TaskUpdate frames carry a tool_input # (taskId + small metadata) and stay well under this; an over-cap frame # truncates mid-JSON → JSONDecodeError → input-side fail-open. Bounds memory @@ -66,8 +98,10 @@ # clean (0) and the output well-formed. try: import shared.pact_context as pact_context + from shared.dispatch_helpers import is_pact_specialist_owner from shared.intentional_wait import is_self_complete_exempt from shared.task_utils import is_teachback_subject, read_task_json + from shared.teachback_schema import resolve_variety_total _IMPORTS_OK = True except BaseException: # noqa: BLE001 — fail-OPEN catch-all (warn gate never denies) _IMPORTS_OK = False @@ -161,6 +195,135 @@ def _evaluate(input_data: dict) -> str | None: ) +def _evaluate_dispatch_variety(input_data: dict) -> str | None: + """#865: return an actionable advisory string when a terminal + dispatch-wiring TaskUpdate links a Task B that carries no resolvable + metadata.variety, else None. The caller decides warn-vs-deny-vs-shadow + from DISPATCH_VARIETY_MODE; this function only detects the gap. + + This is a NEW branch, parallel to and independent of the #956 + completion-ordering _evaluate — neither calls the other. + + COMPOSITE-SIGNATURE TRIGGER (the FIRST-OBSERVABLE-WRITE / no-misfire + invariant): fire ONLY on the terminal dispatch-wiring write — a single + TaskUpdate whose tool_input carries BOTH: + - an owner that resolves (via team config) to a pact-specialist + agentType — owners are BARE names, so this is a team-config + resolution, NOT an owner.startswith("pact-") prefix check, AND + - addBlockedBy present and non-empty (the teachback-gate link), + in the SAME tool_input. This composite co-occurrence is uniquely the + dispatch-wiring shape (orchestrate/comPACT/plan-mode/rePACT all wire B + via `TaskUpdate(B, owner=..., addBlockedBy=[A])`). No fire at + TaskCreate(B) (owner empty there — wired by this later TaskUpdate) or on + a partial-wiring TaskUpdate (owner-only OR addBlockedBy-only). All other + addBlockedBy uses across the templates (phase/imPACT blocker blocking) + are addBlockedBy-ONLY with no owner in the same call → already excluded. + + STRUCTURAL DECISION (not actor-based): the gate READS the linked Task B's + metadata.variety from disk and fires ONLY when there is no resolvable + total (absent / non-dict / untotaled). Firing on the composite signature + alone would warn on every dispatch including correctly-stamped ones; the + read is what makes the decision detection-precise (and the deny safe). + The "present-but-malformed-rationale" case stays a PostToolUse advisory + in task_lifecycle_gate R4 (the surgical split) — this gate keys solely on + resolve_variety_total being None, the missing-stamp concern. + """ + tool_name = input_data.get("tool_name", "") + if tool_name != "TaskUpdate": + return None # matcher already scopes this, but be defensive + + # DUAL-MODE: lead frame only (same structural is_lead discriminator the + # #956 branch uses). A teammate frame emits nothing. + if not pact_context.is_lead(input_data): + return None + + tool_input = input_data.get("tool_input") or {} + if not isinstance(tool_input, dict): + return None + + # COMPOSITE signature — a pact-specialist owner AND addBlockedBy non-empty + # in the SAME tool_input. Either half alone is a non-terminal/partial write. + # Cheap in-memory guards FIRST (owner-present, addBlockedBy, taskId); the + # owner→agentType resolution is a disk read, deferred until after the + # team_name resolve below (cost-order). + owner = tool_input.get("owner") + if not isinstance(owner, str) or not owner.strip(): + return None # no owner → TaskCreate(B) / not a wiring write + add_blocked_by = tool_input.get("addBlockedBy") + if not isinstance(add_blocked_by, list) or not add_blocked_by: + return None # partial wiring (owner-only) → not yet terminal + + task_id = tool_input.get("taskId", "") or "" + if not task_id: + return None + try: + pact_context.init(input_data) + team_name = pact_context.get_pact_context().get("team_name", "") + except Exception: + team_name = "" + if not team_name: + return None # no team context → cannot resolve owner/Task B → bypass + + # CORRECTED PREDICATE (#865 cycle-1): identify a pact-specialist teammate by + # resolving the BARE owner → team-member → agentType (the same resolution + # the carve-out helpers use), NOT by an owner.startswith("pact-") prefix — + # real owners are bare names, so the old prefix check was always False (the + # gate was dead-on-arrival). is_pact_specialist_owner fail-CLOSES to False on + # any unresolvable path → this gate fail-OPENS (return None), never strands. + # SOLO_EXEMPT agents (general-purpose/Explore/Plan) have non-pact agentTypes + # → excluded here naturally; the secretary (pact-secretary) PASSES this check + # and is suppressed by the is_self_complete_exempt carve-out below. + if not is_pact_specialist_owner(owner, team_name): + return None # owner does not resolve to a pact specialist → not a dispatch + + task = read_task_json(task_id, team_name) + if not isinstance(task, dict) or not task: + return None # no task data → bypass (fail-open) + + # CARVE-OUTS (preserve R4's silence guarantees verbatim; the helpers are + # already imported). The pact-specialist resolution above admits the + # secretary (pact-secretary IS a registered specialist), so the + # is_self_complete_exempt carve-out is LOAD-BEARING here — it suppresses + # the secretary + signal tasks. is_teachback_subject suppresses the Task-A + # teachback gate by subject. + subject = task.get("subject") or "" + if is_self_complete_exempt(task, team_name) or is_teachback_subject(subject): + return None + + # STRUCTURAL READ: does the linked Task B carry a resolvable variety total? + # resolve_variety_total is the shared SSOT (also used by the read-time band + # resolver and write-time validator). None ⇒ absent / non-dict / untotaled + # ⇒ the missing-stamp gap this gate enforces. A resolvable total ⇒ silent + # (a present-but-malformed-rationale stamp is R4's PostToolUse concern). + metadata = task.get("metadata") + variety = metadata.get("variety") if isinstance(metadata, dict) else None + if resolve_variety_total(variety, metadata) is not None: + return None # stamp resolves → not a missing-stamp dispatch + + return ( + f"PACT dispatch-variety gate: Task {task_id} ({subject!r}) is being " + f"wired into a teachback-gated dispatch (owner {owner!r}) without a " + "resolvable metadata.variety. Per-dispatch variety stamping is " + "required so the hook can resolve the reasoning_reconstruction band " + "and the concurrent-auditor trigger. Stamp the D11 4-rationale block " + "(novelty/scope/uncertainty/risk + total 4-16) on this Task B BEFORE " + "wiring it — mirror the block in orchestrate.md / comPACT.md / " + "peer-review.md / plan-mode.md / rePACT.md." + ) + + +def _emit_warn(advisory: str) -> None: + """WARN output path: additionalContext advisory + exit 0 (never denies). + Shared by the #956 nudge and the dispatch-variety warn mode.""" + print(json.dumps({ + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "additionalContext": advisory, # advisory — NOT permissionDecision + } + })) + sys.exit(0) # exit 0 — advisory, never deny / exit-2 + + def main() -> None: # Input-side fail-open: an unreadable / oversized / malformed stdin frame # suppresses + exits 0 (never blocks the TaskUpdate). @@ -174,6 +337,36 @@ def main() -> None: print(_SUPPRESS_OUTPUT) sys.exit(0) + # #865 dispatch-variety branch FIRST: it is the only branch that can DENY + # (deny mode), and a denied wiring write should be blocked before the #956 + # completion nudge is even considered. Both branches fail-OPEN on any logic + # error — a gate that bricks legitimate writes is worse than the gap it + # guards. The deny path (deny mode + confirmed missing stamp) is the sole + # deliberate fail-CLOSED exception. + try: + variety_gap = _evaluate_dispatch_variety(input_data) + except Exception: + variety_gap = None # fail-OPEN on any logic error + if variety_gap: + if DISPATCH_VARIETY_MODE == "deny": + # The ONLY fail-CLOSED path in this file. Source-proven honor: + # the platform PreToolUse deny branch returns before tool.call() + # with no tool_name carve-out, so a TaskUpdate-matcher deny IS + # honored (empirically un-exercised → warn is the default). + print(json.dumps({ + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "deny", + "permissionDecisionReason": variety_gap, + } + })) + sys.exit(2) + if DISPATCH_VARIETY_MODE == "warn": + _emit_warn(variety_gap) + # shadow → fall through to suppress (journal-only telemetry is the + # PostToolUse R4/journal surface; here shadow simply does not surface). + + # #956 completion-ordering nudge: WARN-only, never denies. try: advisory = _evaluate(input_data) except Exception: @@ -183,13 +376,7 @@ def main() -> None: sys.exit(0) if advisory: - print(json.dumps({ - "hookSpecificOutput": { - "hookEventName": "PreToolUse", - "additionalContext": advisory, # advisory — NOT permissionDecision - } - })) - sys.exit(0) # exit 0 — advisory, never deny / exit-2 + _emit_warn(advisory) print(_SUPPRESS_OUTPUT) sys.exit(0) diff --git a/pact-plugin/hooks/shared/dispatch_helpers.py b/pact-plugin/hooks/shared/dispatch_helpers.py index 05aca7a4..c3389dbf 100644 --- a/pact-plugin/hooks/shared/dispatch_helpers.py +++ b/pact-plugin/hooks/shared/dispatch_helpers.py @@ -140,6 +140,51 @@ def is_registered_pact_specialist( return subagent_type in registry +def is_pact_specialist_owner( + owner: str, team_name: str, teams_dir: str | None = None +) -> bool: + """True iff ``owner`` names a team member whose agentType is a registered + pact specialist (``agents/pact-*.md``). + + Real task owners are BARE specialist names (``backend-coder``, + ``test-engineer``), NOT ``pact-``-prefixed — ``pact-*`` is the team-config + agentType, never the owner. So identifying "this owner is a pact specialist + teammate" requires resolving the bare owner through team config to its + agentType (the SAME owner→member→agentType resolution + ``intentional_wait._is_exempt_agent_type`` uses), then checking registry + membership. Composes the two existing primitives — ``_iter_members`` (the + resolution SSOT) + ``is_registered_pact_specialist`` (the registry check) — + rather than duplicating either. + + Fail-CLOSED to ``False`` on every unresolvable path (empty owner/team_name, + missing/malformed config, member-not-found, missing/non-str agentType, OR + any unexpected exception). For the dispatch-variety gate this composes to + FAIL-OPEN: "not positively a pact specialist" → the gate returns None → + does not fire, so an unresolvable owner never strands a dispatch. + + The outer bare-except is load-bearing: ``_iter_members`` resolves the + config dir via ``get_claude_config_dir()`` (→ ``Path.home()``), which can + raise ``RuntimeError`` that ESCAPES ``_iter_members``'s own typed except — + so a totally fail-closed helper must catch it here, not rely on a caller's + outer guard. + """ + if not isinstance(owner, str) or not owner: + return False + if not isinstance(team_name, str) or not team_name: + return False + try: + for member in pact_context._iter_members(team_name, teams_dir): + if member.get("name") == owner: + agent_type = member.get("agentType") + return ( + isinstance(agent_type, str) + and is_registered_pact_specialist(agent_type) + ) + return False + except Exception: + return False # fail-closed → gate fail-OPEN (never strands a dispatch) + + # ─── task-assigned check ─────────────────────────────────────────────────── def has_task_assigned(team_name: str, name: str) -> bool: diff --git a/pact-plugin/hooks/task_lifecycle_gate.py b/pact-plugin/hooks/task_lifecycle_gate.py index 24345861..a05bd1be 100644 --- a/pact-plugin/hooks/task_lifecycle_gate.py +++ b/pact-plugin/hooks/task_lifecycle_gate.py @@ -47,9 +47,11 @@ (missing blocks, missing Task B, missing variety, or variety present but no resolvable total); fail-open advisory documents the gap without blocking lifecycle - - variety_missing_on_dispatch_task — pact-* work Task created without - metadata.variety, with malformed per-dimension rationales, OR with no - resolvable variety total + - variety_missing_on_dispatch_task — pact-* work Task created with + metadata.variety present but malformed per-dimension rationales OR no + resolvable variety total. (The ABSENT-stamp arm was moved to the + dispatch-boundary gate in handoff_ordering_gate.py per the #865 surgical + split — this PostToolUse rule keeps only the present-but-malformed checks.) - variety_acknowledgment_missing — Teachback submitted without variety_acknowledgment field (D10 teammate verification) - variety_acknowledgment_schema_invalid_at_write_time — Teachback @@ -787,6 +789,63 @@ def _validate_variety_schema( return None +def _band_from_total(total: int) -> str: + """Map a resolved variety total to a reasoning_reconstruction band + string ("required" / "recommended" / "skipped"). Shared by the direct + Task-B resolution path and the C2 parent-inheritance fallback so both + apply the identical threshold logic (one band-cut SSOT).""" + if total >= TEACHBACK_REASONING_RECONSTRUCTION_REQUIRED_MIN: + return "required" + if total >= TEACHBACK_RECOMMENDED_BAND_MIN: + return "recommended" + return "skipped" + + +def _inherit_band_from_parent(task_b: dict, team_name: str) -> str | None: + """#891 Opt2 parent-inheritance fallback: when Task B carries no + resolvable variety, inherit the band from its PARENT task's variety + total. Returns a band string when the parent resolves, else None + (caller treats None as "unresolvable" — the preserved floor). + + Modular/separable: this is the entire Opt2 surface. Deleting this + function + its two call sites reverts to Opt1-alone behavior. + + Parent resolution + guardrail (don't inherit a WRONG parent's band — + a wrong inherit mis-resolves the band, the exact bug being fixed): + - Task B blocks the parent (Plan/feature/umbrella) task. The parent + pointer is task_b.blocks[0], but only when blocks is UNAMBIGUOUS: + a singleton list. Empty, multi-entry, or non-list blocks → fail + open (None) rather than guess among candidates. + - The parent must itself carry a resolvable variety total (via the + shared resolve_variety_total, reused unchanged). A non-parent task + (phase / teachback gate) does not carry variety, so a mis-pointed + blocks[0] fails open here rather than inheriting a wrong band. This + is the structural "looks like a Plan/feature task" guardrail: the + defining property of an inheritable parent is that it is stamped. + """ + blocks = task_b.get("blocks") + # Singleton-only: >1 entry is ambiguous (which is the parent?); 0/None + # has no parent pointer. Either way fail open. + if not isinstance(blocks, list) or len(blocks) != 1: + return None + parent_id = blocks[0] + if not isinstance(parent_id, str) or not parent_id: + return None + parent = read_task_json(parent_id, team_name) + if not parent: + return None + parent_metadata = parent.get("metadata") + if not isinstance(parent_metadata, dict): + return None + parent_variety = parent_metadata.get("variety") + if not isinstance(parent_variety, dict): + return None + parent_total = resolve_variety_total(parent_variety, parent_metadata) + if parent_total is None: + return None + return _band_from_total(parent_total) + + def _resolve_required_band_via_blocks( task_a: dict, team_name: str ) -> str: @@ -798,13 +857,20 @@ def _resolve_required_band_via_blocks( - "recommended": TEACHBACK_RECOMMENDED_BAND_MIN <= total < required-min - "skipped": total < TEACHBACK_RECOMMENDED_BAND_MIN - "unresolvable": blocks link missing, Task B file missing, or - variety absent/malformed/untotaled on Task B (fail-open: caller - emits a separate band_unresolvable advisory documenting the gap) + variety absent/malformed/untotaled on Task B AND the parent + inheritance fallback also failed (fail-open: caller emits a separate + band_unresolvable advisory documenting the gap) The total is resolved via the shared resolve_variety_total helper, so a non-canonical stamp (score / top-level variety_score / dimension-sum) resolves rather than reading as "unresolvable" — the same resolver the write-time validator consults (the cross-rule consistency property). + + #891 Opt2: when Task B has no resolvable variety, the band is inherited + from the parent (Plan/feature/umbrella) task before returning + "unresolvable" — see _inherit_band_from_parent. This keeps an unstamped + Task B's band resolvable (consultations are frequently 11-13) instead of + silently mis-resolving as "skipped". """ blocks = task_a.get("blocks") if not isinstance(blocks, list) or not blocks: @@ -821,19 +887,23 @@ def _resolve_required_band_via_blocks( if not task_b: return "unresolvable" metadata = task_b.get("metadata") - if not isinstance(metadata, dict): - return "unresolvable" - variety = metadata.get("variety") - if not isinstance(variety, dict): - return "unresolvable" - total = resolve_variety_total(variety, metadata) + # Opt2 broadened the inherit trigger: a metadata-not-dict OR variety-not-dict + # Task B (not only an absent variety) now resolves total=None and flows to + # the parent-inherit fallback below, rather than returning "unresolvable" + # immediately as the pre-Opt2 code did. The floor is preserved (an + # unresolvable parent still yields "unresolvable"). + variety = metadata.get("variety") if isinstance(metadata, dict) else None + total = ( + resolve_variety_total(variety, metadata) + if isinstance(variety, dict) + else None + ) if total is None: - return "unresolvable" - if total >= TEACHBACK_REASONING_RECONSTRUCTION_REQUIRED_MIN: - return "required" - if total >= TEACHBACK_RECOMMENDED_BAND_MIN: - return "recommended" - return "skipped" + # Task B variety absent/malformed/untotaled → try parent inheritance + # (Opt2) before conceding "unresolvable". Floor preserved: a parent + # that also fails to resolve returns None → "unresolvable". + return _inherit_band_from_parent(task_b, team_name) or "unresolvable" + return _band_from_total(total) def evaluate_lifecycle(input_data: dict) -> list[tuple[str, str]]: @@ -996,15 +1066,19 @@ def _exempt() -> bool: )) # R4: variety_missing_on_dispatch_task (D11-refined). - # Same discriminator pattern as work_addblockedby_missing - # (not-teachback + pact-* owner + not exempt). Single rule with - # three trigger paths (absent variety OR malformed per-dimension - # rationales OR no resolvable total); distinct message text per - # path, same rule name — the lead-side correction is the same - # (re-stamp variety) in every case. Clause order mirrors L575-580: - # cheap dict-lookups first, disk-touching exempt-check last - # (cached via `_exempt()` so the second invocation reuses the - # first call's result). + # SURGICAL SPLIT (#865 enforce-vs-advise re-scope): the ABSENT-stamp + # arm (variety entirely missing on a dispatched Task B) was MOVED to + # the dispatch-boundary gate in handoff_ordering_gate.py — that is the + # FIRST-OBSERVABLE-WRITE concern with a timing constraint (catch it at + # the terminal owner+addBlockedBy wiring write, deterministically). R4 + # KEEPS the present-but-malformed arm: a variety that IS stamped but + # carries malformed per-dimension rationales OR no resolvable total. + # These are post-write QUALITY checks with no dispatch-boundary timing + # constraint, so they stay a PostToolUse advisory here. The rule name + # is retained for the malformed-present paths; the bare-absent path no + # longer fires here (it fires at the wiring write). Clause order + # mirrors L575-580: cheap dict-lookups first, disk-touching + # exempt-check last (cached via `_exempt()`). if ( not is_teachback and owner.startswith("pact-") @@ -1012,16 +1086,9 @@ def _exempt() -> bool: ): incoming_metadata = tool_input.get("metadata") or {} incoming_variety = incoming_metadata.get("variety") - if not incoming_variety: - advisories.append(( - "variety_missing_on_dispatch_task", - f"PACT task_lifecycle_gate: pact-* Task created " - f"(owner={owner!r}) without metadata.variety. " - "Per-dispatch variety stamping is required for hook " - "band-resolution. See orchestrate / comPACT / " - "peer-review dispatch surfaces.", - )) - else: + # ABSENT variety is no longer enforced here (moved to the wiring + # write). Only validate a variety that IS present. + if incoming_variety: # The validator forwards the surrounding metadata so the # non-canonical top-level variety_score sibling is reachable # by the shared resolver. It returns the rationale problem diff --git a/pact-plugin/protocols/pact-protocols.md b/pact-plugin/protocols/pact-protocols.md index 2d40e2a3..7c896228 100644 --- a/pact-plugin/protocols/pact-protocols.md +++ b/pact-plugin/protocols/pact-protocols.md @@ -940,6 +940,10 @@ The feature-level CalibrationRecord above coexists with per-dispatch variety sta > The canonical total key is `total`. The lifecycle hook's band resolver additionally tolerates non-canonical `score` / top-level `variety_score`, or the sum of the four dimension scores, as fallbacks for stamps seen in the field — but orchestrators MUST stamp `total`. +> **Inheritance fallback (resolver-side, not stamp-side).** If a Task B is dispatched WITHOUT a resolvable `metadata.variety` despite the requirement above, the band resolver inherits the band from the PARENT (Plan/feature/umbrella) task that Task B blocks — so `reasoning_reconstruction` stays resolvable rather than silently mis-resolving as `skipped` (consultation Task Bs are frequently 11-13). This is a read-time SAFETY NET for an omission, NOT a license to skip stamping: orchestrators still stamp each Task B afresh per the directive above. Inheritance fires only when the parent pointer is unambiguous (Task B blocks exactly one task) and that parent is itself stamped; otherwise the resolver fails open to `unresolvable`. + +> **Enforcement split (dispatch-boundary vs advisory).** A MISSING stamp on a dispatched Task B is caught at the terminal dispatch-wiring write (the `TaskUpdate` setting `owner`+`addBlockedBy`) as a deterministic warning (env-gated deny opt-in); a stamp that IS present but malformed/untotaled stays a post-write advisory. The wiring-write gate reads the linked Task B's variety structurally — it never keys on actor identity. + **Why per-dimension rationales (not a single rationale)**: A single rationale field tolerates cargo-cult ("matches feature complexity" satisfies it). Four distinct rationale fields, one per dimension, force the orchestrator to articulate four independent judgments — cargo-culting all four with one phrase is mechanically incoherent (cannot coherently explain why novelty AND scope AND uncertainty AND risk are simultaneously "the same as feature" without exposing the copy-paste). #### Q5 Coverage Denominator (Wrap-Up Aggregation) diff --git a/pact-plugin/protocols/pact-variety.md b/pact-plugin/protocols/pact-variety.md index 550dd8c6..63033967 100644 --- a/pact-plugin/protocols/pact-variety.md +++ b/pact-plugin/protocols/pact-variety.md @@ -144,6 +144,10 @@ The feature-level CalibrationRecord above coexists with per-dispatch variety sta > The canonical total key is `total`. The lifecycle hook's band resolver additionally tolerates non-canonical `score` / top-level `variety_score`, or the sum of the four dimension scores, as fallbacks for stamps seen in the field — but orchestrators MUST stamp `total`. +> **Inheritance fallback (resolver-side, not stamp-side).** If a Task B is dispatched WITHOUT a resolvable `metadata.variety` despite the requirement above, the band resolver inherits the band from the PARENT (Plan/feature/umbrella) task that Task B blocks — so `reasoning_reconstruction` stays resolvable rather than silently mis-resolving as `skipped` (consultation Task Bs are frequently 11-13). This is a read-time SAFETY NET for an omission, NOT a license to skip stamping: orchestrators still stamp each Task B afresh per the directive above. Inheritance fires only when the parent pointer is unambiguous (Task B blocks exactly one task) and that parent is itself stamped; otherwise the resolver fails open to `unresolvable`. + +> **Enforcement split (dispatch-boundary vs advisory).** A MISSING stamp on a dispatched Task B is caught at the terminal dispatch-wiring write (the `TaskUpdate` setting `owner`+`addBlockedBy`) as a deterministic warning (env-gated deny opt-in); a stamp that IS present but malformed/untotaled stays a post-write advisory. The wiring-write gate reads the linked Task B's variety structurally — it never keys on actor identity. + **Why per-dimension rationales (not a single rationale)**: A single rationale field tolerates cargo-cult ("matches feature complexity" satisfies it). Four distinct rationale fields, one per dimension, force the orchestrator to articulate four independent judgments — cargo-culting all four with one phrase is mechanically incoherent (cannot coherently explain why novelty AND scope AND uncertainty AND risk are simultaneously "the same as feature" without exposing the copy-paste). #### Q5 Coverage Denominator (Wrap-Up Aggregation) diff --git a/pact-plugin/tests/test_commands_structure.py b/pact-plugin/tests/test_commands_structure.py index 45cab1c3..9d25fb2f 100644 --- a/pact-plugin/tests/test_commands_structure.py +++ b/pact-plugin/tests/test_commands_structure.py @@ -672,11 +672,11 @@ class TestPerLoopDispatchSites: ("orchestrate.md", 558, "ARCHITECT"), ("orchestrate.md", 681, "CODE"), ("orchestrate.md", 816, "TEST"), - ("comPACT.md", 227, "MultipleSpecialists"), - ("comPACT.md", 287, "SingleSpecialist"), + ("comPACT.md", 233, "MultipleSpecialists"), + ("comPACT.md", 293, "SingleSpecialist"), ("peer-review.md", 192, "Reviewers"), - ("plan-mode.md", 220, "Consultants"), - ("rePACT.md", 246, "SubScopeSpecialists"), + ("plan-mode.md", 236, "Consultants"), + ("rePACT.md", 262, "SubScopeSpecialists"), ] @staticmethod diff --git a/pact-plugin/tests/test_dispatch_gate.py b/pact-plugin/tests/test_dispatch_gate.py index 4633ef08..f8c8f595 100644 --- a/pact-plugin/tests/test_dispatch_gate.py +++ b/pact-plugin/tests/test_dispatch_gate.py @@ -558,6 +558,38 @@ def test_deny_when_task_completed_only(tmp_path, monkeypatch, capsys): # ============================================================================= +@pytest.mark.parametrize("env_value, expected", [ + ("deny", "deny"), + ("DENY", "deny"), # case-folded + (" deny ", "deny"), # whitespace-stripped + ("Deny", "deny"), + (" shadow\t", "shadow"), + ("warn", "warn"), + ("", "warn"), # empty → safe default + ("bogus", "warn"), # unknown → safe default +]) +def test_inline_mission_mode_strip_lower_normalization( + monkeypatch, env_value, expected, +): + """PACT_DISPATCH_INLINE_MISSION_MODE normalizes with .strip().lower() before + the membership check, parsing IDENTICALLY to handoff_ordering_gate.py's + PACT_DISPATCH_VARIETY_MODE knob. NON-TAUTOLOGICAL: reloads the module under + the real env value to exercise the actual os.environ read + normalize + + fallback. Case/whitespace variants of 'deny' arm deny; unknown/empty stay + warn — normalization can never accidentally enable an unintended mode.""" + import importlib + + import dispatch_gate + + monkeypatch.setenv("PACT_DISPATCH_INLINE_MISSION_MODE", env_value) + reloaded = importlib.reload(dispatch_gate) + try: + assert reloaded.INLINE_MISSION_MODE == expected + finally: + monkeypatch.delenv("PACT_DISPATCH_INLINE_MISSION_MODE", raising=False) + importlib.reload(dispatch_gate) + + def test_warn_when_prompt_lacks_task_reference(tmp_path, monkeypatch, capsys): """Default mode is 'warn' → ALLOW with additionalContext advisory. INLINE_MISSION_MODE was read at module-load BEFORE this test — we don't diff --git a/pact-plugin/tests/test_dispatch_variety_deny_honor_probe.py b/pact-plugin/tests/test_dispatch_variety_deny_honor_probe.py new file mode 100644 index 00000000..d75f1214 --- /dev/null +++ b/pact-plugin/tests/test_dispatch_variety_deny_honor_probe.py @@ -0,0 +1,333 @@ +"""Live deny-honor probe for the #865 dispatch-variety enforcement gate. + +WHY THIS FILE EXISTS — converting SOURCE-PROOF to RUNTIME-PROOF. +The #865 deny arm (handoff_ordering_gate.py, PACT_DISPATCH_VARIETY_MODE=deny) +emits a PreToolUse `permissionDecision:"deny"` + exit 2 on the terminal +dispatch-wiring write of an unstamped Task B. That deny path is SOURCE-PROVEN +but, until this file, empirically UN-EXERCISED: no PACT hook had ever fired a +TaskUpdate-matcher deny. The in-process unit test +(`test_handoff_ordering_gate.py::TestDispatchVarietyEnvKnobModes:: +test_deny_mode_permission_decision_exit_two`) proves the gate FUNCTION returns +the deny verdict, but it monkeypatches the in-memory `DISPATCH_VARIETY_MODE` +constant and patches the context-path — it never crosses a real process / real +env / real disk boundary. + +WHAT THIS PROBE ADDS (the closest-to-live, non-mocked seam-integration safely +achievable in-harness, per architecture spec §3.6): +- a REAL subprocess (`subprocess.run([sys.executable, hook])`) — real module + load, not an in-process import with a pre-warmed `DISPATCH_VARIETY_MODE`; +- the REAL env knob `PACT_DISPATCH_VARIETY_MODE=deny`, read at module import + from `os.environ` (NOT a `monkeypatch.setattr` of the resolved constant); +- a REAL isolated HOME holding a real on-disk session-context.json, a real + team config.json (with leadSessionId for the both-modes discriminator), and a + real unstamped Task-B JSON — the FULL `get_team_name` -> `read_task_json` -> + `resolve_variety_total` resolution chain, UNSTUBBED. +The probe asserts the exact deny CONTRACT the platform consumes to block a tool +call: exit 2 AND `hookSpecificOutput.permissionDecision == "deny"` AND +`hookEventName == "PreToolUse"` (the fields the platform's PreToolUse executor +reads before `tool.call()`). + +DOCUMENTED RESIDUAL (an ACCEPTED by-design boundary, not a skipped gap). +This probe proves the gate EMITS the platform-honored deny contract through the +real environment seam. It does NOT drive the actual Claude Code binary's +PreToolUse executor to observe the TaskUpdate being blocked — doing that would +require wiring a scratch hooks.json/settings into a real binary subprocess, +which edges into live-session-config territory the TEST phase deliberately does +not touch. That final "the platform honors the contract" leg stays +SOURCE-PROVEN (the platform deny branch returns before `tool.call()` with no +`tool.name` carve-out — tool-agnostic). The risk of the residual is bounded by +design: `deny` is OPT-IN (default ships `warn`), and an un-honored deny would +degrade to warn-like (the call proceeds, the advisory is the floor) — so this +is a runtime-CONFIDENCE gate, not a merge blocker. + +NON-MOCKED SEAM + NON-VACUITY. The task JSON is read over the real +`read_task_json` disk seam (no stub). `test_deny_does_not_fire_when_task_b_is +_stamped` is the non-vacuity lever: it shares the identical frame/env and flips +ONLY the on-disk task's variety stamp, so a regression that denied on the +composite signature WITHOUT reading the stamp (or a stubbed seam that always +reads "unstamped") would make that test RED. A vacuous always-deny gate cannot +pass both the deny case and the stamped-silent case. +""" +import json +import os +import subprocess +import sys +from pathlib import Path + +import pytest + +_HOOK = Path(__file__).parent.parent / "hooks" / "handoff_ordering_gate.py" + +# Frame agent_type spellings — is_lead reads top-level agent_type directly. +_LEAD = "PACT:pact-orchestrator" +_TEAMMATE = "pact-backend-coder" + +_SID = "deadbeef-4242-4242-4242-deadbeef4242" +_TEAM = "scratch-deny-probe-team" +_TASK_ID = "42" + +# The production-real owner shape: a BARE specialist name that the team config +# maps to a registered pact agentType. The corrected gate predicate +# (is_pact_specialist_owner) resolves _OWNER → member → _AGENT_TYPE → +# registry(agents/pact-*.md). Real task owners are bare names; `pact-*` is the +# team-config agentType, NEVER the owner — feeding `pact-backend-coder` as the +# owner (the pre-fix probe shape) is a production-impossible frame that the +# corrected predicate (correctly) no longer matches. +_OWNER = "backend-coder" +_AGENT_TYPE = "pact-backend-coder" + + +def _wiring_frame(*, agent_type=_LEAD, task_id=_TASK_ID): + """A terminal dispatch-wiring PreToolUse(TaskUpdate) frame: a BARE + specialist owner (resolving to a pact agentType via team config) AND + addBlockedBy non-empty in the SAME tool_input (the composite signature). + agent_type selects the lead/teammate (is_lead) branch.""" + return json.dumps({ + "tool_name": "TaskUpdate", + "session_id": _SID, + "agent_type": agent_type, + "tool_input": { + "taskId": task_id, + "owner": _OWNER, + "addBlockedBy": ["A"], + }, + }) + + +@pytest.fixture +def isolated_session(tmp_path): + """A fully isolated on-disk session: real HOME with session-context.json, + team config.json (leadSessionId == _SID → in-process / is-lead topology), + a seeded specialist registry (plugin_root/agents/pact-*.md), and an + UNSTAMPED Task-B JSON owned by the BARE specialist name. Returns + (env, home, task_path). + + No monkeypatch — every value is read by the subprocess from real env + + real disk, exactly as a live hook process would. The seeding mirrors the + corrected predicate's resolution chain: the gate's is_pact_specialist_owner + resolves the bare owner → team-config member → agentType → registry. So the + fixture MUST register (a) the member name→agentType mapping in the team + config AND (b) an agents/pact-*.md file for that agentType, with the + session-context plugin_root pointing at the seeded plugin root (the + subprocess resolves the registry via get_plugin_root() → the context-file + plugin_root field, with a CLAUDE_PLUGIN_ROOT env fallback). plugin_root="" + (the pre-fix value) yields an EMPTY registry → is_pact_specialist_owner + returns False → the gate never fires (the vacuity this de-fossilize closes). + """ + home = tmp_path / "home" + home.mkdir() + project = tmp_path / "proj" + project.mkdir() + slug = "proj" # CLAUDE_PROJECT_DIR basename + + # Seed the specialist registry: one agents/pact-*.md for the agentType the + # bare owner resolves to. Without this, is_registered_pact_specialist sees + # an empty registry and the corrected gate cannot identify the dispatch. + plugin_root = tmp_path / "plugin" + (plugin_root / "agents").mkdir(parents=True) + (plugin_root / "agents" / f"{_AGENT_TYPE}.md").write_text( + f"# {_AGENT_TYPE}\n", encoding="utf-8", + ) + + ctx_dir = home / ".claude" / "pact-sessions" / slug / _SID + ctx_dir.mkdir(parents=True) + (ctx_dir / "pact-session-context.json").write_text( + json.dumps({ + "team_name": _TEAM, + "session_id": _SID, + "project_dir": str(project), + "plugin_root": str(plugin_root), # registry resolves against this + "started_at": "2026-01-01T00:00:00Z", + }), + encoding="utf-8", + ) + + team_dir = home / ".claude" / "teams" / _TEAM + team_dir.mkdir(parents=True) + (team_dir / "config.json").write_text( + json.dumps({ + "team_name": _TEAM, + "leadSessionId": _SID, # == session_id → in-process topology + "members": [ + # BARE name → pact agentType: the resolution the gate performs. + {"name": _OWNER, "agentType": _AGENT_TYPE}, + ], + }), + encoding="utf-8", + ) + + tasks_dir = home / ".claude" / "tasks" / _TEAM + tasks_dir.mkdir(parents=True) + task_path = tasks_dir / f"{_TASK_ID}.json" + task_path.write_text( + json.dumps({ + "id": _TASK_ID, + "subject": "impl foo", + "owner": _OWNER, # BARE name (matches the team-config member) + "metadata": {}, # UNSTAMPED — the missing-stamp gap + }), + encoding="utf-8", + ) + + env = os.environ.copy() + env["HOME"] = str(home) + env.pop("CLAUDE_CONFIG_DIR", None) # force the HOME/.claude resolution + env["CLAUDE_PROJECT_DIR"] = str(project) + # Belt-and-suspenders: the context-file plugin_root wins, but the env + # fallback keeps the registry resolvable if that read ever regresses. + env["CLAUDE_PLUGIN_ROOT"] = str(plugin_root) + return env, home, task_path + + +def _run_hook(env, frame, *, mode): + """Fire the REAL gate as a subprocess with PACT_DISPATCH_VARIETY_MODE=mode + read from the environment at module import. Returns (returncode, parsed + stdout JSON).""" + run_env = dict(env) + run_env["PACT_DISPATCH_VARIETY_MODE"] = mode + result = subprocess.run( + [sys.executable, str(_HOOK)], + input=frame, + capture_output=True, + text=True, + env=run_env, + cwd=str(Path(env["HOME"])), + timeout=20, + ) + out = result.stdout.strip() + parsed = json.loads(out) if out else {} + return result.returncode, parsed, result.stderr + + +def test_hook_module_exists(): + assert _HOOK.exists(), f"gate hook missing at {_HOOK}" + + +# ============================================================================= +# THE LIVE DENY-HONOR PROBE — deny mode emits the platform-honored contract. +# ============================================================================= +def test_deny_mode_emits_platform_honored_deny_contract(isolated_session): + """deny mode + lead frame + unstamped Task B → the gate emits, through a + REAL subprocess / REAL env knob / REAL disk seam, the exact contract the + platform's PreToolUse executor reads to BLOCK the tool call: exit 2 + + permissionDecision:"deny" + hookEventName:"PreToolUse".""" + env, _home, _task = isolated_session + code, out, stderr = _run_hook(env, _wiring_frame(), mode="deny") + + assert code == 2, ( + f"deny mode must exit 2 (the platform-blocking code); " + f"got {code}. stderr={stderr!r}" + ) + hso = out.get("hookSpecificOutput", {}) + assert hso.get("permissionDecision") == "deny", ( + f"deny mode must emit permissionDecision:deny; got {out!r}" + ) + assert hso.get("hookEventName") == "PreToolUse", ( + f"the deny contract requires hookEventName=PreToolUse; got {out!r}" + ) + # The actionable reason reaches the dispatcher (the verbatim re-stamp recipe). + assert "metadata.variety" in hso.get("permissionDecisionReason", "") + + +def test_warn_mode_emits_advisory_not_deny(isolated_session): + """Default (warn) on the identical frame/disk → exit 0 + additionalContext + advisory, NEVER a permissionDecision. Proves the env knob actually selects + the mode at module import (not a hardcoded deny).""" + env, _home, _task = isolated_session + code, out, _stderr = _run_hook(env, _wiring_frame(), mode="warn") + + assert code == 0, "warn mode must exit 0 (never blocks)" + hso = out.get("hookSpecificOutput", {}) + assert "additionalContext" in hso + assert "permissionDecision" not in hso, ( + "warn mode must NEVER emit a deny verdict" + ) + + +def test_shadow_mode_suppresses_through_real_env(isolated_session): + """shadow mode on the identical frame/disk → suppressOutput, no deny, + no advisory (journal-only calibration posture).""" + env, _home, _task = isolated_session + code, out, _stderr = _run_hook(env, _wiring_frame(), mode="shadow") + + assert code == 0 + assert out == {"suppressOutput": True}, f"shadow must suppress; got {out!r}" + + +# ============================================================================= +# BOTH-MODES — is_lead structural discriminator through the real frame. +# In-process (lead frame) denies; a teammate frame on identical disk does not. +# ============================================================================= +def test_deny_does_not_fire_on_teammate_frame(isolated_session): + """DUAL-MODE: deny mode + TEAMMATE agent_type (is_lead False) on the + identical unstamped disk → no deny (exit 0, suppressOutput). The gate is + lead-frame-only; a teammate observing the same wiring write must not block + it. Paired with the lead-frame deny above, this exercises BOTH topology + branches of the is_lead discriminator over the real frame.""" + env, _home, _task = isolated_session + code, out, _stderr = _run_hook( + env, _wiring_frame(agent_type=_TEAMMATE), mode="deny" + ) + + assert code == 0, "a teammate frame must never deny (is_lead False)" + assert out == {"suppressOutput": True}, ( + f"teammate frame in deny mode must suppress; got {out!r}" + ) + + +# ============================================================================= +# NON-VACUITY LEVER — the deny is gated on the STRUCTURAL read, not the +# composite signature alone. Flip ONLY the on-disk stamp; the deny must vanish. +# A vacuous always-deny (or a stubbed seam that always reads "unstamped") makes +# this RED while the deny case above stays green — they cannot both pass. +# ============================================================================= +def test_deny_does_not_fire_when_task_b_is_stamped(isolated_session): + """deny mode + lead frame + identical composite signature, but the Task B + on disk now carries a resolvable variety.total → NO deny (exit 0). This is + the non-vacuity proof: the deny reads the real stamp over the unstubbed + disk seam and silences when it resolves.""" + env, _home, task_path = isolated_session + # Flip ONLY the on-disk stamp — same frame, same env, same everything else. + task_path.write_text( + json.dumps({ + "id": _TASK_ID, + "subject": "impl foo", + "owner": _OWNER, # SAME bare owner as the deny case — so the + # silence is attributable to the stamp resolving, NOT to the owner + # failing to resolve (which would be a wrong-reason green). + "metadata": {"variety": {"total": 12}}, + }), + encoding="utf-8", + ) + code, out, _stderr = _run_hook(env, _wiring_frame(), mode="deny") + + assert code == 0, ( + "a STAMPED Task B must never be denied even in deny mode — the deny is " + "gated on the structural read, not the composite signature" + ) + assert out == {"suppressOutput": True}, ( + f"stamped Task B in deny mode must suppress; got {out!r}" + ) + + +def test_deny_does_not_fire_at_taskcreate_no_misfire(isolated_session): + """FIRST-OBSERVABLE-WRITE no-misfire, proven live: a TaskCreate frame (no + owner+addBlockedBy composite) in deny mode must NOT block — enforcing at + the initial create would be unsatisfiable-by-construction. Owner is empty + at TaskCreate (the later wiring TaskUpdate sets it), so the composite + signature does not match.""" + env, _home, _task = isolated_session + create_frame = json.dumps({ + "tool_name": "TaskCreate", + "session_id": _SID, + "agent_type": _LEAD, + "tool_input": { + "subject": "impl foo", + "owner": _OWNER, + # NO addBlockedBy → not the terminal wiring write + }, + }) + code, out, _stderr = _run_hook(env, create_frame, mode="deny") + + assert code == 0, "TaskCreate must never deny (no-misfire invariant)" + assert "permissionDecision" not in out.get("hookSpecificOutput", {}) diff --git a/pact-plugin/tests/test_gate_crashpath_hardening.py b/pact-plugin/tests/test_gate_crashpath_hardening.py index f33a0552..5c517384 100644 --- a/pact-plugin/tests/test_gate_crashpath_hardening.py +++ b/pact-plugin/tests/test_gate_crashpath_hardening.py @@ -240,13 +240,26 @@ class TestStdinCaps: # additionalContext) — a distinct observable from the bare suppress the # capped read produces. Used by the tlg primary-read test so the # disposition discriminates capped-vs-uncapped, not merely "exit 0". + # NOTE: the variety is PRESENT-but-malformed (missing novelty_rationale), + # which is the R4 arm RETAINED after the #865 surgical split — a bare-absent + # variety no longer fires R4 (its enforcement moved to the wiring-write + # gate), so the advisory-bearing frame must carry a malformed-present stamp. _ADVISORY_BEARING_BASE = { "tool_name": "TaskCreate", "tool_input": { "subject": "implement foo", "owner": "pact-backend-coder", "addBlockedBy": ["41"], - "metadata": {}, + "metadata": { + "variety": { + "novelty": 3, + # novelty_rationale intentionally omitted → malformed-present + "scope": 3, "scope_rationale": "x", + "uncertainty": 3, "uncertainty_rationale": "x", + "risk": 3, "risk_rationale": "x", + "total": 12, + }, + }, }, "tool_response": {}, "session_id": "s", diff --git a/pact-plugin/tests/test_handoff_ordering_gate.py b/pact-plugin/tests/test_handoff_ordering_gate.py index b5da4e3f..8175b143 100644 --- a/pact-plugin/tests/test_handoff_ordering_gate.py +++ b/pact-plugin/tests/test_handoff_ordering_gate.py @@ -39,6 +39,54 @@ def _seed_task(tmp_path, team, task_id, **fields): (tasks_dir / f"{task_id}.json").write_text(json.dumps(payload), encoding="utf-8") +# Default team member set the dispatch-variety predicate resolves against. +# REAL owners are BARE names; the team config maps each bare name to its +# pact-* agentType — the resolution the corrected gate predicate performs. +_DEFAULT_MEMBERS = [ + {"name": "backend-coder", "agentType": "pact-backend-coder"}, + {"name": "test-engineer", "agentType": "pact-test-engineer"}, + {"name": "secretary", "agentType": "pact-secretary"}, + {"name": "explorer", "agentType": "general-purpose"}, # SOLO_EXEMPT (non-pact) +] + + +def _seed_team_config(tmp_path, monkeypatch, team, members=None): + """Make the corrected gate predicate resolvable in-test: + (a) write ~/.claude/teams/{team}/config.json so pact_context._iter_members + resolves bare owners → agentType, and + (b) seed a plugin root with agents/pact-*.md for each member's pact-* + agentType + point the context's plugin_root at it + clear the + registry cache so is_registered_pact_specialist resolves (in + production the live agents/ dir is found; in-test the glob needs a + seeded plugin root, mirroring test_dispatch_gate._seed_plugin). + Forces HOME/.claude resolution by clearing CLAUDE_CONFIG_DIR.""" + import shared.dispatch_helpers as dh + import shared.pact_context as ctx_module + + members = _DEFAULT_MEMBERS if members is None else members + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.delenv("CLAUDE_CONFIG_DIR", raising=False) + + team_dir = tmp_path / ".claude" / "teams" / team + team_dir.mkdir(parents=True, exist_ok=True) + (team_dir / "config.json").write_text( + json.dumps({"members": members}), encoding="utf-8", + ) + + # Seed the specialist registry: one agents/pact-*.md per pact-* agentType. + plugin_root = tmp_path / "plugin" + agents_dir = plugin_root / "agents" + agents_dir.mkdir(parents=True, exist_ok=True) + for m in members: + at = m.get("agentType", "") + if isinstance(at, str) and at.startswith("pact-"): + (agents_dir / f"{at}.md").write_text( + f"---\nname: {at}\n---\n", encoding="utf-8", + ) + monkeypatch.setattr(ctx_module, "get_plugin_root", lambda: str(plugin_root)) + dh._specialist_registry.cache_clear() + + def _complete_update(task_id, *, agent_type=LEAD, metadata=None): """A TaskUpdate(status=completed). `metadata` (if given) is the INCOMING update metadata (e.g. a bundled handoff).""" @@ -334,3 +382,594 @@ def test_advisory_path_through_real_on_disk_context( ) assert "42" in hso["additionalContext"] and "devops" in hso["additionalContext"] assert "permissionDecision" not in hso, "a WARN gate must NEVER emit a deny" + + +# ============================================================================= +# #865 dispatch-variety gate — the NEW branch (_evaluate_dispatch_variety), +# parallel to and independent of the #956 completion-ordering _evaluate. +# ============================================================================= +# +# Composite-signature trigger: a TaskUpdate whose tool_input carries BOTH +# owner=pact-* AND a non-empty addBlockedBy in the SAME call (the terminal +# dispatch-wiring write). Fires (warn/deny/shadow per env-knob) ONLY when the +# linked Task B carries no resolvable metadata.variety. No misfire at +# TaskCreate(B) or partial-wiring; carve-outs preserved. +# ============================================================================= + + +def _variety(total): + """A resolvable D11 variety stamp at the given total.""" + return { + "novelty": 2, "novelty_rationale": "x", + "scope": 2, "scope_rationale": "x", + "uncertainty": 2, "uncertainty_rationale": "x", + "risk": 2, "risk_rationale": "x", + "total": total, + } + + +def _wiring_update(task_id, *, owner="backend-coder", + add_blocked_by=("A",), agent_type=LEAD): + """A terminal dispatch-wiring TaskUpdate: owner + addBlockedBy in the SAME + tool_input. owner is a BARE specialist name (the real shape) resolving via + team config to a pact agentType. add_blocked_by=None / [] omits it + (partial-wiring case).""" + tool_input = {"taskId": task_id} + if owner is not None: + tool_input["owner"] = owner + if add_blocked_by: + tool_input["addBlockedBy"] = list(add_blocked_by) + payload = {"tool_name": "TaskUpdate", "tool_input": tool_input} + if agent_type is not None: + payload["agent_type"] = agent_type + return payload + + +class TestDispatchVarietyTrigger: + """The composite signature fires iff owner pact-* AND addBlockedBy are in + the SAME tool_input AND the linked Task B has no resolvable variety.""" + + def test_fires_on_wiring_write_unstamped_task_b( + self, tmp_path, monkeypatch, pact_context, + ): + """Terminal wiring write linking an unstamped Task B → advisory. The + BARE-NAME-FIRES case: owner 'backend-coder' resolves via team config to + agentType pact-backend-coder. This is the case that was DEAD under the + old owner.startswith('pact-') predicate.""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_team_config(tmp_path, monkeypatch, TEAM) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="backend-coder", metadata={}) + adv = gate._evaluate_dispatch_variety(_wiring_update("42")) + assert adv is not None and "metadata.variety" in adv + + def test_fires_reds_if_predicate_reverted_to_prefix( + self, tmp_path, monkeypatch, pact_context, + ): + """NON-VACUITY: the bare-name-fires case proves the gate is ALIVE only + if it REDs under the reverted (dead) predicate. Simulate the revert by + monkeypatching the predicate back to owner.startswith('pact-'): with a + BARE owner that check is False → gate silent → adv is None. So the test + above genuinely depends on the corrected resolution, not on the + composite signature alone.""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_team_config(tmp_path, monkeypatch, TEAM) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="backend-coder", metadata={}) + # Revert: the dead prefix predicate on the BARE owner. + monkeypatch.setattr( + gate, "is_pact_specialist_owner", + lambda owner, team_name: isinstance(owner, str) + and owner.startswith("pact-"), + ) + assert gate._evaluate_dispatch_variety(_wiring_update("42")) is None + + def test_silent_when_task_b_is_stamped( + self, tmp_path, monkeypatch, pact_context, + ): + """READ+VALIDATE: a stamped Task B → silent (the structural read is + what makes the gate precise; it does NOT fire on the composite + signature alone).""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_team_config(tmp_path, monkeypatch, TEAM) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="backend-coder", + metadata={"variety": _variety(12)}) + assert gate._evaluate_dispatch_variety(_wiring_update("42")) is None + + def test_silent_when_task_b_stamped_via_fallback( + self, tmp_path, monkeypatch, pact_context, + ): + """A non-canonical but resolvable stamp (score, no total) → silent. + The gate uses the shared resolve_variety_total, so any shape that + resolves at write/read time also satisfies the gate.""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_team_config(tmp_path, monkeypatch, TEAM) + v = _variety(0) + v.pop("total") + v["score"] = 9 + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="backend-coder", metadata={"variety": v}) + assert gate._evaluate_dispatch_variety(_wiring_update("42")) is None + + def test_silent_when_owner_not_in_team_config( + self, tmp_path, monkeypatch, pact_context, + ): + """FAIL-OPEN: a bare owner that is NOT a known team member resolves to + False → gate silent (never strands). The corrected predicate's + unresolvable-owner floor.""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_team_config(tmp_path, monkeypatch, TEAM) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="ghost-coder", metadata={}) + assert gate._evaluate_dispatch_variety( + _wiring_update("42", owner="ghost-coder") + ) is None + + def test_silent_when_team_config_missing( + self, tmp_path, monkeypatch, pact_context, + ): + """FAIL-OPEN: no team config on disk → _iter_members returns [] → + is_pact_specialist_owner False → gate silent. Consumer-wide-safe: an + unresolvable config never strands a dispatch.""" + _ctx(pact_context, monkeypatch, tmp_path) + # Deliberately do NOT seed team config. + monkeypatch.delenv("CLAUDE_CONFIG_DIR", raising=False) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="backend-coder", metadata={}) + assert gate._evaluate_dispatch_variety(_wiring_update("42")) is None + + +class TestDispatchVarietyNoMisfire: + """The FIRST-OBSERVABLE-WRITE / no-misfire invariant: never fire at + TaskCreate(B) or on a partial-wiring TaskUpdate.""" + + def test_no_fire_on_taskcreate( + self, tmp_path, monkeypatch, pact_context, + ): + """A TaskCreate (different tool) never reaches the branch.""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="pact-backend-coder", metadata={}) + payload = { + "tool_name": "TaskCreate", + "tool_input": {"subject": "impl foo", "owner": "pact-backend-coder", + "addBlockedBy": ["A"]}, + "agent_type": LEAD, + } + assert gate._evaluate_dispatch_variety(payload) is None + + def test_no_fire_on_owner_only_partial_wiring( + self, tmp_path, monkeypatch, pact_context, + ): + """owner set but NO addBlockedBy in the same call → not yet terminal + → silent.""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="pact-backend-coder", metadata={}) + assert gate._evaluate_dispatch_variety( + _wiring_update("42", add_blocked_by=None) + ) is None + + def test_no_fire_on_addblockedby_only_partial_wiring( + self, tmp_path, monkeypatch, pact_context, + ): + """addBlockedBy set but NO owner in the same call → silent. This is + the imPACT blocker-reassign / phase-task-blocking shape (scenario 12): + every NON-dispatch addBlockedBy use is addBlockedBy-ONLY, so the + composite never false-positives on it.""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="pact-backend-coder", metadata={}) + assert gate._evaluate_dispatch_variety( + _wiring_update("42", owner=None) + ) is None + + +class TestDispatchVarietyCarveOuts: + """Carve-outs preserve R4's silence guarantees verbatim.""" + + def test_silent_non_pact_owner( + self, tmp_path, monkeypatch, pact_context, + ): + """A SOLO_EXEMPT owner resolves to a NON-pact agentType + (explorer→general-purpose) → is_pact_specialist_owner False → never + fires (scenario 9: SOLO_EXEMPT agents have non-pact agentTypes, so the + corrected predicate excludes them naturally — no explicit check).""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_team_config(tmp_path, monkeypatch, TEAM) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="explorer", metadata={}) + assert gate._evaluate_dispatch_variety( + _wiring_update("42", owner="explorer") + ) is None + + def test_silent_teachback_subject( + self, tmp_path, monkeypatch, pact_context, + ): + """A Task-A teachback gate subject is exempt (is_teachback_subject). + The bare owner RESOLVES (passes the trigger), so the subject carve-out + is what suppresses it.""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_team_config(tmp_path, monkeypatch, TEAM) + _seed_task(tmp_path, TEAM, "42", + subject="backend: TEACHBACK for the thing", + owner="backend-coder", metadata={}) + assert gate._evaluate_dispatch_variety(_wiring_update("42")) is None + + @pytest.mark.parametrize("signal_type", ["blocker", "algedonic"]) + def test_silent_signal_task( + self, tmp_path, monkeypatch, pact_context, signal_type, + ): + """A signal task (completion_type=signal) is exempt via + is_self_complete_exempt — auditor/blocker signal tasks carry no + variety obligation. The bare 'auditor' owner RESOLVES to pact-auditor + (passes the trigger), so the signal carve-out is what suppresses it.""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_team_config(tmp_path, monkeypatch, TEAM, members=[ + {"name": "auditor", "agentType": "pact-auditor"}, + ]) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="auditor", + metadata={"completion_type": "signal", "type": signal_type}) + assert gate._evaluate_dispatch_variety(_wiring_update( + "42", owner="auditor")) is None + + def test_silent_secretary_owner( + self, tmp_path, monkeypatch, pact_context, + ): + """LOAD-BEARING carve-out: the secretary (pact-secretary) IS a + registered specialist, so the bare 'secretary' owner PASSES the + corrected trigger — meaning is_self_complete_exempt MUST suppress it. + A secretary-owned wiring write with no variety → SILENT, NOT warn/deny. + If the carve-out regressed, this would wrongly warn/deny a legit + secretary dispatch.""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_team_config(tmp_path, monkeypatch, TEAM) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="secretary", metadata={}) + assert gate._evaluate_dispatch_variety( + _wiring_update("42", owner="secretary") + ) is None + + def test_silent_teammate_frame( + self, tmp_path, monkeypatch, pact_context, + ): + """DUAL-MODE: a teammate frame emits nothing (is_lead structural + discriminator). Short-circuits before owner resolution.""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_team_config(tmp_path, monkeypatch, TEAM) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="backend-coder", metadata={}) + assert gate._evaluate_dispatch_variety( + _wiring_update("42", agent_type=TEAMMATE) + ) is None + + +class TestDispatchVarietyPerfGuard: + """F1 — the extra per-dispatch task-read is BOUNDED: read_task_json must + NOT be called when the cheap in-memory guards (is_lead / owner-present / + addBlockedBy-present / taskId-present) fail. Pins the cost-order so a future + refactor cannot move the disk read ahead of the guards (which would add a + disk hit to EVERY TaskUpdate in EVERY consumer session, not just genuine + dispatch-wiring writes).""" + + def _spy_read_task_json(self, monkeypatch): + """Replace gate.read_task_json with a call-counting spy. Patches the + name in the GATE module namespace (where it is looked up via the + module-level `from shared.task_utils import read_task_json`), NOT + shared.task_utils (where it is defined) — patch-where-looked-up.""" + calls = [] + real = gate.read_task_json + + def _spy(task_id, team_name, *a, **k): + calls.append((task_id, team_name)) + return real(task_id, team_name, *a, **k) + + monkeypatch.setattr(gate, "read_task_json", _spy) + return calls + + @pytest.mark.parametrize( + "frame_desc, frame_factory", + [ + # is_lead guard: a teammate frame short-circuits first. + ("teammate_frame", + lambda: _wiring_update("42", agent_type=TEAMMATE)), + # owner-absent guard (partial wiring). + ("owner_absent", + lambda: _wiring_update("42", owner=None)), + # addBlockedBy-absent guard (partial wiring). + ("addblockedby_absent", + lambda: _wiring_update("42", add_blocked_by=None)), + # taskId-absent guard. + ("taskid_absent", + lambda: {"tool_name": "TaskUpdate", "agent_type": LEAD, + "tool_input": {"owner": "backend-coder", + "addBlockedBy": ["A"]}}), + ], + ) + def test_no_disk_read_when_cheap_guards_fail( + self, tmp_path, monkeypatch, pact_context, frame_desc, frame_factory, + ): + """Each guard-failing frame must bypass read_task_json entirely.""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_team_config(tmp_path, monkeypatch, TEAM) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="backend-coder", metadata={}) + calls = self._spy_read_task_json(monkeypatch) + gate._evaluate_dispatch_variety(frame_factory()) + assert calls == [], ( + f"read_task_json must NOT be called on {frame_desc} " + f"(cheap guards short-circuit first); got {calls}" + ) + + def test_disk_read_happens_on_a_real_dispatch_wiring_write( + self, tmp_path, monkeypatch, pact_context, + ): + """Counter-case (proves the spy is wired): a genuine wiring write that + passes every cheap guard DOES read the task — so the test above asserts + a real short-circuit, not a dead spy.""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_team_config(tmp_path, monkeypatch, TEAM) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="backend-coder", metadata={}) + calls = self._spy_read_task_json(monkeypatch) + gate._evaluate_dispatch_variety(_wiring_update("42")) + assert calls == [("42", TEAM)], ( + f"a real dispatch wiring write must read the linked task once; " + f"got {calls}" + ) + + +class TestDispatchVarietyBothModesMatrix: + """F2 — the is_lead dual-mode discriminator as a single explicit matrix: + a lead frame FIRES (advisory present); a teammate frame SUPPRESSES (None). + Consolidates the previously-paired lead-fire / teammate-silent coverage + into one parametrized case so the both-directions contract reads as a unit. + """ + + @pytest.mark.parametrize( + "mode, agent_type, expect_fires", + [ + ("in_process_lead", LEAD, True), + ("tmux_teammate", TEAMMATE, False), + ], + ) + def test_is_lead_matrix( + self, tmp_path, monkeypatch, pact_context, mode, agent_type, + expect_fires, + ): + """Same unstamped bare-owner wiring write under each frame role: + lead → advisory; teammate → silent (is_lead structural branch).""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_team_config(tmp_path, monkeypatch, TEAM) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="backend-coder", metadata={}) + adv = gate._evaluate_dispatch_variety( + _wiring_update("42", agent_type=agent_type) + ) + if expect_fires: + assert adv is not None and "metadata.variety" in adv, ( + f"{mode}: lead frame must fire the advisory; got {adv!r}" + ) + else: + assert adv is None, ( + f"{mode}: teammate frame must suppress (is_lead False); " + f"got {adv!r}" + ) + + +class TestDispatchVarietyMalformedType: + """F3 — a variety value of the WRONG TYPE (list / string, not a dict) at + the gate: resolve_variety_total returns None for it, so the gate treats it + as the missing-stamp gap and FIRES. Closes the one un-probed malformed + shape (the dict-shaped malformed cases are covered by the R4 split tests).""" + + @pytest.mark.parametrize( + "bad_variety", + [ + pytest.param(["not", "a", "dict"], id="variety_is_list"), + pytest.param("twelve", id="variety_is_string"), + pytest.param(12, id="variety_is_bare_int"), + ], + ) + def test_fires_on_non_dict_variety( + self, tmp_path, monkeypatch, pact_context, bad_variety, + ): + """A non-dict metadata.variety does not resolve to a total → the gate + fires the missing-stamp advisory (never crashes, never silently + passes).""" + _ctx(pact_context, monkeypatch, tmp_path) + _seed_team_config(tmp_path, monkeypatch, TEAM) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="backend-coder", metadata={"variety": bad_variety}) + adv = gate._evaluate_dispatch_variety(_wiring_update("42")) + assert adv is not None and "metadata.variety" in adv, ( + f"a non-dict variety ({bad_variety!r}) must fire the missing-stamp " + f"advisory; got {adv!r}" + ) + + +class TestDispatchVarietyEnvKnobModes: + """main()-level: PACT_DISPATCH_VARIETY_MODE selects warn / deny / shadow. + The module reads the knob at import; monkeypatch the resolved constant.""" + + def _run_main(self, monkeypatch, capsys, stdin_obj): + monkeypatch.setattr(sys, "stdin", io.StringIO(json.dumps(stdin_obj))) + with pytest.raises(SystemExit) as exc: + gate.main() + return exc.value.code, capsys.readouterr().out + + def _seed_unstamped(self, tmp_path, monkeypatch): + _seed_team_config(tmp_path, monkeypatch, TEAM) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="backend-coder", metadata={}) + + def test_warn_mode_additional_context_exit_zero( + self, tmp_path, monkeypatch, pact_context, capsys, + ): + _ctx(pact_context, monkeypatch, tmp_path) + monkeypatch.setattr(gate, "DISPATCH_VARIETY_MODE", "warn") + self._seed_unstamped(tmp_path, monkeypatch) + code, out = self._run_main(monkeypatch, capsys, _wiring_update("42")) + assert code == 0 + hso = json.loads(out)["hookSpecificOutput"] + assert hso["hookEventName"] == "PreToolUse" + assert "additionalContext" in hso + assert "permissionDecision" not in hso + + def test_deny_mode_permission_decision_exit_two( + self, tmp_path, monkeypatch, pact_context, capsys, + ): + """deny mode → permissionDecision:"deny" + exit 2 (the sole + fail-CLOSED path). Source-proven honor; opt-in only.""" + _ctx(pact_context, monkeypatch, tmp_path) + monkeypatch.setattr(gate, "DISPATCH_VARIETY_MODE", "deny") + self._seed_unstamped(tmp_path, monkeypatch) + code, out = self._run_main(monkeypatch, capsys, _wiring_update("42")) + assert code == 2 + hso = json.loads(out)["hookSpecificOutput"] + assert hso["permissionDecision"] == "deny" + assert hso["hookEventName"] == "PreToolUse" + + def test_shadow_mode_suppresses( + self, tmp_path, monkeypatch, pact_context, capsys, + ): + """shadow mode → no additionalContext, no deny (journal-only + telemetry; here it suppresses).""" + _ctx(pact_context, monkeypatch, tmp_path) + monkeypatch.setattr(gate, "DISPATCH_VARIETY_MODE", "shadow") + self._seed_unstamped(tmp_path, monkeypatch) + code, out = self._run_main(monkeypatch, capsys, _wiring_update("42")) + assert code == 0 + assert json.loads(out) == {"suppressOutput": True} + + def test_deny_mode_does_not_deny_stamped_task_b( + self, tmp_path, monkeypatch, pact_context, capsys, + ): + """Even in deny mode, a STAMPED Task B is never denied — the + structural read gates the deny. Counter-pin against a deny-on-every- + wiring-write regression.""" + _ctx(pact_context, monkeypatch, tmp_path) + monkeypatch.setattr(gate, "DISPATCH_VARIETY_MODE", "deny") + _seed_team_config(tmp_path, monkeypatch, TEAM) + _seed_task(tmp_path, TEAM, "42", subject="impl foo", + owner="backend-coder", + metadata={"variety": _variety(12)}) + code, out = self._run_main(monkeypatch, capsys, _wiring_update("42")) + assert code == 0 + assert json.loads(out) == {"suppressOutput": True} + + @pytest.mark.parametrize("env_value, expected", [ + ("deny", "deny"), + ("DENY", "deny"), # case-folded + (" deny ", "deny"), # whitespace-stripped + ("Deny", "deny"), + (" shadow\t", "shadow"), + ("warn", "warn"), + ("", "warn"), # empty → safe default + ("bogus", "warn"), # unknown → safe default + ("denY ", "deny"), + ]) + def test_env_knob_strip_lower_normalization( + self, monkeypatch, env_value, expected, + ): + """The PACT_DISPATCH_VARIETY_MODE read normalizes with .strip().lower() + BEFORE the membership check, then falls back to warn for anything not in + the allowed set. NON-TAUTOLOGICAL: reloads the module under the real env + value so it exercises the actual os.environ read + normalize + fallback + (not the already-resolved constant). Asserts case/whitespace variants of + 'deny' arm deny, and unknown/empty values stay warn — proving the + normalization can never accidentally enable an unintended mode.""" + import importlib + monkeypatch.setenv("PACT_DISPATCH_VARIETY_MODE", env_value) + reloaded = importlib.reload(gate) + try: + assert reloaded.DISPATCH_VARIETY_MODE == expected + finally: + # Restore the module's default-env resolution for sibling tests. + monkeypatch.delenv("PACT_DISPATCH_VARIETY_MODE", raising=False) + importlib.reload(gate) + + def test_deny_mode_fails_open_when_evaluation_raises( + self, tmp_path, monkeypatch, pact_context, capsys, + ): + """ADVERSARIAL fail-OPEN invariant: in DENY mode, an exception inside + _evaluate_dispatch_variety must NOT brick the TaskUpdate — main() + catches it, sets variety_gap=None, and falls through to suppress + (exit 0), NEVER exit-2/deny. The deny is fail-CLOSED only on a + CONFIRMED missing stamp, never on evaluation uncertainty. A consumer- + wide deny gate that denied on its own crash would strand every + legitimate dispatch wiring write whenever the evaluator hit a malformed + frame — strictly worse than the gap it guards.""" + _ctx(pact_context, monkeypatch, tmp_path) + monkeypatch.setattr(gate, "DISPATCH_VARIETY_MODE", "deny") + self._seed_unstamped(tmp_path, monkeypatch) + + def _boom(_input_data): + raise RuntimeError("simulated evaluator crash") + + monkeypatch.setattr(gate, "_evaluate_dispatch_variety", _boom) + code, out = self._run_main(monkeypatch, capsys, _wiring_update("42")) + assert code == 0, "deny mode must fail-OPEN (exit 0) on evaluator crash" + parsed = json.loads(out) + assert "permissionDecision" not in parsed.get( + "hookSpecificOutput", {} + ), "a crash must never produce a deny verdict" + + +# ============================================================================= +# is_pact_specialist_owner — the corrected-predicate resolution helper. +# Direct unit coverage of the bare owner → agentType → registry resolution + +# the fail-CLOSED-to-False contract (which composes to gate fail-OPEN). +# ============================================================================= +class TestIsPactSpecialistOwner: + def test_true_for_bare_specialist_owner(self, tmp_path, monkeypatch): + _seed_team_config(tmp_path, monkeypatch, TEAM) + from shared.dispatch_helpers import is_pact_specialist_owner + assert is_pact_specialist_owner("backend-coder", TEAM) is True + + def test_true_for_secretary_owner(self, tmp_path, monkeypatch): + """pact-secretary IS a registered specialist → True. (The gate's + is_self_complete_exempt carve-out, NOT this helper, suppresses it.)""" + _seed_team_config(tmp_path, monkeypatch, TEAM) + from shared.dispatch_helpers import is_pact_specialist_owner + assert is_pact_specialist_owner("secretary", TEAM) is True + + def test_false_for_solo_exempt_owner(self, tmp_path, monkeypatch): + """explorer→general-purpose is a NON-pact agentType (no + agents/general-purpose.md) → False.""" + _seed_team_config(tmp_path, monkeypatch, TEAM) + from shared.dispatch_helpers import is_pact_specialist_owner + assert is_pact_specialist_owner("explorer", TEAM) is False + + def test_false_for_unknown_owner(self, tmp_path, monkeypatch): + _seed_team_config(tmp_path, monkeypatch, TEAM) + from shared.dispatch_helpers import is_pact_specialist_owner + assert is_pact_specialist_owner("ghost", TEAM) is False + + def test_false_for_missing_team_config(self, tmp_path, monkeypatch): + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.delenv("CLAUDE_CONFIG_DIR", raising=False) + from shared.dispatch_helpers import is_pact_specialist_owner + assert is_pact_specialist_owner("backend-coder", TEAM) is False + + def test_false_for_empty_inputs(self): + from shared.dispatch_helpers import is_pact_specialist_owner + assert is_pact_specialist_owner("", TEAM) is False + assert is_pact_specialist_owner("backend-coder", "") is False + assert is_pact_specialist_owner(None, TEAM) is False + + def test_false_and_never_raises_on_iter_members_exception(self, monkeypatch): + """The bare-except fail-closed wrap: if _iter_members raises (e.g. the + get_claude_config_dir→Path.home RuntimeError seam that ESCAPES + _iter_members' own typed except), the helper returns False and never + propagates — so the gate fail-OPENS rather than crashing.""" + import shared.pact_context as ctx_module + from shared.dispatch_helpers import is_pact_specialist_owner + + def _raise(team_name, teams_dir=None): + raise RuntimeError("simulated config-dir resolution failure") + + monkeypatch.setattr(ctx_module, "_iter_members", _raise) + assert is_pact_specialist_owner("backend-coder", TEAM) is False diff --git a/pact-plugin/tests/test_per_dispatch_variety.py b/pact-plugin/tests/test_per_dispatch_variety.py index 9c3bc82a..91988e69 100644 --- a/pact-plugin/tests/test_per_dispatch_variety.py +++ b/pact-plugin/tests/test_per_dispatch_variety.py @@ -490,6 +490,167 @@ def test_first_block_dominates_even_if_skipped( ) == "skipped" +# ============================================================================= +# #891 Opt2: parent-inheritance fallback (_inherit_band_from_parent) +# ============================================================================= +# +# When Task B carries no resolvable variety, the band is inherited from the +# PARENT (Plan/feature/umbrella) task that Task B blocks. These tests pin both +# halves of the C2 contract: +# - RESOLVE: an unstamped Task B whose singleton blocks[0] points at a +# STAMPED parent inherits the parent's REAL band (the #891 fix — a +# consultation Task B that would mis-resolve as "skipped" now resolves to +# the parent's 11-13 reality). +# - FAIL-OPEN GUARDRAIL: never inherit a WRONG parent's band. Ambiguous +# blocks (>1 entry, empty, non-list) or a parent that is not itself +# stamped → "unresolvable" (the preserved floor), NOT a guess. +# ============================================================================= + + +def _seed_task( + tmp_path, monkeypatch, team_name, task_id, *, metadata=None, blocks=None, +): + """Seed an arbitrary task on disk with optional `blocks` + `metadata`. + Used to build the Task B → parent chain the inheritance fallback walks.""" + monkeypatch.setattr(Path, "home", lambda: tmp_path) + tasks_dir = tmp_path / ".claude" / "tasks" / team_name + tasks_dir.mkdir(parents=True, exist_ok=True) + task = {"id": task_id, "subject": "x", "owner": "pact-backend-coder"} + if blocks is not None: + task["blocks"] = blocks + if metadata is not None: + task["metadata"] = metadata + (tasks_dir / f"{task_id}.json").write_text( + json.dumps(task), encoding="utf-8" + ) + + +class TestParentInheritanceFallback: + """#891 Opt2: an unstamped Task B inherits its parent's band.""" + + # ----- RESOLVE: parent inheritance maps to the parent's real band ---- + + def test_inherits_required_band_from_parent(self, tmp_path, monkeypatch): + """Task B has no variety but blocks a stamped parent (total=12) → + the band resolves to the parent's REQUIRED band instead of + unresolvable. The #891 consultation-Task-B fix.""" + _seed_task( + tmp_path, monkeypatch, "test-team", "2", + metadata={}, blocks=["100"], + ) + _seed_task( + tmp_path, monkeypatch, "test-team", "100", + metadata={"variety": _well_formed_variety(total=12)}, + ) + assert tlg._resolve_required_band_via_blocks( + {"blocks": ["2"]}, "test-team" + ) == "required" + + def test_inherits_recommended_band_from_parent(self, tmp_path, monkeypatch): + """Parent total=9 → inherited band is recommended.""" + _seed_task( + tmp_path, monkeypatch, "test-team", "2", + metadata={}, blocks=["100"], + ) + _seed_task( + tmp_path, monkeypatch, "test-team", "100", + metadata={"variety": _well_formed_variety(total=9)}, + ) + assert tlg._resolve_required_band_via_blocks( + {"blocks": ["2"]}, "test-team" + ) == "recommended" + + def test_inherits_skipped_band_from_parent(self, tmp_path, monkeypatch): + """Parent total=5 → inherited band is skipped (the parent's REAL + low band; inheritance is not biased toward required).""" + _seed_task( + tmp_path, monkeypatch, "test-team", "2", + metadata={}, blocks=["100"], + ) + _seed_task( + tmp_path, monkeypatch, "test-team", "100", + metadata={"variety": _well_formed_variety(total=5)}, + ) + assert tlg._resolve_required_band_via_blocks( + {"blocks": ["2"]}, "test-team" + ) == "skipped" + + def test_stamped_task_b_does_not_inherit(self, tmp_path, monkeypatch): + """When Task B IS stamped, its OWN band wins — the parent is never + consulted (inheritance is a fallback, not an override). Task B + total=7 (recommended); parent total=16 (required) is ignored.""" + _seed_task( + tmp_path, monkeypatch, "test-team", "2", + metadata={"variety": _well_formed_variety(total=7)}, + blocks=["100"], + ) + _seed_task( + tmp_path, monkeypatch, "test-team", "100", + metadata={"variety": _well_formed_variety(total=16)}, + ) + assert tlg._resolve_required_band_via_blocks( + {"blocks": ["2"]}, "test-team" + ) == "recommended" + + # ----- FAIL-OPEN GUARDRAILS: never inherit a WRONG parent ------------ + + def test_unstamped_b_no_blocks_is_unresolvable(self, tmp_path, monkeypatch): + """Unstamped Task B with NO blocks pointer → no parent to inherit + from → unresolvable (floor preserved). This is the legacy shape; + pins that inheritance does not regress it.""" + _seed_task( + tmp_path, monkeypatch, "test-team", "2", metadata={}, + ) + assert tlg._resolve_required_band_via_blocks( + {"blocks": ["2"]}, "test-team" + ) == "unresolvable" + + def test_unstamped_b_multi_block_fails_open(self, tmp_path, monkeypatch): + """Task B.blocks has >1 entry → AMBIGUOUS parent → fail-open to + unresolvable rather than guess blocks[0]. The lead's + don't-blindly-trust-blocks[0] guardrail: a wrong-parent inherit + mis-resolves the band, the exact bug being fixed.""" + _seed_task( + tmp_path, monkeypatch, "test-team", "2", + metadata={}, blocks=["100", "200"], + ) + _seed_task( + tmp_path, monkeypatch, "test-team", "100", + metadata={"variety": _well_formed_variety(total=12)}, + ) + assert tlg._resolve_required_band_via_blocks( + {"blocks": ["2"]}, "test-team" + ) == "unresolvable" + + def test_unstamped_b_unstamped_parent_fails_open(self, tmp_path, monkeypatch): + """Singleton blocks[0] points at a parent that is NOT itself stamped + → fail-open to unresolvable. The structural 'looks like a + Plan/feature task' guardrail: only a stamped task is an inheritable + parent, so a mis-pointed blocks[0] (e.g. at a phase/teachback task) + cannot inherit a wrong band.""" + _seed_task( + tmp_path, monkeypatch, "test-team", "2", + metadata={}, blocks=["100"], + ) + _seed_task( + tmp_path, monkeypatch, "test-team", "100", metadata={}, + ) + assert tlg._resolve_required_band_via_blocks( + {"blocks": ["2"]}, "test-team" + ) == "unresolvable" + + def test_unstamped_b_missing_parent_file_fails_open(self, tmp_path, monkeypatch): + """blocks[0] points at a parent id with no file on disk → fail-open + to unresolvable (read_task_json returns {}).""" + _seed_task( + tmp_path, monkeypatch, "test-team", "2", + metadata={}, blocks=["999"], + ) + assert tlg._resolve_required_band_via_blocks( + {"blocks": ["2"]}, "test-team" + ) == "unresolvable" + + # ============================================================================= # Divergence-computation helper (shared/variety_divergence.py) # ============================================================================= @@ -691,3 +852,51 @@ def test_surfaced_direction_pairing(self): result = compute_variety_divergence(feature, dispatches) assert result["surfaced"] is False assert result["direction"] is None + + +# ============================================================================= +# #873: feature-task variety stamp → computable divergence (integration) +# ============================================================================= +# +# C4 stamps metadata.variety.total on the comPACT feature task at creation, so +# the wrap-up Orchestration Retrospective's divergence computation has the +# feature_variety half. These tests pin the OBSERVABLE the #873 stamp unlocks: +# a STAMPED feature total drives a real numeric delta and a reason that is NOT +# "feature_variety_missing" — the exact gap #873 closes. The complement (an +# UNstamped feature → feature_variety_missing) is the contrast case the stamp +# eliminates. +# ============================================================================= + + +class TestFeatureStampUnlocksDivergence: + """#873: a stamped feature total makes divergence computable.""" + + def test_stamped_feature_yields_real_delta_not_missing(self): + """A feature total (the #873 stamp's metadata.variety.total) drives a + real numeric delta and a reason != feature_variety_missing — the #873 + fix observable. Feature 9 vs dispatches [5,5,5] → delta 4 surfaced.""" + result = compute_variety_divergence(9, [5, 5, 5]) + assert result["delta"] == 4 + assert result["reason"] != "feature_variety_missing" + assert result["surfaced"] is True + assert result["direction"] == "overshot" + + def test_unstamped_feature_is_the_gap_873_closes(self): + """Contrast/counter-pin: WITHOUT the feature stamp (feature_variety + None) the computation degrades to feature_variety_missing with a null + delta — exactly the wrap-up Q5 gap #873's comPACT stamp eliminates. + Dispatch stats are still computed over the stamped dispatches.""" + result = compute_variety_divergence(None, [5, 5, 5]) + assert result["reason"] == "feature_variety_missing" + assert result["delta"] is None + assert result["mean"] == 5 # dispatch stats still computed + + def test_stamped_feature_within_threshold_is_computable_not_missing(self): + """Even when the delta is within threshold (not surfaced), a stamped + feature is COMPUTABLE — reason is within_threshold, NOT + feature_variety_missing. The stamp's value is computability, distinct + from whether a divergence is surfaced.""" + result = compute_variety_divergence(8, [7, 8, 9]) + assert result["reason"] == "within_threshold" + assert result["reason"] != "feature_variety_missing" + assert result["delta"] == 0 diff --git a/pact-plugin/tests/test_task_lifecycle_gate.py b/pact-plugin/tests/test_task_lifecycle_gate.py index 1d5662f2..61cf056d 100644 --- a/pact-plugin/tests/test_task_lifecycle_gate.py +++ b/pact-plugin/tests/test_task_lifecycle_gate.py @@ -1888,12 +1888,15 @@ def test_silent_when_extra_reasoning_reconstruction_present( # ============================================================================= -def test_advisory_when_pact_work_task_created_without_variety( +def test_no_r4_advisory_when_pact_work_task_created_without_variety( pact_context, ): - """TaskCreate pact-* work task without metadata.variety → R4 advisory. - Mirrors work_addblockedby_missing trigger shape (same discriminators) - but checks the variety field instead of addBlockedBy.""" + """#865 surgical R4 split: TaskCreate pact-* work task WITHOUT + metadata.variety NO LONGER fires R4 here. The absent-stamp enforcement + moved to the dispatch-boundary gate (handoff_ordering_gate.py), which + catches it at the terminal owner+addBlockedBy wiring write. R4 keeps only + the present-but-malformed arm, so a bare-absent stamp is SILENT at this + PostToolUse surface.""" pact_context(team_name="test-team", session_id="test-session") payload = { "tool_name": "TaskCreate", @@ -1906,9 +1909,9 @@ def test_advisory_when_pact_work_task_created_without_variety( "tool_response": {}, } advisories = tlg.evaluate_lifecycle(payload) - assert any( + assert not any( rule == "variety_missing_on_dispatch_task" for rule, _ in advisories - ) + ), f"absent variety must NOT fire R4 post-split, got: {advisories}" @pytest.mark.parametrize( @@ -2197,9 +2200,12 @@ def test_r4_unresolvable_total_message_distinct_from_rationale_path(pact_context assert "no resolvable total" not in malformed -def test_r4_unresolvable_total_message_distinct_from_absent_path(pact_context): - """The unresolvable-total message differs from the absent-variety - message — three distinct paths under one rule name.""" +def test_r4_unresolvable_fires_but_absent_path_is_silent(pact_context): + """#865 surgical R4 split: the unresolvable-total path (variety PRESENT + but no resolvable total) STILL fires R4 here; the absent-variety path + (variety entirely missing) is now SILENT — its enforcement moved to the + dispatch-boundary gate. Two of the original three paths remain under one + rule; the bare-absent path no longer fires at this PostToolUse surface.""" pact_context(team_name="test-team", session_id="test-session") unresolvable = _r4_message( tlg.evaluate_lifecycle(_r4_create_payload(_rationales_only_variety(total=99))) @@ -2214,8 +2220,9 @@ def test_r4_unresolvable_total_message_distinct_from_absent_path(pact_context): "tool_response": {}, } absent = _r4_message(tlg.evaluate_lifecycle(absent_payload)) - assert unresolvable is not None and absent is not None - assert unresolvable != absent + assert unresolvable is not None, "present-but-untotaled must still fire R4" + assert "no resolvable total" in unresolvable + assert absent is None, "absent variety must be silent at R4 post-split" def test_r4_rationale_problem_reported_before_total_problem(pact_context): @@ -4077,13 +4084,20 @@ def test_advisory_output_carries_no_health_marker( sibling module.)""" monkeypatch.setenv("CLAUDE_CONFIG_DIR", str(tmp_path / "cfg")) pact_context(team_name="test-team", session_id="test-session") + # A present-but-malformed variety (missing a per-dimension rationale) is + # the R4 arm retained after the #865 surgical split — it still fires a + # real advisory through main(), which is all this health-marker-shape test + # needs. (A bare-absent variety no longer fires R4 here; its enforcement + # moved to the dispatch-boundary gate.) + malformed_variety = _well_formed_variety() + malformed_variety.pop("novelty_rationale") payload = { "tool_name": "TaskCreate", "tool_input": { "subject": "implement foo", "owner": "pact-backend-coder", "addBlockedBy": ["41"], - "metadata": {"variety": None}, + "metadata": {"variety": malformed_variety}, }, "tool_response": {}, }