Skip to content

Commit bc5e8ed

Browse files
committed
ship: prepare lane for review
1 parent fd04444 commit bc5e8ed

38 files changed

Lines changed: 2784 additions & 386 deletions

apps/ade-cli/src/tuiClient/__tests__/ApprovalPrompt.test.tsx

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,4 +66,51 @@ describe("ApprovalPrompt", () => {
6666
expect(frame).toContain("enter");
6767
expect(frame).toContain("next/send");
6868
});
69+
70+
it("renders orchestration model-selection briefing metadata", () => {
71+
const approval: PendingApproval = {
72+
itemId: "model-1",
73+
description: "Build the orchestration roster.",
74+
highStakes: false,
75+
mode: "question",
76+
request: {
77+
requestId: "model-1",
78+
source: "ade",
79+
kind: "model_selection",
80+
title: "Pick a model for the web-ui worker",
81+
description: "Build the orchestration roster.",
82+
allowsFreeform: true,
83+
blocking: true,
84+
canProceedWithoutAnswer: false,
85+
providerMetadata: {
86+
role: "worker",
87+
tag: "web-ui",
88+
workDescription: "Build the orchestration roster.",
89+
filesHint: ["OrchestrationPanel.tsx", "TaskCard.tsx"],
90+
dependsOn: ["planning-rounds", "model-routing"],
91+
},
92+
questions: [
93+
{
94+
id: "model",
95+
header: "Model",
96+
question: "Which model should the web-ui worker use?",
97+
},
98+
],
99+
},
100+
};
101+
102+
const frame = stripAnsi(render(
103+
<ApprovalPrompt
104+
approval={approval}
105+
questionState={createPendingQuestionSelectionState(approval)}
106+
width={100}
107+
/>,
108+
).lastFrame() ?? "");
109+
110+
expect(frame).toContain("MODEL SELECTION");
111+
expect(frame).toContain("Description: Build the orchestration roster.");
112+
expect(frame).toContain("Files: OrchestrationPanel.tsx, TaskCard.tsx");
113+
expect(frame).toContain("Runs after: planning-rounds, model-routing");
114+
expect(frame).toContain("Which model should the web-ui worker use?");
115+
});
69116
});

apps/ade-cli/src/tuiClient/components/ApprovalPrompt.tsx

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,34 @@ function pendingInputAccent(source: string | null | undefined): string {
4646
return theme.provider(normalized as AdeCodeProvider).color;
4747
}
4848

49+
/**
 * Normalizes an untyped value into a trimmed, non-empty string.
 *
 * @param value - Any value, typically a field read from loosely typed metadata.
 * @returns The trimmed text, or `null` when `value` is not a string or is
 *   empty/whitespace-only.
 */
function stringValue(value: unknown): string | null {
  if (typeof value !== "string") return null;
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : null;
}
52+
53+
/**
 * Normalizes an untyped value into a list of trimmed, non-empty strings.
 *
 * Non-array input yields an empty list. Within an array, non-string entries
 * and blank strings are dropped, and surviving entries are returned trimmed so
 * the result follows the same convention as `stringValue` (no stray padding
 * when the entries are joined for display).
 *
 * @param value - Any value, typically a field read from loosely typed metadata.
 * @returns The cleaned entries in their original order.
 */
function stringListValue(value: unknown): string[] {
  if (!Array.isArray(value)) return [];
  const cleaned: string[] = [];
  for (const entry of value) {
    if (typeof entry !== "string") continue;
    const trimmed = entry.trim();
    // Whitespace-only entries carry no information; skip them.
    if (trimmed.length > 0) cleaned.push(trimmed);
  }
  return cleaned;
}
58+
59+
/**
 * Joins a list into a comma-separated summary, showing at most `limit` entries
 * and collapsing the remainder into a trailing `+N more` marker.
 *
 * The limit is clamped to be non-negative, and the hidden count is derived
 * from the entries actually shown, so a zero, negative, or fractional limit
 * can no longer produce a leading separator or a wrong/fractional count.
 *
 * @param values - Entries to summarize, in display order.
 * @param limit - Maximum number of entries to show verbatim (default 3).
 * @returns e.g. `"a, b, c, +2 more"`; an empty string for an empty list.
 */
function compactList(values: string[], limit = 3): string {
  const visible = values.slice(0, Math.max(0, limit));
  const hiddenCount = values.length - visible.length;
  const parts = hiddenCount > 0 ? [...visible, `+${hiddenCount} more`] : visible;
  return parts.join(", ");
}
64+
65+
/**
 * Builds the label/value rows shown as the briefing on a model-selection prompt.
 *
 * Reads `workDescription`, `filesHint`, and `dependsOn` from the request's
 * provider metadata and emits, in this order, a "Description", "Files", and
 * "Runs after" row for each field that has usable content. List fields are
 * summarized with `compactList`.
 *
 * @param metadata - Loosely typed provider metadata; may be absent.
 * @returns Ordered `[label, value]` pairs; empty when there is nothing to show.
 */
function modelSelectionBriefing(metadata: Record<string, unknown> | undefined): Array<[string, string]> {
  if (!metadata) return [];

  const workDescription = stringValue(metadata.workDescription);
  const fileHints = stringListValue(metadata.filesHint);
  const prerequisites = stringListValue(metadata.dependsOn);

  // Candidate rows in display order; a null value marks a row to omit.
  const candidates: Array<[string, string | null]> = [
    ["Description", workDescription],
    ["Files", fileHints.length > 0 ? compactList(fileHints) : null],
    ["Runs after", prerequisites.length > 0 ? compactList(prerequisites) : null],
  ];

  return candidates.filter((row): row is [string, string] => row[1] !== null);
}
76+
4977
/**
5078
* An access-key prefix shown immediately before an action's pill, e.g. the `a`
5179
* in `a [ approve ]`. Accentuated when the action is highlighted.
@@ -174,6 +202,9 @@ export function ApprovalPrompt({
174202
const answeredCount = isQuestion
175203
? pendingQuestionAnsweredCount(approval.request, questionState?.answers ?? {})
176204
: 0;
205+
const briefingRows = kind === "model_selection"
206+
? modelSelectionBriefing(approval.request?.providerMetadata)
207+
: [];
177208

178209
const card = (
179210
<Box
@@ -220,6 +251,16 @@ export function ApprovalPrompt({
220251
</Text>
221252
) : null}
222253

254+
{briefingRows.length ? (
255+
<Box flexDirection="column" marginTop={1}>
256+
{briefingRows.map(([label, value]) => (
257+
<Text key={label} color={theme.color.t3} wrap="truncate-end">
258+
{truncateEnd(`${label}: ${value}`, textWidth)}
259+
</Text>
260+
))}
261+
</Box>
262+
) : null}
263+
223264
{isQuestion && questions.length ? (
224265
<Box flexDirection="column" marginTop={1}>
225266
{questions.map((entry, questionIndex) => {

apps/desktop/resources/agent-skills/ade-orchestrator/SKILL.md

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -33,41 +33,33 @@ When you accept an override:
3333

3434
## §3 — Planning protocol (lead only)
3535

36-
**Planning is interactive. You MUST ask the user questions at each step — do not silently plan and present a finished plan. The steps below are sequential; complete each one (with user confirmation) before moving to the next.**
36+
**Planning is a deterministic, gated sequence — server-enforced, not a suggestion.** It mirrors the user's dev loop: **context intake → three deliberation rounds (functional → UI → extras) → validation derivation → model picks → approval**. The gates physically block you: `askUserForModelSelection` is locked until all three rounds are recorded, and `requestPlanApproval` is locked until intake + rounds + validation steps exist. You cannot skip ahead by writing prose. Do not silently plan and present a finished plan — every round asks the user real questions through the question card.
3737

38-
1. Read `goal.md` if present in the lane worktree; otherwise `askUser` for a one-line goal. Persist it to `manifest.goalSummary`.
38+
The planning state lives in `manifest.leadState.planning.stage` and advances `intake → round_functional → round_ui → round_extras → rounds_complete → ready`. Each transition is written only through the tools below (you are denied raw patch access to `/leadState/planning` and `/planSpec`).
3939

40-
2. **Codebase intake — inspect-first, ask-on-uncertainty.** Read `CLAUDE.md`, `README.md`, package manifests (`package.json` / `pyproject.toml` / `Cargo.toml` / `go.mod` / etc.), CI config (`.github/workflows/`, `.circleci/`, `.gitlab-ci.yml`), top-level directory listing, recent `git log --oneline -50`. Infer: project shape, test stack, ancillary surfaces (docs/, mobile apps, SDKs, OpenAPI specs), available CI gates, doc structure.
40+
1. **Goal.** Read `goal.md` if present in the lane worktree; otherwise `askUser` for a one-line goal. Persist it to `manifest.goalSummary`.
4141

42-
3. Propose a **tag taxonomy** (3–6 tags) and confirm via `askUser`. Tags are project-specific, not preset. Examples by shape:
43-
- Fullstack web → `web-ui` / `backend` / `docs` / `tests`
44-
- Graphics → `render-pipeline` / `shaders` / `assets`
45-
- Mobile → `swiftui` / `storekit` / `share-extension`
46-
- Library → `core-api` / `examples` / `docs`
42+
2. **Codebase intake (the `/context` step) — REQUIRED FIRST.** Read `CLAUDE.md`/`README.md`, package manifests (`package.json` / `pyproject.toml` / `Cargo.toml` / `go.mod`), CI config (`.github/workflows/` etc.), the top-level directory listing, and recent `git log`/`git diff main`. `planAppend` a human-readable **"Codebase intake"** section, then call **`recordCodebaseIntake({ projectShape, testStack, inFlightWork, ancillarySurfaces, docMap, ciGates })`**. This advances the stage to `round_functional`; nothing else unlocks until it is recorded.
4743

48-
4. Propose **tasks** per phase. For Developing tasks, include `filesHint` derived from the intake (files most likely to be touched).
44+
3. **Three deliberation rounds (the `/plan` step).** Run each with **`askPlanningRound`**, in order — the tool enforces it:
45+
- **Round 1 — functional** (`kind: "functional"`): resolve the real functional ambiguities. Offer concrete `options` with tradeoffs in `description`; never ask the user to write prose.
46+
- **Round 2 — UI** (`kind: "ui"`): put an **ASCII wireframe in each option's `preview`** (rendered as a monospace box). If the change has no UI, offer a single "N/A — no UI" option.
47+
- **Round 3 — extras** (`kind: "extras"`, usually `multiSelect: true`): delightful extras the user didn't ask for but might want.
48+
Always pass `lockedSummary` (your one-line locked outcome). **Cascade rule:** if the user introduces new functional scope mid-plan, run a focused mini-round for just that piece (`askPlanningRound({ cascadedFrom: <round id>, ... })`) and merge it — do not redesign locked decisions.
4949

50-
5. **Plan quality minimum.** The plan may include any extra detail that helps the user or workers, but before approval it must include at least:
51-
- Goal, assumptions, and locked user decisions.
52-
- In-scope work.
53-
- Clear out-of-scope / non-goals.
54-
- Alternatives, options, or tradeoffs considered for major choices.
55-
- UI / UX / user-facing decisions when applicable, or an explicit "not applicable" note.
56-
- Planned implementation order, dependencies, and what can run in parallel.
57-
- Agent plan: worker / validator tags to spawn, model-routing status, and what each owns.
58-
- Coordination/logging plan: how `plan.md` and the manifest stay updated as agents start, fail, discover gaps, finish, and replan.
59-
- Validation / proof plan with concrete checks or evidence derived from the repo.
60-
- Plan presentation details for the plan pane. Use GFM tables, mermaid fences, images, and links to `artifacts/ui/*.html` for design specs. Do not embed raw iframes; ADE renders `artifacts/ui/*.html` links as sandboxed previews with a full-design action.
50+
4. **Tag taxonomy + tasks.** Propose a project-specific tag taxonomy (3–6 tags, e.g. `web-ui` / `backend` / `docs`). Create tasks per phase via `manifestPatch`; for Developing tasks include `filesHint` derived from the intake.
6151

62-
6. **Validation step derivation.** See §6. Detect which `ValidationConcern`s apply by inspecting the repo; ask the user where uncertain; write codebase-specific `prompt` text into each `validationStrategy.steps[]` entry. Do not assume vitest / pytest / specific CI commands unless the inspection confirmed them.
52+
5. **Validation derivation (the `/quality` + `/test` step).** See §6. Detect which `ValidationConcern`s apply by inspecting the repo and write codebase-specific `prompt` text into each `validationStrategy.steps[]` entry. At least one validation step is required before approval (or log a skip-validation override — see §1). Per-worker tasks get `reverify_changes`; the heavier `/quality` dual-review + `/test` stewardship + parity run as the `validating` phase panel.
6353

64-
7. **Model picks.** For every `(role, tag)` pair (where role ∈ `worker`, `validator`), call `askUserForModelSelection(role, tag, workDescription)`. Always include a short `workDescription` (one sentence) explaining what this agent will do — e.g. "Implement the login form component and auth route" not just "renderer worker". The picker UI is ADE's in-house `ModelPicker` — never present a flat option list. Model selection must happen during planning, before `requestPlanApproval`. The tool will reject calls after the plan is approved.
54+
6. **Model picks.** Now unlocked. For every `(role, tag)` pair, call **`askUserForModelSelection({ role, tag, workDescription, filesHint, dependsOn })`**. Always include a one-sentence `workDescription` and, when known, `filesHint` (files it will touch) and `dependsOn` (tasks it runs after) — the picker renders these as an agent briefing so the user can choose a fitting model. Never present a flat option list.
6555

66-
8. Append a `DecisionLogEntry` per lock-in (tags, validation strategy, model routing, etc.). Each entry carries `source: "lead"`, `at`, and a short `summary`.
56+
7. **plan.md is the single source of truth.** Author the plan narrative incrementally as each round locks — `planAppend` the required sections so the user watches the plan grow live on the sidebar. The required sections (checked structurally at approval): **Goal · In scope · Out of scope · Alternatives · Implementation order · Agent plan · Validation plan · UI decisions (or N/A) · Coordination.** Use GFM tables, mermaid fences, and links to `artifacts/ui/*.html` for specs (rendered as sandboxed previews). There is no separate "approval summary" — the user approves the live plan.md.
6757

68-
9. **Plan-ready gate.** Once Planning is complete, append a final plan-ready note and tell the user they can keep planning in chat or review the plan pane. Then call `requestPlanApproval` / present a `kind: "plan_approval"` pending input that summarises the proposed plan. This surfaces the plan-pane **Implement** button. The approval summary must pass the plan quality minimum above. **Until the user clicks Implement or otherwise approves, do not call `spawnAgent`.**
58+
8. **Approval.** Call **`requestPlanApproval`** (no summary argument — it reads the live plan.md). It marks planning ready, runs the structural readiness check over plan.md + manifest state, and surfaces the **Implement** button on the plan narrative. On approval the run advances to `developing`; on decline it records `changes_requested` so the panel can show a re-approval diff. **Until the user approves, `spawnAgent` is blocked.**
6959

70-
10. **Live plan sync.** During Developing and Validating, keep `plan.md` synchronized as the shared operations log. Append worker starts, ownership changes, failures, material discoveries, re-plans, validation evidence, and final handoff notes so every agent can understand the live run without reading private chat transcripts.
60+
9. **User override (§1).** If the user explicitly waives a round ("no UI here, skip it") or validation, call **`recordPlanningOverride({ skippedRounds, skipReason })`** and log a `UserOverrideEntry`. Skipped rounds are then treated as satisfied by the gate.
61+
62+
10. **Live plan sync.** During Developing and Validating, keep `plan.md` synchronized as the shared operations log — worker starts, ownership changes, failures, material discoveries, re-plans, validation evidence, and final handoff notes.
7163

7264
## §4 — Developing protocol (worker only)
7365

@@ -105,7 +97,23 @@ When the planner writes a `validationStrategy.steps[]` entry, pick a `Validation
10597

10698
**Planner derivation.** Write the prompt naming the file types the worker is touching and the relevant edge-case categories for *this* codebase. No vitest / React / specific tooling unless the inspection confirmed it exists.
10799

108-
### `test_suite_truthfulness` (automate principle, only when codebase has tests)
100+
### The validation panel (how the heavy pass runs)
101+
102+
The Validating phase runs as a **lean perspective-diverse panel**: spawn a small set of validators, each with a distinct lens, then synthesize. Call `proposeValidationSteps` to get codebase-aware suggestions seeded from the intake; review, edit, and write the ones you want via `manifestPatch`. Validators emit **structured findings** through `recordValidationRun({ findings: [{ severity, locus, title, fix, regressionTestTarget }] })` — the panel rolls these into a Blocker/High/Medium/Low table, and every Blocker/High must carry a `regressionTestTarget` (the named test that pins it). Keep the panel small (one validator per lens, not a fan-out).
103+
104+
### `dual_review_correctness_security` (the /quality correctness + security track)
105+
106+
**Principle.** Review the whole diff for bugs, broken existing features (trace cross-app/IPC side effects), unhandled error branches, and the security surface (secrets, permission/allowlist gaps, data-integrity). Emit structured findings with honest severity; never pad.
107+
108+
### `dual_review_maintainability` (the /quality maintainability track)
109+
110+
**Principle.** Review the diff for structural simplification, dead code, spaghetti conditionals, unnecessary optionality/casts, and feature logic leaking into shared/canonical layers. Each finding names the smallest behavior-preserving fix.
111+
112+
### `regression_pinning` (ties /quality → /test)
113+
114+
**Principle.** Turn every Blocker/High from the dual-review into a named regression test that fails on the bug and passes once fixed. A finding is not handled until a test pins it. Only meaningful when the codebase has tests.
115+
116+
### `test_suite_truthfulness` / `test_stewardship` (automate principle, only when codebase has tests)
109117

110118
**Principle.** "Leave the suite more truthful and smaller, not just larger." Three passes in order:
111119
- **PRUNE** — orphaned tests, `skip` / `only` / `todo`, anti-pattern tests like `expect(true)` or zero-assertion bodies, over-mocked fixtures, render-only UI tests.

0 commit comments

Comments
 (0)