Align prompt output specs with compact check output

easel · easel · commit 8e0161f678e9 · 2026-02-04T15:27:05.000-05:00
diff --git a/cmd/dun/main.go b/cmd/dun/main.go
@@ -292,7 +292,8 @@ func runCheck(args []string, stdout io.Writer, stderr io.Writer) int {
 	case "llm":
 		printLLM(stdout, result)
 	case "json", "prompt":
-		if err := json.NewEncoder(stdout).Encode(result); err != nil {
+		compact := compactResultForOutput(result, root)
+		if err := json.NewEncoder(stdout).Encode(compact); err != nil {
 			fmt.Fprintf(stderr, "encode json: %v\n", err)
 			return dun.ExitCheckFailed
 		}
@@ -1157,6 +1158,32 @@ func printLLM(stdout io.Writer, result dun.Result) {
 	}
 }
 
+func compactResultForOutput(result dun.Result, root string) dun.Result {
+	stateHash := repoStateHashFn(root)
+	out := dun.Result{Checks: make([]dun.CheckResult, len(result.Checks))}
+	for i, check := range result.Checks {
+		out.Checks[i] = check
+		if check.Prompt == nil {
+			continue
+		}
+		compact := *check.Prompt
+		taskHint := "Prompt omitted. Run `dun check --prompt` to get task IDs, then `dun task <id> --prompt`."
+		if stateHash != "" {
+			group := buildTaskGroup(check, stateHash)
+			taskID := ""
+			if len(group.Tasks) > 0 {
+				taskID = group.Tasks[0].ID
+			}
+			if taskID != "" {
+				taskHint = fmt.Sprintf("Prompt omitted. Run `dun task %s --prompt` to view full prompt.", taskID)
+			}
+		}
+		compact.Prompt = taskHint
+		out.Checks[i].Prompt = &compact
+	}
+	return out
+}
+
 func runVersion(args []string, stdout io.Writer, stderr io.Writer) int {
 	fs := flag.NewFlagSet("version", flag.ContinueOnError)
 	fs.SetOutput(stderr)
diff --git a/cmd/dun/main_test.go b/cmd/dun/main_test.go
@@ -257,6 +257,51 @@ func TestRunTaskPrompt(t *testing.T) {
 	}
 }
 
+func TestRunCheckJSONOmitsPromptContent(t *testing.T) {
+	root := setupEmptyRepo(t)
+	origCheck := checkRepo
+	origHash := repoStateHashFn
+	repoStateHashFn = func(string) string { return "deadbeef" }
+	checkRepo = func(_ string, _ dun.Options) (dun.Result, error) {
+		return dun.Result{
+			Checks: []dun.CheckResult{
+				{
+					ID:     "agent-check",
+					Status: "prompt",
+					Signal: "prompt signal",
+					Prompt: &dun.PromptEnvelope{
+						Kind:   "dun.prompt.v1",
+						ID:     "agent-check",
+						Prompt: "Check-ID: agent-check\n\nThis is a big prompt.",
+						Callback: dun.PromptCallback{
+							Command: "dun respond --id agent-check --response -",
+							Stdin:   true,
+						},
+					},
+				},
+			},
+		}, nil
+	}
+	t.Cleanup(func() {
+		checkRepo = origCheck
+		repoStateHashFn = origHash
+	})
+
+	var stdout bytes.Buffer
+	var stderr bytes.Buffer
+	code := runInDirWithWriters(t, root, []string{"check", "--format=json"}, &stdout, &stderr)
+	if code != dun.ExitSuccess {
+		t.Fatalf("expected success, got %d: %s", code, stderr.String())
+	}
+	output := stdout.String()
+	if strings.Contains(output, "This is a big prompt.") {
+		t.Fatalf("expected prompt content to be omitted")
+	}
+	if !strings.Contains(output, "dun task agent-check@deadbeef --prompt") {
+		t.Fatalf("expected prompt hint to include task id")
+	}
+}
+
 func TestRunCheckJSONEncodeError(t *testing.T) {
 	root := setupEmptyRepo(t)
 	errWriter := &failWriter{err: errors.New("write failed")}
diff --git a/docs/design/contracts/API-001-dun-cli.md b/docs/design/contracts/API-001-dun-cli.md
@@ -38,7 +38,9 @@ $ dun [command] [options] [arguments]
 - Schema: See Data Contracts (config schema)
 
 **Output**:
-- Format: `llm` text blocks or JSON
+- Format: prompt-as-data JSON by default, `llm` text for `--format=llm`, JSON
+  for `--format=json`; both JSON forms omit prompt payloads (use
+  `dun task <id> --prompt` to fetch)
 - Schema: See Data Contracts (output schema)
 
 **Exit Codes**:
@@ -52,7 +54,7 @@ $ dun [command] [options] [arguments]
 ```bash
 # Default prompt-as-data output
 $ dun check
-{"checks":[{"id":"helix-create-architecture","status":"prompt","signal":"agent prompt ready","prompt":{"kind":"dun.prompt.v1","id":"helix-create-architecture","prompt":"Check-ID: helix-create-architecture\n...","callback":{"command":"dun respond --id helix-create-architecture --response -","stdin":true}}}]}
+{"checks":[{"id":"helix-create-architecture","status":"prompt","signal":"agent prompt ready","prompt":{"kind":"dun.prompt.v1","id":"helix-create-architecture","prompt":"Prompt omitted. Run `dun task helix-create-architecture@abcd123 --prompt` to view full prompt.","callback":{"command":"dun respond --id helix-create-architecture --response -","stdin":true}}}]}
 
 # LLM output
 $ dun check --format=llm
@@ -433,6 +435,10 @@ equivalent JSON shape.
 }
 ```
 
+**Note**: When emitted via `dun check`, `prompt.prompt` contains a compact
+placeholder ("Prompt omitted. ...") instead of the full prompt. Use
+`dun task <id> --prompt` to retrieve the full prompt payload.
+
 ### Output Schema (list)
 ```json
 {
diff --git a/docs/helix/01-frame/features/F-002-output-formats.md b/docs/helix/01-frame/features/F-002-output-formats.md
@@ -4,9 +4,9 @@ dun:
   depends_on:
     - helix.prd
   review:
-    self_hash: 83b2a3c2ac4e9a760bd04598c3ff9c3ea3504c0f49e8d246cd9a61d540e87898
+    self_hash: 5d4226456b8fd1dca4daae652bbcd24fb50d14f2b1e01193db67cd5a5cf2da35
     deps:
-      helix.prd: 58d3c4be8edb0a0be9d01a3325824c9b350f758a998d02f16208525949c4f1ad
+      helix.prd: 07d49919dec51a33254b7630622ee086a5108ed5deecd456f7228f03712e699d
 ---
 # Feature Spec: F-002 Output Formats
 
@@ -17,7 +17,8 @@ consumption by humans and tools.
 
 ## Requirements
 
-- Default output format is prompt envelopes for agent checks.
+- Default output format is prompt envelopes for agent checks, but `dun check`
+  omits full prompt payloads (prompt field contains a task hint).
 - Provide `--format=llm` for concise human-readable summaries.
 - Provide `--format=json` for structured results.
 - The decision prompt (`dun check --prompt`) must list tasks without inlining
@@ -41,7 +42,8 @@ consumption by humans and tools.
 
 ## Acceptance Criteria
 
-- `dun check` emits prompt envelopes by default when agent checks are present.
+- `dun check` emits prompt envelope metadata by default when agent checks are
+  present; prompt payloads are omitted and replaced with a task hint.
 - `dun check --format=llm` prints concise summaries.
 - `dun check --format=json` emits structured JSON output.
 - `dun check --prompt` emits a compact, bounded task list (no inline prompt
@@ -50,8 +52,8 @@ consumption by humans and tools.
 - Default limits: top 10 tasks per check; summary <= 200 bytes; reason <= 160
   bytes; truncation uses `...`.
 - Task IDs include the repo-state hash and are rejected if stale.
-- JSON output remains a full check result (including prompt envelopes where
-  available); it is not size-bounded like the decision prompt.
+- JSON output remains structured and deterministic but omits full prompt
+  payloads; it is not size-bounded like the decision prompt.
 
 ## Gaps & Conflicts
 
diff --git a/docs/helix/01-frame/user-stories/US-002-output-formats.md b/docs/helix/01-frame/user-stories/US-002-output-formats.md
@@ -11,7 +11,8 @@ can consume results in the right format for my workflow.
 
 ## Acceptance Criteria
 
-- `dun check` emits prompt envelopes by default when agent checks are present.
+- `dun check` emits prompt envelope metadata by default when agent checks are
+  present; prompt payloads are omitted and replaced with a task hint.
 - `dun check --format=llm` prints concise summaries for humans.
 - `dun check --format=json` emits structured JSON output.
 - Output is deterministic for a given repo state.
diff --git a/docs/helix/01-frame/user-stories/US-016-task-workflow.md b/docs/helix/01-frame/user-stories/US-016-task-workflow.md
@@ -28,6 +28,7 @@ command to fetch full prompts,
 
 ### AC-3: Task Prompt Retrieval
 - [ ] `dun task <task-id>` prints a concise task summary.
+- [ ] Task summaries never include full prompt payloads.
 - [ ] `dun task <task-id> --prompt` prints the full prompt payload.
 - [ ] Decision prompt hints how to fetch the full prompt.
 
diff --git a/docs/helix/02-design/solution-designs/SD-002-output-formats.md b/docs/helix/02-design/solution-designs/SD-002-output-formats.md
@@ -4,9 +4,9 @@ dun:
   depends_on:
     - F-002
   review:
-    self_hash: f543f44dd37dea76fd38919c5f15a737ba8fbb46e621a5a1c83c526892fb0757
+    self_hash: 8c32ec1de236d13a6d5cbd49f766f542ae1222821a9de0fe451079e7ec351af0
     deps:
-      F-002: 83b2a3c2ac4e9a760bd04598c3ff9c3ea3504c0f49e8d246cd9a61d540e87898
+      F-002: 5d4226456b8fd1dca4daae652bbcd24fb50d14f2b1e01193db67cd5a5cf2da35
 ---
 # Solution Design: Output Formats
 
@@ -17,7 +17,8 @@ remain deterministic and easy to parse.
 
 ## Goals
 
-- Emit prompt envelopes by default for agent checks.
+- Emit prompt envelopes by default for agent checks, omitting full prompt
+  payloads from `dun check` output.
 - Provide `--format=llm` for concise human-readable summaries.
 - Provide `--format=json` for structured results.
 - Preserve deterministic ordering and stable results for a given repo state.
@@ -48,7 +49,7 @@ remain deterministic and easy to parse.
 ## Components
 
 - Result Model: canonical representation of check outcomes.
-- Prompt Emitter: renders prompt envelopes.
+- Prompt Emitter: renders prompt envelopes with compact prompt placeholders.
 - LLM Renderer: emits concise summaries.
 - JSON Renderer: emits structured machine output.
 - Output Selector: chooses renderer based on CLI flags.
diff --git a/docs/helix/02-design/technical-designs/TD-002-output-formats.md b/docs/helix/02-design/technical-designs/TD-002-output-formats.md
@@ -17,6 +17,8 @@ dun:
 - Provide prompt, LLM, and JSON output formats for check results.
 - Keep output deterministic and stable for automation.
 - Allow format selection via CLI flags and config.
+- Omit full prompt payloads from `dun check` output and expose them via
+  `dun task <id> --prompt`.
 
 ## Non-Goals
 
@@ -35,6 +37,8 @@ dun:
 ### Key Decisions
 
 - Default to `prompt` format for agent loops.
+- `dun check` output uses prompt placeholders; full prompts are retrieved via
+  `dun task <id> --prompt`.
 - JSON output should be schema-stable to avoid breaking integrations.
 
 ## Component Changes
diff --git a/docs/helix/02-design/technical-designs/TD-016-task-workflow.md b/docs/helix/02-design/technical-designs/TD-016-task-workflow.md
@@ -39,6 +39,7 @@ full prompt payloads to a follow-up `dun task` command.
 **File**: `cmd/dun/task.go`
 
 - Add `dun task <task-id>` to emit task summary metadata.
+- Ensure task summaries never include full prompt payloads.
 - Add `--prompt` flag to print the full prompt.
 - Re-run checks to resolve the selected task in the current state.
 
diff --git a/docs/helix/03-test/test-plans/TP-002-output-formats.md b/docs/helix/03-test/test-plans/TP-002-output-formats.md
@@ -12,7 +12,7 @@ Test plan for US-002: Emit Output Formats for Agents and Tools.
 
 | ID | Criterion | Status |
 |----|-----------|--------|
-| AC-1 | `dun check` emits prompt envelopes by default when agent checks are present | Partially Covered |
+| AC-1 | `dun check` emits prompt envelopes by default when agent checks are present (prompt payloads omitted) | Partially Covered |
 | AC-2 | `dun check --format=llm` prints concise summaries for humans | Covered |
 | AC-3 | `dun check --format=json` emits structured JSON output | Covered |
 | AC-4 | Output is deterministic for a given repo state | Gap |
@@ -37,6 +37,7 @@ Test plan for US-002: Emit Output Formats for Agents and Tools.
 - No test verifies prompt envelope `kind` field is always `dun.prompt.v1`
 - No test verifies callback command format is correct across all agent checks
 - No test verifies prompt envelope contains required fields (id, prompt, callback)
+- No test verifies prompt payloads are omitted from default-format `dun check` output (`TestRunCheckJSONOmitsPromptContent` covers `--format=json` only)
 
 ### AC-2: LLM Format Output
 
@@ -106,7 +107,7 @@ func TestCheckDefaultFormatIsPrompt(t *testing.T) {
         t.Fatalf("expected success, got %d", code)
     }
 
-    // Default output should be parseable as JSON with prompt envelope
+    // Default output should be parseable as JSON with prompt envelope placeholder
     var result dun.Result
     if err := json.Unmarshal(stdout.Bytes(), &result); err != nil {
         t.Fatalf("expected JSON output by default: %v", err)
@@ -120,6 +121,9 @@ func TestCheckDefaultFormatIsPrompt(t *testing.T) {
     if check.Prompt == nil {
         t.Fatalf("expected prompt envelope in default output")
     }
+    if !strings.Contains(check.Prompt.Prompt, "Prompt omitted") {
+        t.Fatalf("expected compact prompt placeholder, not full prompt payload")
+    }
 }
 ```
 
diff --git a/docs/helix/03-test/test-plans/TP-016-task-workflow.md b/docs/helix/03-test/test-plans/TP-016-task-workflow.md
@@ -31,7 +31,7 @@ This test plan verifies the IP-016 task workflow changes:
 | AC-1 | `dun check --prompt` lists bounded tasks with summaries + reasons |
 | AC-2 | Decision prompt omits full prompt payloads |
 | AC-3 | Task IDs include repo-state hash and are rejected when stale |
-| AC-4 | `dun task <task-id>` prints task summary metadata |
+| AC-4 | `dun task <task-id>` prints task summary metadata without full prompt payloads |
 | AC-5 | `dun task <task-id> --prompt` prints the full prompt |
 | AC-6 | Summary/reason text is truncated to configured byte limits |
 
@@ -53,7 +53,7 @@ This test plan verifies the IP-016 task workflow changes:
 | GAP-016-01 | Enforce max tasks per check (top N) | P1 | AC-1 |
 | GAP-016-02 | Stale task ID rejection (state mismatch) | P0 | AC-3 |
 | GAP-016-03 | Invalid task ID formatting errors | P1 | AC-3 |
-| GAP-016-04 | `dun task` summary output assertions | P1 | AC-4 |
+| GAP-016-04 | `dun task` summary output omits prompt payloads | P1 | AC-4 |
 | GAP-016-05 | Summary/reason truncation limits | P1 | AC-6 |
 
 ## 5. Proposed Test Cases
@@ -118,7 +118,7 @@ func TestRunTaskRejectsInvalidID(t *testing.T) {
 ```go
 func TestRunTaskSummaryOutput(t *testing.T) {
     // Given: a task ID for an issue
-    // Then: output includes summary, status, and check metadata
+    // Then: output includes summary, status, and check metadata without prompt payloads
 }
 ```