fix: populate agent_outcome diagnostic fields with safe extraction

anandgupta42 · claude · anandgupta42 · commit 259c4d7e310c · 2026-04-29T13:45:18.000-07:00
Telemetry showed ~30% of builder runs ending with `outcome` in
{abandoned, aborted, error} but `reason`, `final_tool`, and `error_class`
all empty — undiagnosable failures. This wires the three fields end-to-end
through a pure helper, with masking applied at the extraction site.

Changes:
- Extend `agent_outcome` event type with required `final_tool`,
  `error_class`, `reason` fields.
- Add `Telemetry.deriveAgentOutcomeReason()` pure helper that maps
  outcome + last-tool + last-error + abort-reason + last-error-class
  to diagnostic strings. Apply `maskString` to both `error` and
  `aborted` reasons (symmetric protection).
- In `prompt.ts`: track `lastToolName`, capture only `err.data.message`
  (not the whole `error.data`, which can carry `responseBody` /
  `responseHeaders` / API tokens), and apply `maskString` at extraction.
  Wire `lastErrorClass` from `errorRecords[]` so `aborted` outcomes
  surface the failing tool's classification.
- Refine `abort.reason` extraction: `instanceof Error` check,
  fallback to `"non_string_reason"` instead of `"[object Object]"`.
- Restore upstream single-line `if (processor.message.error) ...`
  to keep marker discipline clean during future merges.
- 16 unit tests covering all 4 outcomes, masking-actually-removes-secret
  assertion, MCP-namespaced tool names, truncation bounds, and
  `lastErrorClass` for `aborted`.
- Update `regression.test.ts` and `plan-skill-telemetry.test.ts`
  literals for the new required fields.

Reviewed via /consensus:code-review (9 reviewers, 1 convergence round).
Critical finding from all 9 reviewers: original code did
`JSON.stringify(error.data)` before masking, which both leaked
sensitive fields (responseBody, responseHeaders, metadata) and
collapsed all quoted content to `?` via `maskString`. Fix extracts
only the human-readable message field and masks at extraction.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/packages/opencode/src/altimate/telemetry/index.ts b/packages/opencode/src/altimate/telemetry/index.ts
@@ -290,6 +290,11 @@ export namespace Telemetry {
         cost: number
         compactions: number
         outcome: "completed" | "abandoned" | "aborted" | "error"
+        // altimate_change start — agent_outcome diagnostic fields
+        final_tool: string
+        error_class: string
+        reason: string
+        // altimate_change end
       }
     | {
         type: "error_recovered"
@@ -780,6 +785,46 @@ export namespace Telemetry {
     }
   }
 
+  // altimate_change start — agent_outcome diagnostic field derivation
+  /** Derive the final_tool / error_class / reason fields of the agent_outcome telemetry event.
+   *  Pure helper so the logic is unit-testable without standing up a full session.
+   *  Why: these fields used to ship empty for every non-completed outcome (~30% of builder runs).
+   *
+   *  final_tool: lastToolName for every outcome, or "" when it is null. All three fields are always strings.
+   *  completed -> error_class "" and reason ""; abandoned -> error_class "" and reason "no_tools_invoked".
+   *  aborted -> reason is the masked abortReason (default "user_cancelled", max 200 chars); error_class is lastErrorClass.
+   *  error -> reason is the masked lastMessageError (max 500 chars); error_class is classifyError(msg), or "unknown" if empty.
+   */
+  export function deriveAgentOutcomeReason(input: {
+    outcome: "completed" | "abandoned" | "aborted" | "error"
+    lastToolName: string | null
+    lastMessageError: string | null
+    abortReason: string | null
+    lastErrorClass: string
+  }): { final_tool: string; error_class: string; reason: string } {
+    const final_tool = input.lastToolName ?? "" // null becomes "" so the field is always a string
+    switch (input.outcome) {
+      case "completed":
+        return { final_tool, error_class: "", reason: "" }
+      case "abandoned":
+        return { final_tool, error_class: "", reason: "no_tools_invoked" }
+      case "aborted": {
+        const reason = maskString(input.abortReason ?? "user_cancelled").slice(0, 200) // mask first, then cap at 200 chars
+        return { final_tool, error_class: input.lastErrorClass, reason }
+      }
+      case "error": {
+        const msg = input.lastMessageError ?? ""
+        const masked = maskString(msg).slice(0, 500) // mask first, then cap at 500 chars
+        return {
+          final_tool,
+          error_class: msg ? classifyError(msg) : "unknown", // classified from the unmasked message
+          reason: masked,
+        }
+      }
+    }
+  }
+  // altimate_change end
+
   // altimate_change start — expanded error classification patterns for better triage
   // Order matters: earlier patterns take priority. Use specific phrases, not
   // single words, to avoid false positives (e.g., "connection refused" not "connection").
diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts
@@ -332,6 +332,10 @@ export namespace SessionPrompt {
     let emergencySessionEndFired = false
     // altimate_change start — quality signal, tool chain, error fingerprint tracking
     let lastToolCategory = ""
+    // altimate_change start — agent_outcome diagnostic tracking
+    let lastToolName = ""
+    let lastMessageError = ""
+    // altimate_change end
     const toolChain: string[] = []
     let toolErrorCount = 0
     let errorRecoveryCount = 0
@@ -929,13 +933,29 @@ export namespace SessionPrompt {
       const stepParts = await MessageV2.parts(processor.message.id)
       toolCallCount += stepParts.filter((p) => p.type === "tool").length
       if (processor.message.error) sessionHadError = true
+      // altimate_change start — capture last message error for agent_outcome reason
+      if (processor.message.error) {
+        const err = processor.message.error as any
+        try {
+          const name = typeof err?.name === "string" ? err.name : "unknown"
+          const rawMessage = typeof err?.data?.message === "string" ? err.data.message : ""
+          const masked = rawMessage ? Telemetry.maskString(rawMessage).slice(0, 300) : ""
+          lastMessageError = masked ? `${name}: ${masked}` : String(name)
+        } catch {
+          lastMessageError = "unknown"
+        }
+      }
+      // altimate_change end
       // altimate_change start — quality signal + tool chain + error fingerprints
       const toolParts = stepParts.filter((p) => p.type === "tool")
       for (const part of toolParts) {
         if (part.type !== "tool") continue
         const toolType = part.tool.startsWith("mcp__") ? "mcp" as const : "standard" as const
         const toolCategory = Telemetry.categorizeToolName(part.tool, toolType)
         lastToolCategory = toolCategory
+        // altimate_change start — track last tool name for agent_outcome diagnostics
+        lastToolName = part.tool
+        // altimate_change end
         if (toolChain.length < 50) toolChain.push(part.tool)
         const isError = part.state?.status === "error"
         if (isError) {
@@ -1046,6 +1066,27 @@ export namespace SessionPrompt {
       })
     }
     // altimate_change end — emit quality signal, tool chain, and error fingerprint events
+    // altimate_change start — populate agent_outcome diagnostic fields
+    const abortReason: string | null = abort.aborted
+      ? (typeof abort.reason === "string"
+          ? abort.reason
+          : abort.reason instanceof Error
+            ? abort.reason.message
+            : abort.reason
+              ? "non_string_reason"
+              : null)
+      : null
+    const lastErrorClass = errorRecords.length > 0
+      ? errorRecords[errorRecords.length - 1].errorClass
+      : ""
+    const diag = Telemetry.deriveAgentOutcomeReason({
+      outcome,
+      lastToolName: lastToolName || null,
+      lastMessageError: lastMessageError || null,
+      abortReason,
+      lastErrorClass,
+    })
+    // altimate_change end
     Telemetry.track({
       type: "agent_outcome",
       timestamp: Date.now(),
@@ -1057,6 +1098,11 @@ export namespace SessionPrompt {
       cost: sessionTotalCost,
       compactions: compactionCount,
       outcome,
+      // altimate_change start — agent_outcome diagnostic fields
+      final_tool: diag.final_tool,
+      error_class: diag.error_class,
+      reason: diag.reason,
+      // altimate_change end
     })
     if (!emergencySessionEndFired) {
       emergencySessionEndFired = true
diff --git a/packages/opencode/test/altimate/agent-outcome-reason.test.ts b/packages/opencode/test/altimate/agent-outcome-reason.test.ts
@@ -0,0 +1,180 @@
+/**
+ * Unit tests for Telemetry.deriveAgentOutcomeReason — the helper that
+ * populates the diagnostic fields (final_tool, error_class, reason)
+ * on the agent_outcome telemetry event.
+ *
+ * Why these tests matter: ~30% of builder runs end with outcome != completed,
+ * and before this helper existed the event payload had empty diagnostic fields
+ * for all of them, making the failures undiagnosable from telemetry alone.
+ */
+import { describe, expect, test } from "bun:test"
+import { Telemetry } from "../../src/altimate/telemetry"
+
+const baseInput = { // shared defaults: most tests spread this, then add `outcome` and any overrides
+  lastToolName: null as string | null,
+  lastMessageError: null as string | null,
+  abortReason: null as string | null,
+  lastErrorClass: "",
+}
+
+describe("deriveAgentOutcomeReason", () => {
+  test("completed outcome: empty diagnostic fields, final_tool preserved", () => {
+    const out = Telemetry.deriveAgentOutcomeReason({
+      ...baseInput,
+      outcome: "completed",
+      lastToolName: "edit",
+    })
+    expect(out.final_tool).toBe("edit")
+    expect(out.error_class).toBe("")
+    expect(out.reason).toBe("")
+  })
+
+  test("completed outcome with no tool: final_tool empty", () => {
+    const out = Telemetry.deriveAgentOutcomeReason({ ...baseInput, outcome: "completed" })
+    expect(out.final_tool).toBe("") // null lastToolName is reported as ""
+    expect(out.error_class).toBe("")
+    expect(out.reason).toBe("")
+  })
+
+  test("abandoned outcome: reason is 'no_tools_invoked'", () => {
+    const out = Telemetry.deriveAgentOutcomeReason({ ...baseInput, outcome: "abandoned" })
+    expect(out.final_tool).toBe("")
+    expect(out.error_class).toBe("")
+    expect(out.reason).toBe("no_tools_invoked")
+  })
+
+  test("aborted with explicit reason: reason carried through (masked)", () => {
+    const out = Telemetry.deriveAgentOutcomeReason({
+      ...baseInput,
+      outcome: "aborted",
+      lastToolName: "sql_execute",
+      abortReason: "user pressed escape",
+    })
+    expect(out.final_tool).toBe("sql_execute")
+    expect(out.error_class).toBe("") // baseInput.lastErrorClass is ""
+    expect(out.reason).toBe("user pressed escape") // no quoted substring, so the text is expected to pass through masking unchanged
+  })
+
+  test("aborted without reason: defaults to 'user_cancelled'", () => {
+    const out = Telemetry.deriveAgentOutcomeReason({
+      ...baseInput,
+      outcome: "aborted",
+      lastToolName: "edit",
+    })
+    expect(out.final_tool).toBe("edit")
+    expect(out.reason).toBe("user_cancelled")
+  })
+
+  test("aborted reason is masked (quoted secrets stripped)", () => {
+    const out = Telemetry.deriveAgentOutcomeReason({
+      ...baseInput,
+      outcome: "aborted",
+      abortReason: 'cancel because "sk-secret-token-12345"',
+    })
+    expect(out.reason).not.toContain("sk-secret-token-12345")
+  })
+
+  test("aborted reason is truncated to 200 chars", () => {
+    const longReason = "x".repeat(500)
+    const out = Telemetry.deriveAgentOutcomeReason({
+      ...baseInput,
+      outcome: "aborted",
+      abortReason: longReason,
+    })
+    expect(out.reason.length).toBe(200) // the helper slices aborted reasons to 200 chars
+  })
+
+  test("aborted with prior tool error: surfaces lastErrorClass", () => {
+    const out = Telemetry.deriveAgentOutcomeReason({
+      ...baseInput,
+      outcome: "aborted",
+      lastToolName: "data_diff",
+      lastErrorClass: "connection",
+      abortReason: "user_cancelled",
+    })
+    expect(out.error_class).toBe("connection")
+  })
+
+  test("error outcome with file_not_found message: classified", () => {
+    const out = Telemetry.deriveAgentOutcomeReason({
+      ...baseInput,
+      outcome: "error",
+      lastToolName: "read",
+      lastMessageError: "ENOENT: no such file or directory",
+    })
+    expect(out.final_tool).toBe("read")
+    expect(out.error_class).toBe("file_not_found")
+    expect(out.reason).toContain("ENOENT")
+  })
+
+  test("error outcome with edit_mismatch message: classified", () => {
+    const out = Telemetry.deriveAgentOutcomeReason({
+      ...baseInput,
+      outcome: "error",
+      lastToolName: "edit",
+      lastMessageError: "could not find oldString in file",
+    })
+    expect(out.error_class).toBe("edit_mismatch")
+  })
+
+  test("error outcome with unknown message: classified as 'unknown'", () => {
+    const out = Telemetry.deriveAgentOutcomeReason({
+      ...baseInput,
+      outcome: "error",
+      lastMessageError: "something weird happened that nobody anticipated",
+    })
+    expect(out.error_class).toBe("unknown")
+  })
+
+  test("error outcome with empty message: error_class is 'unknown'", () => {
+    const out = Telemetry.deriveAgentOutcomeReason({ ...baseInput, outcome: "error" })
+    expect(out.error_class).toBe("unknown") // no message, so the helper skips classification
+    expect(out.reason).toBe("") // null lastMessageError is treated as an empty message
+  })
+
+  test("error reason masking: quoted API key is stripped", () => {
+    const errMsg = 'request failed with "sk-abcdef0123456789"'
+    const out = Telemetry.deriveAgentOutcomeReason({
+      ...baseInput,
+      outcome: "error",
+      lastMessageError: errMsg,
+    })
+    expect(out.reason).not.toContain("sk-abcdef0123456789")
+    expect(out.reason.length).toBeLessThanOrEqual(500)
+  })
+
+  test("error reason: truncated to 500 chars", () => {
+    const longErr = "boom: ".concat("a".repeat(1000)) // 1006 chars, well over the 500-char cap
+    const out = Telemetry.deriveAgentOutcomeReason({
+      ...baseInput,
+      outcome: "error",
+      lastMessageError: longErr,
+    })
+    expect(out.reason.length).toBeLessThanOrEqual(500)
+  })
+
+  test("MCP-namespaced tool name preserved verbatim in final_tool", () => {
+    const out = Telemetry.deriveAgentOutcomeReason({
+      ...baseInput,
+      outcome: "completed",
+      lastToolName: "mcp__playwright__navigate",
+    })
+    expect(out.final_tool).toBe("mcp__playwright__navigate")
+  })
+
+  test("all four outcomes always populate the three fields (no undefined)", () => {
+    const outcomes = ["completed", "abandoned", "aborted", "error"] as const
+    for (const outcome of outcomes) {
+      const out = Telemetry.deriveAgentOutcomeReason({ // every input supplied explicitly; baseInput is not used here
+        outcome,
+        lastToolName: "x",
+        lastMessageError: "fail",
+        abortReason: "cancel",
+        lastErrorClass: "unknown",
+      })
+      expect(typeof out.final_tool).toBe("string")
+      expect(typeof out.error_class).toBe("string")
+      expect(typeof out.reason).toBe("string")
+    }
+  })
+})
diff --git a/packages/opencode/test/session/regression.test.ts b/packages/opencode/test/session/regression.test.ts
@@ -335,6 +335,9 @@ describe("compaction count in telemetry", () => {
       cost: 0.05,
       compactions: compactionCount,
       outcome: "completed" as const,
+      final_tool: "",
+      error_class: "",
+      reason: "",
     }
 
     expect(event.compactions).toBe(2)
diff --git a/packages/opencode/test/telemetry/plan-skill-telemetry.test.ts b/packages/opencode/test/telemetry/plan-skill-telemetry.test.ts
@@ -184,6 +184,9 @@ describe("telemetry.agent-outcome", () => {
         cost: 0.05,
         compactions: 0,
         outcome,
+        final_tool: "",
+        error_class: "",
+        reason: "",
       }
       expect(event.outcome).toBe(outcome)
       expect(event.agent).toBe("plan")

Original file line number	Diff line number	Diff line change
`@@ -335,6 +335,9 @@ describe("compaction count in telemetry", () => {`
`335`	`335`	`cost: 0.05,`
`336`	`336`	`compactions: compactionCount,`
`337`	`337`	`outcome: "completed" as const,`
	`338`	`+ final_tool: "",`
	`339`	`+ error_class: "",`
	`340`	`+ reason: "",`
`338`	`341`	`}`
`339`	`342`
`340`	`343`	`expect(event.compactions).toBe(2)`