Skip to content

Commit 259c4d7

Browse files
anandgupta42claude
andcommitted
fix: populate agent_outcome diagnostic fields with safe extraction
Telemetry showed ~30% of builder runs ending with `outcome` in {abandoned, aborted, error} but `reason`, `final_tool`, and `error_class` all empty — undiagnosable failures. This wires the three fields end-to-end through a pure helper, with masking applied at the extraction site. Changes: - Extend `agent_outcome` event type with required `final_tool`, `error_class`, `reason` fields. - Add `Telemetry.deriveAgentOutcomeReason()` pure helper that maps outcome + last-tool + last-error + abort-reason + last-error-class to diagnostic strings. Apply `maskString` to both `error` and `aborted` reasons (symmetric protection). - In `prompt.ts`: track `lastToolName`, capture only `err.data.message` (not the whole `error.data`, which can carry `responseBody` / `responseHeaders` / API tokens), and apply `maskString` at extraction. Wire `lastErrorClass` from `errorRecords[]` so `aborted` outcomes surface the failing tool's classification. - Refine `abort.reason` extraction: `instanceof Error` check, fallback to `"non_string_reason"` instead of `"[object Object]"`. - Restore upstream single-line `if (processor.message.error) ...` to keep marker discipline clean during future merges. - 16 unit tests covering all 4 outcomes, masking-actually-removes-secret assertion, MCP-namespaced tool names, truncation bounds, and `lastErrorClass` for `aborted`. - Update `regression.test.ts` and `plan-skill-telemetry.test.ts` literals for the new required fields. Reviewed via /consensus:code-review (9 reviewers, 1 convergence round). Critical finding from all 9 reviewers: original code did `JSON.stringify(error.data)` before masking, which both leaked sensitive fields (responseBody, responseHeaders, metadata) and collapsed all quoted content to `?` via `maskString`. Fix extracts only the human-readable message field and masks at extraction. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 261fe8e commit 259c4d7

5 files changed

Lines changed: 277 additions & 0 deletions

File tree

packages/opencode/src/altimate/telemetry/index.ts

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,11 @@ export namespace Telemetry {
290290
cost: number
291291
compactions: number
292292
outcome: "completed" | "abandoned" | "aborted" | "error"
293+
// altimate_change start — agent_outcome diagnostic fields
294+
final_tool: string
295+
error_class: string
296+
reason: string
297+
// altimate_change end
293298
}
294299
| {
295300
type: "error_recovered"
@@ -780,6 +785,46 @@ export namespace Telemetry {
780785
}
781786
}
782787

788+
// altimate_change start — agent_outcome diagnostic field derivation
789+
/** Derive diagnostic fields for the agent_outcome telemetry event.
790+
* Pure helper so the logic is unit-testable without standing up a full session.
791+
*
792+
* Why: today the agent_outcome event ships with empty reason/final_tool/error_class
793+
* for every non-completed outcome, leaving ~30% of builder failures undiagnosable
794+
* in telemetry. This concentrates the rules in one place and gives us a guarantee
795+
* that the three fields are always populated (with explicit empty strings when
796+
* the outcome carries no diagnostic info — e.g. completed sessions).
797+
*/
798+
export function deriveAgentOutcomeReason(input: {
799+
outcome: "completed" | "abandoned" | "aborted" | "error"
800+
lastToolName: string | null
801+
lastMessageError: string | null
802+
abortReason: string | null
803+
lastErrorClass: string
804+
}): { final_tool: string; error_class: string; reason: string } {
805+
const final_tool = input.lastToolName ?? ""
806+
switch (input.outcome) {
807+
case "completed":
808+
return { final_tool, error_class: "", reason: "" }
809+
case "abandoned":
810+
return { final_tool, error_class: "", reason: "no_tools_invoked" }
811+
case "aborted": {
812+
const reason = maskString(input.abortReason ?? "user_cancelled").slice(0, 200)
813+
return { final_tool, error_class: input.lastErrorClass, reason }
814+
}
815+
case "error": {
816+
const msg = input.lastMessageError ?? ""
817+
const masked = maskString(msg).slice(0, 500)
818+
return {
819+
final_tool,
820+
error_class: msg ? classifyError(msg) : "unknown",
821+
reason: masked,
822+
}
823+
}
824+
}
825+
}
826+
// altimate_change end
827+
783828
// altimate_change start — expanded error classification patterns for better triage
784829
// Order matters: earlier patterns take priority. Use specific phrases, not
785830
// single words, to avoid false positives (e.g., "connection refused" not "connection").

packages/opencode/src/session/prompt.ts

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,10 @@ export namespace SessionPrompt {
332332
let emergencySessionEndFired = false
333333
// altimate_change start — quality signal, tool chain, error fingerprint tracking
334334
let lastToolCategory = ""
335+
// altimate_change start — agent_outcome diagnostic tracking
336+
let lastToolName = ""
337+
let lastMessageError = ""
338+
// altimate_change end
335339
const toolChain: string[] = []
336340
let toolErrorCount = 0
337341
let errorRecoveryCount = 0
@@ -929,13 +933,29 @@ export namespace SessionPrompt {
929933
const stepParts = await MessageV2.parts(processor.message.id)
930934
toolCallCount += stepParts.filter((p) => p.type === "tool").length
931935
if (processor.message.error) sessionHadError = true
936+
// altimate_change start — capture last message error for agent_outcome reason
937+
if (processor.message.error) {
938+
const err = processor.message.error as any
939+
try {
940+
const name = typeof err?.name === "string" ? err.name : "unknown"
941+
const rawMessage = typeof err?.data?.message === "string" ? err.data.message : ""
942+
const masked = rawMessage ? Telemetry.maskString(rawMessage).slice(0, 300) : ""
943+
lastMessageError = masked ? `${name}: ${masked}` : String(name)
944+
} catch {
945+
lastMessageError = "unknown"
946+
}
947+
}
948+
// altimate_change end
932949
// altimate_change start — quality signal + tool chain + error fingerprints
933950
const toolParts = stepParts.filter((p) => p.type === "tool")
934951
for (const part of toolParts) {
935952
if (part.type !== "tool") continue
936953
const toolType = part.tool.startsWith("mcp__") ? "mcp" as const : "standard" as const
937954
const toolCategory = Telemetry.categorizeToolName(part.tool, toolType)
938955
lastToolCategory = toolCategory
956+
// altimate_change start — track last tool name for agent_outcome diagnostics
957+
lastToolName = part.tool
958+
// altimate_change end
939959
if (toolChain.length < 50) toolChain.push(part.tool)
940960
const isError = part.state?.status === "error"
941961
if (isError) {
@@ -1046,6 +1066,27 @@ export namespace SessionPrompt {
10461066
})
10471067
}
10481068
// altimate_change end — emit quality signal, tool chain, and error fingerprint events
1069+
// altimate_change start — populate agent_outcome diagnostic fields
1070+
const abortReason: string | null = abort.aborted
1071+
? (typeof abort.reason === "string"
1072+
? abort.reason
1073+
: abort.reason instanceof Error
1074+
? abort.reason.message
1075+
: abort.reason
1076+
? "non_string_reason"
1077+
: null)
1078+
: null
1079+
const lastErrorClass = errorRecords.length > 0
1080+
? errorRecords[errorRecords.length - 1].errorClass
1081+
: ""
1082+
const diag = Telemetry.deriveAgentOutcomeReason({
1083+
outcome,
1084+
lastToolName: lastToolName || null,
1085+
lastMessageError: lastMessageError || null,
1086+
abortReason,
1087+
lastErrorClass,
1088+
})
1089+
// altimate_change end
10491090
Telemetry.track({
10501091
type: "agent_outcome",
10511092
timestamp: Date.now(),
@@ -1057,6 +1098,11 @@ export namespace SessionPrompt {
10571098
cost: sessionTotalCost,
10581099
compactions: compactionCount,
10591100
outcome,
1101+
// altimate_change start — agent_outcome diagnostic fields
1102+
final_tool: diag.final_tool,
1103+
error_class: diag.error_class,
1104+
reason: diag.reason,
1105+
// altimate_change end
10601106
})
10611107
if (!emergencySessionEndFired) {
10621108
emergencySessionEndFired = true
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
/**
2+
* Unit tests for Telemetry.deriveAgentOutcomeReason — the helper that
3+
* populates the diagnostic fields (final_tool, error_class, reason)
4+
* on the agent_outcome telemetry event.
5+
*
6+
* Why these tests matter: ~30% of builder runs end with outcome != completed,
7+
* and before this helper existed the event payload had empty diagnostic fields
8+
* for all of them, making the failures undiagnosable from telemetry alone.
9+
*/
10+
import { describe, expect, test } from "bun:test"
11+
import { Telemetry } from "../../src/altimate/telemetry"
12+
13+
const baseInput = {
14+
lastToolName: null as string | null,
15+
lastMessageError: null as string | null,
16+
abortReason: null as string | null,
17+
lastErrorClass: "",
18+
}
19+
20+
describe("deriveAgentOutcomeReason", () => {
21+
test("completed outcome: empty diagnostic fields, final_tool preserved", () => {
22+
const out = Telemetry.deriveAgentOutcomeReason({
23+
...baseInput,
24+
outcome: "completed",
25+
lastToolName: "edit",
26+
})
27+
expect(out.final_tool).toBe("edit")
28+
expect(out.error_class).toBe("")
29+
expect(out.reason).toBe("")
30+
})
31+
32+
test("completed outcome with no tool: final_tool empty", () => {
33+
const out = Telemetry.deriveAgentOutcomeReason({ ...baseInput, outcome: "completed" })
34+
expect(out.final_tool).toBe("")
35+
expect(out.error_class).toBe("")
36+
expect(out.reason).toBe("")
37+
})
38+
39+
test("abandoned outcome: reason is 'no_tools_invoked'", () => {
40+
const out = Telemetry.deriveAgentOutcomeReason({ ...baseInput, outcome: "abandoned" })
41+
expect(out.final_tool).toBe("")
42+
expect(out.error_class).toBe("")
43+
expect(out.reason).toBe("no_tools_invoked")
44+
})
45+
46+
test("aborted with explicit reason: reason carried through (masked)", () => {
47+
const out = Telemetry.deriveAgentOutcomeReason({
48+
...baseInput,
49+
outcome: "aborted",
50+
lastToolName: "sql_execute",
51+
abortReason: "user pressed escape",
52+
})
53+
expect(out.final_tool).toBe("sql_execute")
54+
expect(out.error_class).toBe("")
55+
expect(out.reason).toBe("user pressed escape")
56+
})
57+
58+
test("aborted without reason: defaults to 'user_cancelled'", () => {
59+
const out = Telemetry.deriveAgentOutcomeReason({
60+
...baseInput,
61+
outcome: "aborted",
62+
lastToolName: "edit",
63+
})
64+
expect(out.final_tool).toBe("edit")
65+
expect(out.reason).toBe("user_cancelled")
66+
})
67+
68+
test("aborted reason is masked (quoted secrets stripped)", () => {
69+
const out = Telemetry.deriveAgentOutcomeReason({
70+
...baseInput,
71+
outcome: "aborted",
72+
abortReason: 'cancel because "sk-secret-token-12345"',
73+
})
74+
expect(out.reason).not.toContain("sk-secret-token-12345")
75+
})
76+
77+
test("aborted reason is truncated to 200 chars", () => {
78+
const longReason = "x".repeat(500)
79+
const out = Telemetry.deriveAgentOutcomeReason({
80+
...baseInput,
81+
outcome: "aborted",
82+
abortReason: longReason,
83+
})
84+
expect(out.reason.length).toBe(200)
85+
})
86+
87+
test("aborted with prior tool error: surfaces lastErrorClass", () => {
88+
const out = Telemetry.deriveAgentOutcomeReason({
89+
...baseInput,
90+
outcome: "aborted",
91+
lastToolName: "data_diff",
92+
lastErrorClass: "connection",
93+
abortReason: "user_cancelled",
94+
})
95+
expect(out.error_class).toBe("connection")
96+
})
97+
98+
test("error outcome with file_not_found message: classified", () => {
99+
const out = Telemetry.deriveAgentOutcomeReason({
100+
...baseInput,
101+
outcome: "error",
102+
lastToolName: "read",
103+
lastMessageError: "ENOENT: no such file or directory",
104+
})
105+
expect(out.final_tool).toBe("read")
106+
expect(out.error_class).toBe("file_not_found")
107+
expect(out.reason).toContain("ENOENT")
108+
})
109+
110+
test("error outcome with edit_mismatch message: classified", () => {
111+
const out = Telemetry.deriveAgentOutcomeReason({
112+
...baseInput,
113+
outcome: "error",
114+
lastToolName: "edit",
115+
lastMessageError: "could not find oldString in file",
116+
})
117+
expect(out.error_class).toBe("edit_mismatch")
118+
})
119+
120+
test("error outcome with unknown message: classified as 'unknown'", () => {
121+
const out = Telemetry.deriveAgentOutcomeReason({
122+
...baseInput,
123+
outcome: "error",
124+
lastMessageError: "something weird happened that nobody anticipated",
125+
})
126+
expect(out.error_class).toBe("unknown")
127+
})
128+
129+
test("error outcome with empty message: error_class is 'unknown'", () => {
130+
const out = Telemetry.deriveAgentOutcomeReason({ ...baseInput, outcome: "error" })
131+
expect(out.error_class).toBe("unknown")
132+
expect(out.reason).toBe("")
133+
})
134+
135+
test("error reason masking: quoted API key is stripped", () => {
136+
const errMsg = 'request failed with "sk-abcdef0123456789"'
137+
const out = Telemetry.deriveAgentOutcomeReason({
138+
...baseInput,
139+
outcome: "error",
140+
lastMessageError: errMsg,
141+
})
142+
expect(out.reason).not.toContain("sk-abcdef0123456789")
143+
expect(out.reason.length).toBeLessThanOrEqual(500)
144+
})
145+
146+
test("error reason: truncated to 500 chars", () => {
147+
const longErr = "boom: ".concat("a".repeat(1000))
148+
const out = Telemetry.deriveAgentOutcomeReason({
149+
...baseInput,
150+
outcome: "error",
151+
lastMessageError: longErr,
152+
})
153+
expect(out.reason.length).toBeLessThanOrEqual(500)
154+
})
155+
156+
test("MCP-namespaced tool name preserved verbatim in final_tool", () => {
157+
const out = Telemetry.deriveAgentOutcomeReason({
158+
...baseInput,
159+
outcome: "completed",
160+
lastToolName: "mcp__playwright__navigate",
161+
})
162+
expect(out.final_tool).toBe("mcp__playwright__navigate")
163+
})
164+
165+
test("all four outcomes always populate the three fields (no undefined)", () => {
166+
const outcomes = ["completed", "abandoned", "aborted", "error"] as const
167+
for (const outcome of outcomes) {
168+
const out = Telemetry.deriveAgentOutcomeReason({
169+
outcome,
170+
lastToolName: "x",
171+
lastMessageError: "fail",
172+
abortReason: "cancel",
173+
lastErrorClass: "unknown",
174+
})
175+
expect(typeof out.final_tool).toBe("string")
176+
expect(typeof out.error_class).toBe("string")
177+
expect(typeof out.reason).toBe("string")
178+
}
179+
})
180+
})

packages/opencode/test/session/regression.test.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,9 @@ describe("compaction count in telemetry", () => {
335335
cost: 0.05,
336336
compactions: compactionCount,
337337
outcome: "completed" as const,
338+
final_tool: "",
339+
error_class: "",
340+
reason: "",
338341
}
339342

340343
expect(event.compactions).toBe(2)

packages/opencode/test/telemetry/plan-skill-telemetry.test.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,9 @@ describe("telemetry.agent-outcome", () => {
184184
cost: 0.05,
185185
compactions: 0,
186186
outcome,
187+
final_tool: "",
188+
error_class: "",
189+
reason: "",
187190
}
188191
expect(event.outcome).toBe(outcome)
189192
expect(event.agent).toBe("plan")

0 commit comments

Comments
 (0)