Skip to content

Commit a1b0a8c

Browse files
feat(observability): add debug logging and enhanced metrics
- Add debug logs to all event handlers (session, message, permission, activity) - Log level now follows opencode --log-level via plugin config hook - Add 7 new metrics: cache.count (cacheRead/cacheCreation), session.duration, message.count, session.token.total, session.cost.total, model.usage, retry.count - Track per-session totals (tokens, cost, messages) via sessionTotals map - Record session duration and totals histograms on session.idle - Increment retry counter on session.status retry events - Fix cache counter to use occurrence counts (1) with cacheRead/cacheCreation types - Fix accumulateSessionTotals to use setBoundedMap (immutable update) - Fix sessionTotals leak: only delete by real sessionID in session.error - Move accumulateSessionTotals to util.ts to eliminate cross-handler import - Change sessionTokenGauge/sessionCostGauge from UpDownCounter to Histogram - Include cache tokens in totalTokens for session gauge accuracy - Add session.id attribute to modelUsageCounter for join capability - Redact shell command arguments from debug log (log length only) - Deduplicate config debug log (unique fields only after info startup log) - Rename status.message to retryMessage to clarify intent - Merge duplicate session.idle debug log into single call - Add tests for all new behaviors (102 total, up from 86)
1 parent 0299b11 commit a1b0a8c

12 files changed

Lines changed: 385 additions & 31 deletions

File tree

.npmrc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
//registry.npmjs.org/:_authToken="${NPM_TOKEN}"

src/handlers/activity.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,35 +5,47 @@ import type { HandlerContext } from "../types.ts"
55
/** Records lines-added and lines-removed metrics for each file in the diff. */
66
export function handleSessionDiff(e: EventSessionDiff, ctx: HandlerContext) {
77
const sessionID = e.properties.sessionID
8+
let totalAdded = 0
9+
let totalRemoved = 0
810
for (const fileDiff of e.properties.diff) {
911
if (fileDiff.additions > 0) {
1012
ctx.instruments.linesCounter.add(fileDiff.additions, {
1113
...ctx.commonAttrs,
1214
"session.id": sessionID,
1315
type: "added",
1416
})
17+
totalAdded += fileDiff.additions
1518
}
1619
if (fileDiff.deletions > 0) {
1720
ctx.instruments.linesCounter.add(fileDiff.deletions, {
1821
...ctx.commonAttrs,
1922
"session.id": sessionID,
2023
type: "removed",
2124
})
25+
totalRemoved += fileDiff.deletions
2226
}
2327
}
28+
ctx.log("debug", "otel: lines_of_code counter incremented", {
29+
sessionID,
30+
files: e.properties.diff.length,
31+
added: totalAdded,
32+
removed: totalRemoved,
33+
})
2434
}
2535

2636
const GIT_COMMIT_RE = /\bgit\s+commit(?![-\w])/
2737

2838
/** Detects `git commit` invocations in bash tool calls and increments the commit counter and emits a `commit` log event. */
2939
export function handleCommandExecuted(e: EventCommandExecuted, ctx: HandlerContext) {
3040
if (e.properties.name !== "bash") return
41+
ctx.log("debug", "otel: command.executed (bash)", { sessionID: e.properties.sessionID, argumentsLength: e.properties.arguments.length })
3142
if (!GIT_COMMIT_RE.test(e.properties.arguments)) return
3243

3344
ctx.instruments.commitCounter.add(1, {
3445
...ctx.commonAttrs,
3546
"session.id": e.properties.sessionID,
3647
})
48+
ctx.log("debug", "otel: commit counter incremented", { sessionID: e.properties.sessionID })
3749
ctx.logger.emit({
3850
severityNumber: SeverityNumber.INFO,
3951
severityText: "INFO",

src/handlers/message.ts

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import { SeverityNumber } from "@opentelemetry/api-logs"
22
import type { AssistantMessage, EventMessageUpdated, EventMessagePartUpdated, ToolPart } from "@opencode-ai/sdk"
3-
import { errorSummary } from "../util.ts"
4-
import { setBoundedMap } from "../util.ts"
3+
import { errorSummary, setBoundedMap, accumulateSessionTotals } from "../util.ts"
54
import type { HandlerContext } from "../types.ts"
65

76
/**
@@ -18,13 +17,38 @@ export function handleMessageUpdated(e: EventMessageUpdated, ctx: HandlerContext
1817
const duration = assistant.time.completed - assistant.time.created
1918
const { tokenCounter, costCounter } = ctx.instruments
2019

20+
const totalTokens = assistant.tokens.input + assistant.tokens.output + assistant.tokens.reasoning
21+
+ assistant.tokens.cache.read + assistant.tokens.cache.write
2122
tokenCounter.add(assistant.tokens.input, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID, type: "input" })
2223
tokenCounter.add(assistant.tokens.output, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID, type: "output" })
2324
tokenCounter.add(assistant.tokens.reasoning, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID, type: "reasoning" })
2425
tokenCounter.add(assistant.tokens.cache.read, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID, type: "cacheRead" })
2526
tokenCounter.add(assistant.tokens.cache.write, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID, type: "cacheCreation" })
2627
costCounter.add(assistant.cost, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID })
2728

29+
if (assistant.tokens.cache.read > 0) {
30+
ctx.instruments.cacheCounter.add(1, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID, type: "cacheRead" })
31+
}
32+
if (assistant.tokens.cache.write > 0) {
33+
ctx.instruments.cacheCounter.add(1, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID, type: "cacheCreation" })
34+
}
35+
36+
ctx.instruments.messageCounter.add(1, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID })
37+
ctx.instruments.modelUsageCounter.add(1, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID, provider: providerID })
38+
39+
accumulateSessionTotals(sessionID, totalTokens, assistant.cost, ctx)
40+
41+
ctx.log("debug", "otel: token+cost counters incremented", {
42+
sessionID,
43+
model: modelID,
44+
input: assistant.tokens.input,
45+
output: assistant.tokens.output,
46+
reasoning: assistant.tokens.reasoning,
47+
cacheRead: assistant.tokens.cache.read,
48+
cacheWrite: assistant.tokens.cache.write,
49+
cost_usd: assistant.cost,
50+
})
51+
2852
if (assistant.error) {
2953
ctx.logger.emit({
3054
severityNumber: SeverityNumber.ERROR,
@@ -98,6 +122,7 @@ export function handleMessagePartUpdated(e: EventMessagePartUpdated, ctx: Handle
98122
sessionID: toolPart.sessionID,
99123
startMs: toolPart.state.time.start,
100124
})
125+
ctx.log("debug", "otel: tool span started", { sessionID: toolPart.sessionID, tool: toolPart.tool, key })
101126
return
102127
}
103128

@@ -138,6 +163,12 @@ export function handleMessagePartUpdated(e: EventMessagePartUpdated, ctx: Handle
138163
...ctx.commonAttrs,
139164
},
140165
})
166+
ctx.log("debug", "otel: tool.duration histogram recorded", {
167+
sessionID: toolPart.sessionID,
168+
tool_name: toolPart.tool,
169+
duration_ms,
170+
success,
171+
})
141172
return ctx.log(success ? "info" : "error", "otel: tool_result", {
142173
sessionID: toolPart.sessionID,
143174
tool_name: toolPart.tool,

src/handlers/permission.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ export function handlePermissionUpdated(e: EventPermissionUpdated, ctx: HandlerC
1111
title: perm.title,
1212
sessionID: perm.sessionID,
1313
})
14+
ctx.log("debug", "otel: permission stored", { permissionID: perm.id, sessionID: perm.sessionID, type: perm.type, title: perm.title })
1415
}
1516

1617
/** Emits a `tool_decision` log event recording whether the permission was accepted or rejected. */
@@ -19,6 +20,7 @@ export function handlePermissionReplied(e: EventPermissionReplied, ctx: HandlerC
1920
const pending = ctx.pendingPermissions.get(permissionID)
2021
ctx.pendingPermissions.delete(permissionID)
2122
const decision = response === "allow" || response === "allowAlways" ? "accept" : "reject"
23+
ctx.log("debug", "otel: tool_decision emitted", { permissionID, sessionID, decision, source: response, tool_name: pending?.title ?? "unknown" })
2224
ctx.logger.emit({
2325
severityNumber: SeverityNumber.INFO,
2426
severityText: "INFO",

src/handlers/session.ts

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
import { SeverityNumber } from "@opentelemetry/api-logs"
2-
import type { EventSessionCreated, EventSessionIdle, EventSessionError } from "@opencode-ai/sdk"
3-
import { errorSummary } from "../util.ts"
2+
import type { EventSessionCreated, EventSessionIdle, EventSessionError, EventSessionStatus } from "@opencode-ai/sdk"
3+
import { errorSummary, setBoundedMap } from "../util.ts"
44
import type { HandlerContext } from "../types.ts"
55

6-
/** Increments the session counter and emits a `session.created` log event. */
6+
/** Increments the session counter, records start time, and emits a `session.created` log event. */
77
export function handleSessionCreated(e: EventSessionCreated, ctx: HandlerContext) {
88
const sessionID = e.properties.info.id
99
const createdAt = e.properties.info.time.created
1010
ctx.instruments.sessionCounter.add(1, { ...ctx.commonAttrs, "session.id": sessionID })
11+
setBoundedMap(ctx.sessionTotals, sessionID, { startMs: createdAt, tokens: 0, cost: 0, messages: 0 })
1112
ctx.logger.emit({
1213
severityNumber: SeverityNumber.INFO,
1314
severityText: "INFO",
@@ -16,7 +17,7 @@ export function handleSessionCreated(e: EventSessionCreated, ctx: HandlerContext
1617
body: "session.created",
1718
attributes: { "event.name": "session.created", "session.id": sessionID, ...ctx.commonAttrs },
1819
})
19-
return ctx.log("info", "otel: session.created", { sessionID })
20+
return ctx.log("info", "otel: session.created", { sessionID, createdAt })
2021
}
2122

2223
function sweepSession(sessionID: string, ctx: HandlerContext) {
@@ -28,23 +29,50 @@ function sweepSession(sessionID: string, ctx: HandlerContext) {
2829
}
2930
}
3031

31-
/** Emits a `session.idle` log event and clears any pending tool spans and permissions for the session. */
32+
/** Emits a `session.idle` log event, records duration and session total histograms, and clears pending state. */
3233
export function handleSessionIdle(e: EventSessionIdle, ctx: HandlerContext) {
3334
const sessionID = e.properties.sessionID
35+
const totals = ctx.sessionTotals.get(sessionID)
36+
ctx.sessionTotals.delete(sessionID)
3437
sweepSession(sessionID, ctx)
38+
39+
const attrs = { ...ctx.commonAttrs, "session.id": sessionID }
40+
let duration_ms: number | undefined
41+
42+
if (totals) {
43+
duration_ms = Date.now() - totals.startMs
44+
ctx.instruments.sessionDurationHistogram.record(duration_ms, attrs)
45+
ctx.instruments.sessionTokenGauge.record(totals.tokens, attrs)
46+
ctx.instruments.sessionCostGauge.record(totals.cost, attrs)
47+
}
48+
3549
ctx.logger.emit({
3650
severityNumber: SeverityNumber.INFO,
3751
severityText: "INFO",
3852
timestamp: Date.now(),
3953
observedTimestamp: Date.now(),
4054
body: "session.idle",
41-
attributes: { "event.name": "session.idle", "session.id": sessionID, ...ctx.commonAttrs },
55+
attributes: {
56+
"event.name": "session.idle",
57+
"session.id": sessionID,
58+
total_tokens: totals?.tokens ?? 0,
59+
total_cost_usd: totals?.cost ?? 0,
60+
total_messages: totals?.messages ?? 0,
61+
...ctx.commonAttrs,
62+
},
63+
})
64+
ctx.log("debug", "otel: session.idle", {
65+
sessionID,
66+
...(totals ? { duration_ms, total_tokens: totals.tokens, total_cost_usd: totals.cost, total_messages: totals.messages } : {}),
4267
})
4368
}
4469

4570
/** Emits a `session.error` log event and clears any pending tool spans and permissions for the session. */
4671
export function handleSessionError(e: EventSessionError, ctx: HandlerContext) {
47-
const sessionID = e.properties.sessionID ?? "unknown"
72+
const rawID = e.properties.sessionID
73+
const sessionID = rawID ?? "unknown"
74+
const error = errorSummary(e.properties.error)
75+
if (rawID) ctx.sessionTotals.delete(rawID)
4876
sweepSession(sessionID, ctx)
4977
ctx.logger.emit({
5078
severityNumber: SeverityNumber.ERROR,
@@ -55,8 +83,18 @@ export function handleSessionError(e: EventSessionError, ctx: HandlerContext) {
5583
attributes: {
5684
"event.name": "session.error",
5785
"session.id": sessionID,
58-
error: errorSummary(e.properties.error),
86+
error,
5987
...ctx.commonAttrs,
6088
},
6189
})
90+
ctx.log("error", "otel: session.error", { sessionID, error })
91+
}
92+
93+
/** Increments the retry counter when the session enters a retry state. */
94+
export function handleSessionStatus(e: EventSessionStatus, ctx: HandlerContext) {
95+
if (e.properties.status.type !== "retry") return
96+
const { sessionID, status } = e.properties
97+
const { attempt, message: retryMessage } = status
98+
ctx.instruments.retryCounter.add(1, { ...ctx.commonAttrs, "session.id": sessionID })
99+
ctx.log("debug", "otel: retry counter incremented", { sessionID, attempt, retryMessage })
62100
}

src/index.ts

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import type {
66
EventSessionCreated,
77
EventSessionIdle,
88
EventSessionError,
9+
EventSessionStatus,
910
EventMessageUpdated,
1011
EventMessagePartUpdated,
1112
EventPermissionUpdated,
@@ -17,7 +18,7 @@ import { LEVELS, type Level, type HandlerContext } from "./types.ts"
1718
import { loadConfig, resolveLogLevel } from "./config.ts"
1819
import { probeEndpoint } from "./probe.ts"
1920
import { setupOtel, createInstruments } from "./otel.ts"
20-
import { handleSessionCreated, handleSessionIdle, handleSessionError } from "./handlers/session.ts"
21+
import { handleSessionCreated, handleSessionIdle, handleSessionError, handleSessionStatus } from "./handlers/session.ts"
2122
import { handleMessageUpdated, handleMessagePartUpdated } from "./handlers/message.ts"
2223
import { handlePermissionUpdated, handlePermissionReplied } from "./handlers/permission.ts"
2324
import { handleSessionDiff, handleCommandExecuted } from "./handlers/activity.ts"
@@ -51,6 +52,11 @@ export const OtelPlugin: Plugin = async ({ project, client }) => {
5152
metricPrefix: config.metricPrefix,
5253
})
5354

55+
await log("debug", "config loaded", {
56+
headersSet: !!config.otlpHeaders,
57+
resourceAttributesSet: !!config.resourceAttributes,
58+
})
59+
5460
const probe = await probeEndpoint(config.endpoint)
5561
if (probe.ok) {
5662
await log("info", "OTLP endpoint reachable", { endpoint: config.endpoint, ms: probe.ms })
@@ -73,6 +79,7 @@ export const OtelPlugin: Plugin = async ({ project, client }) => {
7379
const logger = logs.getLogger("com.opencode")
7480
const pendingToolSpans = new Map()
7581
const pendingPermissions = new Map()
82+
const sessionTotals = new Map()
7683
const commonAttrs = { "project.id": project.id } as const
7784

7885
const ctx: HandlerContext = {
@@ -82,6 +89,7 @@ export const OtelPlugin: Plugin = async ({ project, client }) => {
8289
commonAttrs,
8390
pendingToolSpans,
8491
pendingPermissions,
92+
sessionTotals,
8593
}
8694

8795
async function shutdown() {
@@ -113,7 +121,7 @@ export const OtelPlugin: Plugin = async ({ project, client }) => {
113121
const next = resolveLogLevel(cfg.logLevel, minLevel)
114122
if (next !== minLevel) {
115123
minLevel = next
116-
await log("debug", `log level set to "${minLevel}"`)
124+
await log("info", `log level set to "${minLevel}"`)
117125
} else if (cfg.logLevel.toLowerCase() !== minLevel) {
118126
await log("warn", `unknown log level "${cfg.logLevel}", keeping "${minLevel}"`)
119127
}
@@ -155,6 +163,9 @@ export const OtelPlugin: Plugin = async ({ project, client }) => {
155163
case "session.error":
156164
handleSessionError(event as EventSessionError, ctx)
157165
break
166+
case "session.status":
167+
handleSessionStatus(event as EventSessionStatus, ctx)
168+
break
158169
case "session.diff":
159170
handleSessionDiff(event as EventSessionDiff, ctx)
160171
break

src/otel.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,5 +105,33 @@ export function createInstruments(prefix: string): Instruments {
105105
unit: "ms",
106106
description: "Duration of tool executions in milliseconds",
107107
}),
108+
cacheCounter: meter.createCounter(`${prefix}cache.count`, {
109+
unit: "count",
110+
description: "Token cache hits (read) and misses (write) per completed assistant message",
111+
}),
112+
sessionDurationHistogram: meter.createHistogram(`${prefix}session.duration`, {
113+
unit: "ms",
114+
description: "Duration of a session from created to idle in milliseconds",
115+
}),
116+
messageCounter: meter.createCounter(`${prefix}message.count`, {
117+
unit: "count",
118+
description: "Number of completed assistant messages per session",
119+
}),
120+
sessionTokenGauge: meter.createHistogram(`${prefix}session.token.total`, {
121+
unit: "tokens",
122+
description: "Total tokens consumed per session, recorded as a histogram on session idle",
123+
}),
124+
sessionCostGauge: meter.createHistogram(`${prefix}session.cost.total`, {
125+
unit: "USD",
126+
description: "Total cost per session in USD, recorded as a histogram on session idle",
127+
}),
128+
modelUsageCounter: meter.createCounter(`${prefix}model.usage`, {
129+
unit: "count",
130+
description: "Number of completed assistant messages per model and provider",
131+
}),
132+
retryCounter: meter.createCounter(`${prefix}retry.count`, {
133+
unit: "count",
134+
description: "Number of API retries observed via session.status events",
135+
}),
108136
}
109137
}

src/types.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,21 @@ export type Instruments = {
4242
linesCounter: Counter
4343
commitCounter: Counter
4444
toolDurationHistogram: Histogram
45+
cacheCounter: Counter
46+
sessionDurationHistogram: Histogram
47+
messageCounter: Counter
48+
sessionTokenGauge: Histogram
49+
sessionCostGauge: Histogram
50+
modelUsageCounter: Counter
51+
retryCounter: Counter
52+
}
53+
54+
/** Accumulated per-session totals used for gauge snapshots on session.idle. */
55+
export type SessionTotals = {
56+
startMs: number
57+
tokens: number
58+
cost: number
59+
messages: number
4560
}
4661

4762
/** Shared context threaded through every event handler. */
@@ -52,4 +67,5 @@ export type HandlerContext = {
5267
commonAttrs: CommonAttrs
5368
pendingToolSpans: Map<string, PendingToolSpan>
5469
pendingPermissions: Map<string, PendingPermission>
70+
sessionTotals: Map<string, SessionTotals>
5571
}

src/util.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { MAX_PENDING } from "./types.ts"
2+
import type { HandlerContext } from "./types.ts"
23

34
/** Returns a human-readable summary string from an opencode error object. */
45
export function errorSummary(err: { name: string; data?: unknown } | undefined): string {
@@ -20,3 +21,24 @@ export function setBoundedMap<K, V>(map: Map<K, V>, key: K, value: V) {
2021
}
2122
map.set(key, value)
2223
}
24+
25+
/**
26+
* Accumulates token and cost totals for a session, and increments the message count.
27+
* Uses `setBoundedMap` to produce a new object rather than mutating in-place.
28+
* No-ops silently if the session was not previously registered via `handleSessionCreated`.
29+
*/
30+
export function accumulateSessionTotals(
31+
sessionID: string,
32+
tokens: number,
33+
cost: number,
34+
ctx: HandlerContext,
35+
) {
36+
const existing = ctx.sessionTotals.get(sessionID)
37+
if (!existing) return
38+
setBoundedMap(ctx.sessionTotals, sessionID, {
39+
startMs: existing.startMs,
40+
tokens: existing.tokens + tokens,
41+
cost: existing.cost + cost,
42+
messages: existing.messages + 1,
43+
})
44+
}

0 commit comments

Comments
 (0)