Skip to content

Commit 46681d8

Browse files
Merge pull request #8 from DEVtheOPS/feat/enhanced-observability
feat(observability): add debug logging and enhanced metrics (v0.3.0) fix(otel): use UCUM-compliant unit strings for all metric instruments
2 parents 0299b11 + ab79ee1 commit 46681d8

13 files changed

Lines changed: 391 additions & 37 deletions

File tree

.npmrc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
//registry.npmjs.org/:_authToken="${NPM_TOKEN}"

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@devtheops/opencode-plugin-otel",
3-
"version": "0.2.1",
3+
"version": "0.3.0",
44
"module": "src/index.ts",
55
"main": "src/index.ts",
66
"type": "module",

src/handlers/activity.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,35 +5,47 @@ import type { HandlerContext } from "../types.ts"
55
/** Records lines-added and lines-removed metrics for each file in the diff. */
66
export function handleSessionDiff(e: EventSessionDiff, ctx: HandlerContext) {
77
const sessionID = e.properties.sessionID
8+
let totalAdded = 0
9+
let totalRemoved = 0
810
for (const fileDiff of e.properties.diff) {
911
if (fileDiff.additions > 0) {
1012
ctx.instruments.linesCounter.add(fileDiff.additions, {
1113
...ctx.commonAttrs,
1214
"session.id": sessionID,
1315
type: "added",
1416
})
17+
totalAdded += fileDiff.additions
1518
}
1619
if (fileDiff.deletions > 0) {
1720
ctx.instruments.linesCounter.add(fileDiff.deletions, {
1821
...ctx.commonAttrs,
1922
"session.id": sessionID,
2023
type: "removed",
2124
})
25+
totalRemoved += fileDiff.deletions
2226
}
2327
}
28+
ctx.log("debug", "otel: lines_of_code counter incremented", {
29+
sessionID,
30+
files: e.properties.diff.length,
31+
added: totalAdded,
32+
removed: totalRemoved,
33+
})
2434
}
2535

2636
const GIT_COMMIT_RE = /\bgit\s+commit(?![-\w])/
2737

2838
/** Detects `git commit` invocations in bash tool calls and increments the commit counter and emits a `commit` log event. */
2939
export function handleCommandExecuted(e: EventCommandExecuted, ctx: HandlerContext) {
3040
if (e.properties.name !== "bash") return
41+
ctx.log("debug", "otel: command.executed (bash)", { sessionID: e.properties.sessionID, argumentsLength: e.properties.arguments.length })
3142
if (!GIT_COMMIT_RE.test(e.properties.arguments)) return
3243

3344
ctx.instruments.commitCounter.add(1, {
3445
...ctx.commonAttrs,
3546
"session.id": e.properties.sessionID,
3647
})
48+
ctx.log("debug", "otel: commit counter incremented", { sessionID: e.properties.sessionID })
3749
ctx.logger.emit({
3850
severityNumber: SeverityNumber.INFO,
3951
severityText: "INFO",

src/handlers/message.ts

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import { SeverityNumber } from "@opentelemetry/api-logs"
22
import type { AssistantMessage, EventMessageUpdated, EventMessagePartUpdated, ToolPart } from "@opencode-ai/sdk"
3-
import { errorSummary } from "../util.ts"
4-
import { setBoundedMap } from "../util.ts"
3+
import { errorSummary, setBoundedMap, accumulateSessionTotals } from "../util.ts"
54
import type { HandlerContext } from "../types.ts"
65

76
/**
@@ -18,13 +17,38 @@ export function handleMessageUpdated(e: EventMessageUpdated, ctx: HandlerContext
1817
const duration = assistant.time.completed - assistant.time.created
1918
const { tokenCounter, costCounter } = ctx.instruments
2019

20+
const totalTokens = assistant.tokens.input + assistant.tokens.output + assistant.tokens.reasoning
21+
+ assistant.tokens.cache.read + assistant.tokens.cache.write
2122
tokenCounter.add(assistant.tokens.input, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID, type: "input" })
2223
tokenCounter.add(assistant.tokens.output, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID, type: "output" })
2324
tokenCounter.add(assistant.tokens.reasoning, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID, type: "reasoning" })
2425
tokenCounter.add(assistant.tokens.cache.read, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID, type: "cacheRead" })
2526
tokenCounter.add(assistant.tokens.cache.write, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID, type: "cacheCreation" })
2627
costCounter.add(assistant.cost, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID })
2728

29+
if (assistant.tokens.cache.read > 0) {
30+
ctx.instruments.cacheCounter.add(1, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID, type: "cacheRead" })
31+
}
32+
if (assistant.tokens.cache.write > 0) {
33+
ctx.instruments.cacheCounter.add(1, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID, type: "cacheCreation" })
34+
}
35+
36+
ctx.instruments.messageCounter.add(1, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID })
37+
ctx.instruments.modelUsageCounter.add(1, { ...ctx.commonAttrs, "session.id": sessionID, model: modelID, provider: providerID })
38+
39+
accumulateSessionTotals(sessionID, totalTokens, assistant.cost, ctx)
40+
41+
ctx.log("debug", "otel: token+cost counters incremented", {
42+
sessionID,
43+
model: modelID,
44+
input: assistant.tokens.input,
45+
output: assistant.tokens.output,
46+
reasoning: assistant.tokens.reasoning,
47+
cacheRead: assistant.tokens.cache.read,
48+
cacheWrite: assistant.tokens.cache.write,
49+
cost_usd: assistant.cost,
50+
})
51+
2852
if (assistant.error) {
2953
ctx.logger.emit({
3054
severityNumber: SeverityNumber.ERROR,
@@ -98,6 +122,7 @@ export function handleMessagePartUpdated(e: EventMessagePartUpdated, ctx: Handle
98122
sessionID: toolPart.sessionID,
99123
startMs: toolPart.state.time.start,
100124
})
125+
ctx.log("debug", "otel: tool span started", { sessionID: toolPart.sessionID, tool: toolPart.tool, key })
101126
return
102127
}
103128

@@ -138,6 +163,12 @@ export function handleMessagePartUpdated(e: EventMessagePartUpdated, ctx: Handle
138163
...ctx.commonAttrs,
139164
},
140165
})
166+
ctx.log("debug", "otel: tool.duration histogram recorded", {
167+
sessionID: toolPart.sessionID,
168+
tool_name: toolPart.tool,
169+
duration_ms,
170+
success,
171+
})
141172
return ctx.log(success ? "info" : "error", "otel: tool_result", {
142173
sessionID: toolPart.sessionID,
143174
tool_name: toolPart.tool,

src/handlers/permission.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ export function handlePermissionUpdated(e: EventPermissionUpdated, ctx: HandlerC
1111
title: perm.title,
1212
sessionID: perm.sessionID,
1313
})
14+
ctx.log("debug", "otel: permission stored", { permissionID: perm.id, sessionID: perm.sessionID, type: perm.type, title: perm.title })
1415
}
1516

1617
/** Emits a `tool_decision` log event recording whether the permission was accepted or rejected. */
@@ -19,6 +20,7 @@ export function handlePermissionReplied(e: EventPermissionReplied, ctx: HandlerC
1920
const pending = ctx.pendingPermissions.get(permissionID)
2021
ctx.pendingPermissions.delete(permissionID)
2122
const decision = response === "allow" || response === "allowAlways" ? "accept" : "reject"
23+
ctx.log("debug", "otel: tool_decision emitted", { permissionID, sessionID, decision, source: response, tool_name: pending?.title ?? "unknown" })
2224
ctx.logger.emit({
2325
severityNumber: SeverityNumber.INFO,
2426
severityText: "INFO",

src/handlers/session.ts

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
import { SeverityNumber } from "@opentelemetry/api-logs"
2-
import type { EventSessionCreated, EventSessionIdle, EventSessionError } from "@opencode-ai/sdk"
3-
import { errorSummary } from "../util.ts"
2+
import type { EventSessionCreated, EventSessionIdle, EventSessionError, EventSessionStatus } from "@opencode-ai/sdk"
3+
import { errorSummary, setBoundedMap } from "../util.ts"
44
import type { HandlerContext } from "../types.ts"
55

6-
/** Increments the session counter and emits a `session.created` log event. */
6+
/** Increments the session counter, records start time, and emits a `session.created` log event. */
77
export function handleSessionCreated(e: EventSessionCreated, ctx: HandlerContext) {
88
const sessionID = e.properties.info.id
99
const createdAt = e.properties.info.time.created
1010
ctx.instruments.sessionCounter.add(1, { ...ctx.commonAttrs, "session.id": sessionID })
11+
setBoundedMap(ctx.sessionTotals, sessionID, { startMs: createdAt, tokens: 0, cost: 0, messages: 0 })
1112
ctx.logger.emit({
1213
severityNumber: SeverityNumber.INFO,
1314
severityText: "INFO",
@@ -16,7 +17,7 @@ export function handleSessionCreated(e: EventSessionCreated, ctx: HandlerContext
1617
body: "session.created",
1718
attributes: { "event.name": "session.created", "session.id": sessionID, ...ctx.commonAttrs },
1819
})
19-
return ctx.log("info", "otel: session.created", { sessionID })
20+
return ctx.log("info", "otel: session.created", { sessionID, createdAt })
2021
}
2122

2223
function sweepSession(sessionID: string, ctx: HandlerContext) {
@@ -28,23 +29,50 @@ function sweepSession(sessionID: string, ctx: HandlerContext) {
2829
}
2930
}
3031

31-
/** Emits a `session.idle` log event and clears any pending tool spans and permissions for the session. */
32+
/** Emits a `session.idle` log event, records duration and session total histograms, and clears pending state. */
3233
export function handleSessionIdle(e: EventSessionIdle, ctx: HandlerContext) {
3334
const sessionID = e.properties.sessionID
35+
const totals = ctx.sessionTotals.get(sessionID)
36+
ctx.sessionTotals.delete(sessionID)
3437
sweepSession(sessionID, ctx)
38+
39+
const attrs = { ...ctx.commonAttrs, "session.id": sessionID }
40+
let duration_ms: number | undefined
41+
42+
if (totals) {
43+
duration_ms = Date.now() - totals.startMs
44+
ctx.instruments.sessionDurationHistogram.record(duration_ms, attrs)
45+
ctx.instruments.sessionTokenGauge.record(totals.tokens, attrs)
46+
ctx.instruments.sessionCostGauge.record(totals.cost, attrs)
47+
}
48+
3549
ctx.logger.emit({
3650
severityNumber: SeverityNumber.INFO,
3751
severityText: "INFO",
3852
timestamp: Date.now(),
3953
observedTimestamp: Date.now(),
4054
body: "session.idle",
41-
attributes: { "event.name": "session.idle", "session.id": sessionID, ...ctx.commonAttrs },
55+
attributes: {
56+
"event.name": "session.idle",
57+
"session.id": sessionID,
58+
total_tokens: totals?.tokens ?? 0,
59+
total_cost_usd: totals?.cost ?? 0,
60+
total_messages: totals?.messages ?? 0,
61+
...ctx.commonAttrs,
62+
},
63+
})
64+
ctx.log("debug", "otel: session.idle", {
65+
sessionID,
66+
...(totals ? { duration_ms, total_tokens: totals.tokens, total_cost_usd: totals.cost, total_messages: totals.messages } : {}),
4267
})
4368
}
4469

4570
/** Emits a `session.error` log event and clears any pending tool spans and permissions for the session. */
4671
export function handleSessionError(e: EventSessionError, ctx: HandlerContext) {
47-
const sessionID = e.properties.sessionID ?? "unknown"
72+
const rawID = e.properties.sessionID
73+
const sessionID = rawID ?? "unknown"
74+
const error = errorSummary(e.properties.error)
75+
if (rawID) ctx.sessionTotals.delete(rawID)
4876
sweepSession(sessionID, ctx)
4977
ctx.logger.emit({
5078
severityNumber: SeverityNumber.ERROR,
@@ -55,8 +83,18 @@ export function handleSessionError(e: EventSessionError, ctx: HandlerContext) {
5583
attributes: {
5684
"event.name": "session.error",
5785
"session.id": sessionID,
58-
error: errorSummary(e.properties.error),
86+
error,
5987
...ctx.commonAttrs,
6088
},
6189
})
90+
ctx.log("error", "otel: session.error", { sessionID, error })
91+
}
92+
93+
/** Increments the retry counter when the session enters a retry state. */
94+
export function handleSessionStatus(e: EventSessionStatus, ctx: HandlerContext) {
95+
if (e.properties.status.type !== "retry") return
96+
const { sessionID, status } = e.properties
97+
const { attempt, message: retryMessage } = status
98+
ctx.instruments.retryCounter.add(1, { ...ctx.commonAttrs, "session.id": sessionID })
99+
ctx.log("debug", "otel: retry counter incremented", { sessionID, attempt, retryMessage })
62100
}

src/index.ts

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import type {
66
EventSessionCreated,
77
EventSessionIdle,
88
EventSessionError,
9+
EventSessionStatus,
910
EventMessageUpdated,
1011
EventMessagePartUpdated,
1112
EventPermissionUpdated,
@@ -17,7 +18,7 @@ import { LEVELS, type Level, type HandlerContext } from "./types.ts"
1718
import { loadConfig, resolveLogLevel } from "./config.ts"
1819
import { probeEndpoint } from "./probe.ts"
1920
import { setupOtel, createInstruments } from "./otel.ts"
20-
import { handleSessionCreated, handleSessionIdle, handleSessionError } from "./handlers/session.ts"
21+
import { handleSessionCreated, handleSessionIdle, handleSessionError, handleSessionStatus } from "./handlers/session.ts"
2122
import { handleMessageUpdated, handleMessagePartUpdated } from "./handlers/message.ts"
2223
import { handlePermissionUpdated, handlePermissionReplied } from "./handlers/permission.ts"
2324
import { handleSessionDiff, handleCommandExecuted } from "./handlers/activity.ts"
@@ -51,6 +52,11 @@ export const OtelPlugin: Plugin = async ({ project, client }) => {
5152
metricPrefix: config.metricPrefix,
5253
})
5354

55+
await log("debug", "config loaded", {
56+
headersSet: !!config.otlpHeaders,
57+
resourceAttributesSet: !!config.resourceAttributes,
58+
})
59+
5460
const probe = await probeEndpoint(config.endpoint)
5561
if (probe.ok) {
5662
await log("info", "OTLP endpoint reachable", { endpoint: config.endpoint, ms: probe.ms })
@@ -73,6 +79,7 @@ export const OtelPlugin: Plugin = async ({ project, client }) => {
7379
const logger = logs.getLogger("com.opencode")
7480
const pendingToolSpans = new Map()
7581
const pendingPermissions = new Map()
82+
const sessionTotals = new Map()
7683
const commonAttrs = { "project.id": project.id } as const
7784

7885
const ctx: HandlerContext = {
@@ -82,6 +89,7 @@ export const OtelPlugin: Plugin = async ({ project, client }) => {
8289
commonAttrs,
8390
pendingToolSpans,
8491
pendingPermissions,
92+
sessionTotals,
8593
}
8694

8795
async function shutdown() {
@@ -113,7 +121,7 @@ export const OtelPlugin: Plugin = async ({ project, client }) => {
113121
const next = resolveLogLevel(cfg.logLevel, minLevel)
114122
if (next !== minLevel) {
115123
minLevel = next
116-
await log("debug", `log level set to "${minLevel}"`)
124+
await log("info", `log level set to "${minLevel}"`)
117125
} else if (cfg.logLevel.toLowerCase() !== minLevel) {
118126
await log("warn", `unknown log level "${cfg.logLevel}", keeping "${minLevel}"`)
119127
}
@@ -155,6 +163,9 @@ export const OtelPlugin: Plugin = async ({ project, client }) => {
155163
case "session.error":
156164
handleSessionError(event as EventSessionError, ctx)
157165
break
166+
case "session.status":
167+
handleSessionStatus(event as EventSessionStatus, ctx)
168+
break
158169
case "session.diff":
159170
handleSessionDiff(event as EventSessionDiff, ctx)
160171
break

src/otel.ts

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -82,28 +82,56 @@ export function createInstruments(prefix: string): Instruments {
8282
const meter = metrics.getMeter("com.opencode")
8383
return {
8484
sessionCounter: meter.createCounter(`${prefix}session.count`, {
85-
unit: "count",
85+
unit: "{session}",
8686
description: "Count of opencode sessions started",
8787
}),
8888
tokenCounter: meter.createCounter(`${prefix}token.usage`, {
89-
unit: "tokens",
89+
unit: "{token}",
9090
description: "Number of tokens used",
9191
}),
9292
costCounter: meter.createCounter(`${prefix}cost.usage`, {
93-
unit: "USD",
93+
unit: "[USD]",
9494
description: "Cost of the opencode session in USD",
9595
}),
9696
linesCounter: meter.createCounter(`${prefix}lines_of_code.count`, {
97-
unit: "count",
97+
unit: "{line}",
9898
description: "Count of lines of code added or removed",
9999
}),
100100
commitCounter: meter.createCounter(`${prefix}commit.count`, {
101-
unit: "count",
101+
unit: "{commit}",
102102
description: "Number of git commits created",
103103
}),
104104
toolDurationHistogram: meter.createHistogram(`${prefix}tool.duration`, {
105105
unit: "ms",
106106
description: "Duration of tool executions in milliseconds",
107107
}),
108+
cacheCounter: meter.createCounter(`${prefix}cache.count`, {
109+
unit: "{request}",
110+
description: "Token cache activity (cacheRead/cacheCreation) per completed assistant message",
111+
}),
112+
sessionDurationHistogram: meter.createHistogram(`${prefix}session.duration`, {
113+
unit: "ms",
114+
description: "Duration of a session from created to idle in milliseconds",
115+
}),
116+
messageCounter: meter.createCounter(`${prefix}message.count`, {
117+
unit: "{message}",
118+
description: "Number of completed assistant messages per session",
119+
}),
120+
sessionTokenGauge: meter.createHistogram(`${prefix}session.token.total`, {
121+
unit: "{token}",
122+
description: "Total tokens consumed per session, recorded as a histogram on session idle",
123+
}),
124+
sessionCostGauge: meter.createHistogram(`${prefix}session.cost.total`, {
125+
unit: "[USD]",
126+
description: "Total cost per session in USD, recorded as a histogram on session idle",
127+
}),
128+
modelUsageCounter: meter.createCounter(`${prefix}model.usage`, {
129+
unit: "{request}",
130+
description: "Number of completed assistant messages per model and provider",
131+
}),
132+
retryCounter: meter.createCounter(`${prefix}retry.count`, {
133+
unit: "{retry}",
134+
description: "Number of API retries observed via session.status events",
135+
}),
108136
}
109137
}

src/types.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,21 @@ export type Instruments = {
4242
linesCounter: Counter
4343
commitCounter: Counter
4444
toolDurationHistogram: Histogram
45+
cacheCounter: Counter
46+
sessionDurationHistogram: Histogram
47+
messageCounter: Counter
48+
sessionTokenGauge: Histogram
49+
sessionCostGauge: Histogram
50+
modelUsageCounter: Counter
51+
retryCounter: Counter
52+
}
53+
54+
/** Accumulated per-session totals used for gauge snapshots on session.idle. */
55+
export type SessionTotals = {
56+
startMs: number
57+
tokens: number
58+
cost: number
59+
messages: number
4560
}
4661

4762
/** Shared context threaded through every event handler. */
@@ -52,4 +67,5 @@ export type HandlerContext = {
5267
commonAttrs: CommonAttrs
5368
pendingToolSpans: Map<string, PendingToolSpan>
5469
pendingPermissions: Map<string, PendingPermission>
70+
sessionTotals: Map<string, SessionTotals>
5571
}

0 commit comments

Comments
 (0)