Skip to content

Commit 8e8e5ca

Browse files
fix(tracing): scope root spans to runs
1 parent c9fee7f commit 8e8e5ca

7 files changed

Lines changed: 240 additions & 90 deletions

File tree

src/handlers/message.ts

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { SeverityNumber } from "@opentelemetry/api-logs"
2-
import { SpanStatusCode, SpanKind, trace } from "@opentelemetry/api"
2+
import { SpanStatusCode, SpanKind } from "@opentelemetry/api"
33
import type { AssistantMessage, EventMessageUpdated, EventMessagePartUpdated, ToolPart } from "@opencode-ai/sdk"
44
import {
55
AGENT_NAME,
@@ -27,7 +27,16 @@ import {
2727
TOOL_NAME,
2828
TOOL_PARAMETERS,
2929
} from "@arizeai/openinference-semantic-conventions"
30-
import { agentAttrs, errorSummary, setBoundedMap, accumulateSessionTotals, getSessionAgentMeta, isMetricEnabled, isTraceEnabled } from "../util.ts"
30+
import {
31+
agentAttrs,
32+
errorSummary,
33+
setBoundedMap,
34+
accumulateSessionTotals,
35+
getSessionAgentMeta,
36+
isMetricEnabled,
37+
isTraceEnabled,
38+
resolveSessionTraceContext,
39+
} from "../util.ts"
3140
import type { HandlerContext } from "../types.ts"
3241

3342
const OPENINFERENCE_SPAN_KIND = SemanticConventions.OPENINFERENCE_SPAN_KIND
@@ -260,11 +269,6 @@ export function handleMessagePartUpdated(e: EventMessagePartUpdated, ctx: Handle
260269
const { agentName, agentType } = getSessionAgentMeta(toolPart.sessionID, ctx)
261270
const toolSpan = isTraceEnabled("tool", ctx)
262271
? (() => {
263-
const sessionSpan = ctx.sessionSpans.get(toolPart.sessionID)
264-
const baseCtx = ctx.rootContext()
265-
const parentCtx = sessionSpan
266-
? trace.setSpan(baseCtx, sessionSpan)
267-
: baseCtx
268272
return ctx.tracer.startSpan(
269273
`${ctx.tracePrefix}tool.${toolPart.tool}`,
270274
{
@@ -283,7 +287,7 @@ export function handleMessagePartUpdated(e: EventMessagePartUpdated, ctx: Handle
283287
...ctx.commonAttrs,
284288
},
285289
},
286-
parentCtx,
290+
resolveSessionTraceContext(toolPart.sessionID, ctx),
287291
)
288292
})()
289293
: undefined
@@ -319,11 +323,6 @@ export function handleMessagePartUpdated(e: EventMessagePartUpdated, ctx: Handle
319323

320324
if (isTraceEnabled("tool", ctx)) {
321325
const toolSpan = pending?.span ?? (() => {
322-
const sessionSpan = ctx.sessionSpans.get(toolPart.sessionID)
323-
const baseCtx = ctx.rootContext()
324-
const parentCtx = sessionSpan
325-
? trace.setSpan(baseCtx, sessionSpan)
326-
: baseCtx
327326
return ctx.tracer.startSpan(
328327
`${ctx.tracePrefix}tool.${toolPart.tool}`,
329328
{
@@ -340,7 +339,7 @@ export function handleMessagePartUpdated(e: EventMessagePartUpdated, ctx: Handle
340339
...ctx.commonAttrs,
341340
},
342341
},
343-
parentCtx,
342+
resolveSessionTraceContext(toolPart.sessionID, ctx),
344343
)
345344
})()
346345
toolSpan.setAttributes({ [AGENT_NAME]: agentName, "agent.type": agentType })
@@ -403,8 +402,9 @@ export function handleMessagePartUpdated(e: EventMessagePartUpdated, ctx: Handle
403402

404403
/**
405404
* Starts an LLM span for an assistant message when it first appears in `message.updated`.
406-
* The span is parented to the session span and carries `gen_ai.*` semantic attributes for
407-
* the model and provider. It is ended in `handleMessageUpdated` once the message completes.
405+
* The span is parented to the active run or subagent span and carries `gen_ai.*` semantic
406+
* attributes for the model and provider. It is ended in `handleMessageUpdated` once the
407+
* message completes.
408408
*
409409
* Only called for assistant messages that have not yet completed (`time.completed` absent).
410410
*/
@@ -420,11 +420,6 @@ export function startMessageSpan(
420420
const msgKey = `${sessionID}:${messageID}`
421421
if (ctx.messageSpans.has(msgKey)) return
422422
const { agentName, agentType } = getSessionAgentMeta(sessionID, ctx)
423-
const sessionSpan = ctx.sessionSpans.get(sessionID)
424-
const baseCtx = ctx.rootContext()
425-
const parentCtx = sessionSpan
426-
? trace.setSpan(baseCtx, sessionSpan)
427-
: baseCtx
428423

429424
const msgSpan = ctx.tracer.startSpan(
430425
`${ctx.tracePrefix}llm`,
@@ -449,7 +444,7 @@ export function startMessageSpan(
449444
...ctx.commonAttrs,
450445
},
451446
},
452-
parentCtx,
447+
resolveSessionTraceContext(sessionID, ctx),
453448
)
454449
setBoundedMap(ctx.messageSpans, msgKey, msgSpan)
455450
}

src/handlers/session.ts

Lines changed: 101 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,82 @@
11
import { SeverityNumber } from "@opentelemetry/api-logs"
2-
import { SpanStatusCode, trace } from "@opentelemetry/api"
2+
import { SpanStatusCode } from "@opentelemetry/api"
33
import type { EventSessionCreated, EventSessionIdle, EventSessionError, EventSessionStatus } from "@opencode-ai/sdk"
4-
import { AGENT_NAME, OpenInferenceSpanKind, SemanticConventions, SESSION_ID } from "@arizeai/openinference-semantic-conventions"
5-
import { agentAttrs, errorSummary, getSessionAgentMeta, setBoundedMap, isMetricEnabled, isTraceEnabled } from "../util.ts"
4+
import {
5+
AGENT_NAME,
6+
INPUT_MIME_TYPE,
7+
INPUT_VALUE,
8+
LLM_INPUT_MESSAGES,
9+
MimeType,
10+
OpenInferenceSpanKind,
11+
SemanticConventions,
12+
SESSION_ID,
13+
} from "@arizeai/openinference-semantic-conventions"
14+
import {
15+
agentAttrs,
16+
errorSummary,
17+
getSessionAgentMeta,
18+
setBoundedMap,
19+
isMetricEnabled,
20+
isTraceEnabled,
21+
resolveSessionTraceContext,
22+
} from "../util.ts"
623
import type { HandlerContext, SessionAgentType } from "../types.ts"
724

825
const OPENINFERENCE_SPAN_KIND = SemanticConventions.OPENINFERENCE_SPAN_KIND
926

27+
/**
 * Starts the run span for a session, or refreshes the attributes of one that is already open.
 *
 * Does nothing when "session" traces are disabled. If `ctx.runSpans` already holds a span for
 * `sessionID`, only its agent, prompt, and model attributes are updated and no new span is
 * created. Otherwise a `${ctx.tracePrefix}session` span is started on `ctx.rootContext()`,
 * tagged as a primary (non-subagent) agent span, and registered in `ctx.runSpans`,
 * `ctx.runSpanContexts`, and `ctx.sessionRunRoots` (the session is recorded as its own run root).
 * The span is not ended here.
 *
 * @param sessionID - Session the run belongs to; also used as the map key.
 * @param agent - Agent name written to the span.
 * @param promptText - User prompt; prompt attributes are omitted when this is empty.
 * @param model - Model identifier written under the `model` attribute.
 * @param startTime - Start time handed to the tracer for a newly created span.
 * @param ctx - Shared handler state (tracer, span maps, common attributes).
 */
export function handleRunStarted(
  sessionID: string,
  agent: string,
  promptText: string,
  model: string,
  startTime: number,
  ctx: HandlerContext,
) {
  if (!isTraceEnabled("session", ctx)) return

  // Prompt-derived attributes are attached only when there is prompt text to report.
  const promptAttrs = promptText
    ? {
        [INPUT_VALUE]: promptText,
        [INPUT_MIME_TYPE]: MimeType.TEXT,
        [LLM_INPUT_MESSAGES]: JSON.stringify([{ role: "user", content: promptText }]),
      }
    : {}

  const activeRun = ctx.runSpans.get(sessionID)
  if (activeRun) {
    // A run is already open for this session: refresh its attributes instead of nesting a new span.
    activeRun.setAttributes({ [AGENT_NAME]: agent, ...promptAttrs, model })
    return
  }

  const span = ctx.tracer.startSpan(
    `${ctx.tracePrefix}session`,
    {
      startTime,
      attributes: {
        [OPENINFERENCE_SPAN_KIND]: OpenInferenceSpanKind.AGENT,
        [SESSION_ID]: sessionID,
        [AGENT_NAME]: agent,
        "agent.type": "primary",
        "session.is_subagent": false,
        ...promptAttrs,
        model,
        // Spread last so shared attributes take precedence over the keys above.
        ...ctx.commonAttrs,
      },
    },
    ctx.rootContext(),
  )

  setBoundedMap(ctx.runSpans, sessionID, span)
  setBoundedMap(ctx.runSpanContexts, sessionID, span.spanContext())
  setBoundedMap(ctx.sessionRunRoots, sessionID, sessionID)
}
79+
1080
/** Increments the session counter, records start time, starts a child session span when the session has a `parentID`, and emits a `session.created` log event. */
1181
export function handleSessionCreated(e: EventSessionCreated, ctx: HandlerContext) {
1282
const { id: sessionID, time, parentID } = e.properties.info
@@ -18,16 +88,9 @@ export function handleSessionCreated(e: EventSessionCreated, ctx: HandlerContext
1888
}
1989
setBoundedMap(ctx.sessionTotals, sessionID, { startMs: createdAt, tokens: 0, cost: 0, messages: 0, agent: "unknown", agentType })
2090

21-
// WARNING: disabling "session" traces while "llm" or "tool" traces remain enabled
22-
// leaves those child spans without a local session parent. If OPENCODE_TRACEPARENT
23-
// is set, they fall back to that remote parent; otherwise they become root spans.
24-
if (isTraceEnabled("session", ctx)) {
25-
const parentSpan = parentID ? ctx.sessionSpans.get(parentID) : undefined
26-
const baseCtx = ctx.rootContext()
27-
const spanCtx = parentSpan
28-
? trace.setSpan(baseCtx, parentSpan)
29-
: baseCtx
30-
91+
if (isTraceEnabled("session", ctx) && parentID) {
92+
const runRootID = ctx.sessionRunRoots.get(parentID) ?? parentID
93+
setBoundedMap(ctx.sessionRunRoots, sessionID, runRootID)
3194
const sessionSpan = ctx.tracer.startSpan(
3295
`${ctx.tracePrefix}session`,
3396
{
@@ -41,9 +104,10 @@ export function handleSessionCreated(e: EventSessionCreated, ctx: HandlerContext
41104
...ctx.commonAttrs,
42105
},
43106
},
44-
spanCtx,
107+
resolveSessionTraceContext(parentID, ctx),
45108
)
46109
setBoundedMap(ctx.sessionSpans, sessionID, sessionSpan)
110+
setBoundedMap(ctx.sessionSpanContexts, sessionID, sessionSpan.spanContext())
47111
}
48112

49113
ctx.emitLog({
@@ -128,6 +192,21 @@ export function handleSessionIdle(e: EventSessionIdle, ctx: HandlerContext) {
128192
sessionSpan.end()
129193
ctx.sessionSpans.delete(sessionID)
130194
}
195+
const runSpan = ctx.runSpans.get(sessionID)
196+
if (runSpan) {
197+
if (totals) {
198+
runSpan.setAttributes({
199+
[AGENT_NAME]: totals.agent,
200+
"agent.type": totals.agentType,
201+
"session.total_tokens": totals.tokens,
202+
"session.total_cost_usd": totals.cost,
203+
"session.total_messages": totals.messages,
204+
})
205+
}
206+
runSpan.setStatus({ code: SpanStatusCode.OK })
207+
runSpan.end()
208+
ctx.runSpans.delete(sessionID)
209+
}
131210

132211
ctx.emitLog({
133212
severityNumber: SeverityNumber.INFO,
@@ -173,6 +252,14 @@ export function handleSessionError(e: EventSessionError, ctx: HandlerContext) {
173252
sessionSpan.end()
174253
ctx.sessionSpans.delete(rawID)
175254
}
255+
const runSpan = ctx.runSpans.get(rawID)
256+
if (runSpan) {
257+
if (totals) runSpan.setAttributes({ [AGENT_NAME]: totals.agent, "agent.type": totals.agentType })
258+
runSpan.setStatus({ code: SpanStatusCode.ERROR, message: error })
259+
runSpan.setAttribute("error", error)
260+
runSpan.end()
261+
ctx.runSpans.delete(rawID)
262+
}
176263
}
177264

178265
ctx.emitLog({

src/index.ts

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,12 @@ import { loadConfig, parseAttributePairs, resolveHelperPath, resolveLogLevel } f
2121
import { probeEndpoint } from "./probe.ts"
2222
import { setupOtel, createInstruments } from "./otel.ts"
2323
import { remoteParentContext } from "./trace-context.ts"
24-
import { handleSessionCreated, handleSessionIdle, handleSessionError, handleSessionStatus } from "./handlers/session.ts"
24+
import { handleSessionCreated, handleSessionIdle, handleSessionError, handleSessionStatus, handleRunStarted } from "./handlers/session.ts"
2525
import { handleMessageUpdated, handleMessagePartUpdated, startMessageSpan } from "./handlers/message.ts"
2626
import { handlePermissionUpdated, handlePermissionReplied } from "./handlers/permission.ts"
2727
import { handleSessionDiff, handleCommandExecuted } from "./handlers/activity.ts"
28-
import { agentAttrs, getSessionAgentMeta } from "./util.ts"
28+
import { agentAttrs, getSessionAgentMeta, setBoundedMap } from "./util.ts"
29+
import type { SessionTotals } from "./types.ts"
2930

3031
const PLUGIN_VERSION: string = (pkg as { version?: string }).version ?? "unknown"
3132

@@ -103,7 +104,11 @@ export const OtelPlugin: Plugin = async ({ project, client, directory, worktree
103104
const pendingPermissions = new Map()
104105
const sessionTotals = new Map()
105106
const sessionDiffTotals = new Map()
107+
const runSpans = new Map()
108+
const runSpanContexts = new Map()
109+
const sessionRunRoots = new Map()
106110
const sessionSpans = new Map()
111+
const sessionSpanContexts = new Map()
107112
const messageSpans = new Map()
108113
const sessionInputs = new Map()
109114
const messageOutputs = new Map()
@@ -139,7 +144,11 @@ export const OtelPlugin: Plugin = async ({ project, client, directory, worktree
139144
tracer,
140145
tracePrefix: config.metricPrefix,
141146
rootContext,
147+
runSpans,
148+
runSpanContexts,
149+
sessionRunRoots,
142150
sessionSpans,
151+
sessionSpanContexts,
143152
messageSpans,
144153
sessionInputs,
145154
messageOutputs,
@@ -183,9 +192,18 @@ export const OtelPlugin: Plugin = async ({ project, client, directory, worktree
183192

184193
"chat.message": safe("chat.message", async (input, output) => {
185194
const agent = input.agent ?? "unknown"
195+
const startTime = Date.now()
196+
const existingTotals = sessionTotals.get(input.sessionID)
197+
const nextTotals: SessionTotals = {
198+
startMs: existingTotals?.startMs ?? startTime,
199+
tokens: existingTotals?.tokens ?? 0,
200+
cost: existingTotals?.cost ?? 0,
201+
messages: existingTotals?.messages ?? 0,
202+
agent,
203+
agentType: existingTotals?.agentType ?? "primary",
204+
}
205+
setBoundedMap(sessionTotals, input.sessionID, nextTotals)
186206
const { agentType } = getSessionAgentMeta(input.sessionID, ctx)
187-
const totals = sessionTotals.get(input.sessionID)
188-
if (totals) totals.agent = agent
189207
const sessionSpan = sessionSpans.get(input.sessionID)
190208
if (sessionSpan) sessionSpan.setAttributes({ [AGENT_NAME]: agent, "agent.type": agentType })
191209
const promptText = output.parts.map((part) => {
@@ -203,12 +221,22 @@ export const OtelPlugin: Plugin = async ({ project, client, directory, worktree
203221
}
204222
}).filter(Boolean).join("\n")
205223
sessionInputs.set(input.sessionID, promptText)
224+
if (!sessionSpan) {
225+
handleRunStarted(
226+
input.sessionID,
227+
agent,
228+
promptText,
229+
input.model ? `${input.model.providerID}/${input.model.modelID}` : "unknown",
230+
startTime,
231+
ctx,
232+
)
233+
}
206234
const promptLength = promptText.length
207235
emitLog({
208236
severityNumber: SeverityNumber.INFO,
209237
severityText: "INFO",
210-
timestamp: Date.now(),
211-
observedTimestamp: Date.now(),
238+
timestamp: startTime,
239+
observedTimestamp: startTime,
212240
body: "user_prompt",
213241
attributes: {
214242
"event.name": "user_prompt",

src/types.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import type { Context, Counter, Gauge, Histogram, Span, Tracer } from "@opentelemetry/api"
1+
import type { Context, Counter, Gauge, Histogram, Span, SpanContext, Tracer } from "@opentelemetry/api"
22
import type { LogRecord } from "@opentelemetry/api-logs"
33

44
/** Numeric priority map for log levels; higher value = higher severity. */
@@ -82,7 +82,11 @@ export type HandlerContext = {
8282
tracer: Tracer
8383
tracePrefix: string
8484
rootContext: () => Context
85+
runSpans: Map<string, Span>
86+
runSpanContexts: Map<string, SpanContext>
87+
sessionRunRoots: Map<string, string>
8588
sessionSpans: Map<string, Span>
89+
sessionSpanContexts: Map<string, SpanContext>
8690
messageSpans: Map<string, Span>
8791
sessionInputs: Map<string, string>
8892
messageOutputs: Map<string, string>

src/util.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { trace } from "@opentelemetry/api"
12
import { MAX_PENDING } from "./types.ts"
23
import type { HandlerContext, SessionAgentType } from "./types.ts"
34

@@ -22,6 +23,19 @@ export function setBoundedMap<K, V>(map: Map<K, V>, key: K, value: V) {
2223
map.set(key, value)
2324
}
2425

26+
/**
 * Chooses the parent trace context for spans created on behalf of `sessionID`.
 *
 * Candidates are tried in order and the first hit wins:
 * 1. the session's span in `ctx.sessionSpans`
 * 2. the session's stored span context in `ctx.sessionSpanContexts`
 * 3. the run span of the session's run root (looked up in `ctx.sessionRunRoots`,
 *    falling back to `sessionID` itself)
 * 4. that run root's stored span context in `ctx.runSpanContexts`
 *
 * When nothing matches, the unmodified `ctx.rootContext()` is returned.
 */
export function resolveSessionTraceContext(sessionID: string, ctx: HandlerContext) {
  const root = ctx.rootContext()

  const ownSpan = ctx.sessionSpans.get(sessionID)
  if (ownSpan) return trace.setSpan(root, ownSpan)

  const ownSpanContext = ctx.sessionSpanContexts.get(sessionID)
  if (ownSpanContext) return trace.setSpanContext(root, ownSpanContext)

  // No session-level parent: fall back to the run this session belongs to.
  const rootID = ctx.sessionRunRoots.get(sessionID) ?? sessionID

  const liveRun = ctx.runSpans.get(rootID)
  if (liveRun) return trace.setSpan(root, liveRun)

  const storedRun = ctx.runSpanContexts.get(rootID)
  if (!storedRun) return root
  return trace.setSpanContext(root, storedRun)
}
38+
2539
/**
2640
* Returns `true` if the metric name (without prefix) is not in the disabled set.
2741
* The `name` should be the suffix after the metric prefix, e.g. `"session.count"`.

0 commit comments

Comments
 (0)