diff --git a/docs/design/workflow-tracing-gaps.md b/docs/design/workflow-tracing-gaps.md new file mode 100644 index 0000000000..b0247e9d80 --- /dev/null +++ b/docs/design/workflow-tracing-gaps.md @@ -0,0 +1,376 @@ +# Workflow 级 Span 粒度不足分析 (P1) + +> 基于 2026-05-13 对 qwen-code origin/main 的复核 + +## 现状 + +qwen-code 已具备 tracing 基础设施: + +| 组件 | 位置 | 说明 | +| ------------- | ------------------------------------------------ | -------------------------------------------------------- | +| Span 类型定义 | `packages/core/src/telemetry/session-tracing.ts` | `interaction`、`llm_request`、`tool`、`tool.execution` | +| Tracer 工具 | `packages/core/src/telemetry/tracer.ts` | session root context、`withSpan`、`startSpanWithContext` | +| 交互入口 | `packages/core/src/core/client.ts` | 顶层交互显式启动 `interaction` span | +| 生命周期管理 | — | AsyncLocalStorage + WeakRef + TTL cleanup | + +当前 runtime 中稳定接入的主要是两类 generic spans: + +- `api.generateContent` / `api.generateContentStream` +- `tool.` + +**结论:已进入"有 tracing 主干"阶段,但尚未把 agent workflow 的阶段边界完整编码进 trace 树。** + +### 对比:claude-code 已实现的 span 类型 + +参考 `claude-code/src/utils/telemetry/sessionTracing.ts` (line 49): + +- `interaction` +- `llm_request` +- `tool` +- `tool.blocked_on_user` +- `tool.execution` +- `hook` + +## 缺失项 + +| 缺失 span / 机制 | 影响 | +| ------------------------------------------ | ----------------------------------------------- | +| `permission_wait` / `blocked_on_user` span | 无法区分审批等待 vs 工具执行耗时 | +| `hook` span | hook 耗时被折叠进 tool span,定位边界不清 | +| `subagent` root span | subagent 内部 llm/tool 调用无法形成 trace 子树 | +| `tool.execution` 真实接线 | helper 已定义但主链路未调用 | +| 稳定的 parent-child wiring | spans 多为 session root 下的 sibling 而非层级树 | + +## 逐项分析 + +### 1. 用户审批等待不在 trace 中 + +工具调用等待审批时,状态迁移路径为 `awaiting_approval` → `scheduled` → 执行。 + +- "等待用户确认"只是状态迁移,不是 trace 节点 +- trace 上看不到审批等待耗时 +- 工具慢时无法区分是"卡在等用户"还是"工具本身执行慢" + +### 2. Hook 有事件记录但没有独立 span + +Pre/Post hook 执行后产出 `HookCallEvent`,走 `logHookCall()`,但不建立独立 OTel span。 + +- hook 变慢时表现为外层 tool span 变慢 +- hook 失败时表现为 "tool 失败" +- trace 无法回答"时间花在 hook 还是 tool.execution 上" + +### 3. Subagent 是 log/metric 而非 trace subtree + +subagent 启动/完成时记录 `SubagentExecutionEvent` 并进入 log/metric,但没有形成显式 span 子树。 + +- 能统计"哪个 subagent 跑过" +- 不能顺着 trace 看"这个 subagent 触发了哪些 llm/tool 调用" +- 并发 subagent 场景下因果链不清 + +### 4. tool.execution helper 已定义但未接入主链路 + +`session-tracing.ts` 中已有 `startToolExecutionSpan()` / `endToolExecutionSpan()`,但非测试代码中未见调用点。 + +当前实际 trace 树: + +``` +session-root + interaction + api.generateContent + tool.Bash + subagent_execution (log/metric) + hook_call (event/QwenLogger) +``` + +理想 trace 树: + +``` +interaction + llm_request + tool + tool.blocked_on_user + hook(pre) + tool.execution + hook(post) + subagent + interaction + llm_request + tool +``` + +### 5. Parent-child wiring 不够稳定 + +interaction span 已存在,但很多运行中的 spans 挂在 session root 下作为 sibling,而不是 interaction 的子节点。 + +- 调用树偏平 +- 节点间因果关系不直观 +- 从一个用户轮次追到内部 llm/tool/hook/subagent 的体验不连续 + +## 影响 + +- traces 有基础价值,但不足以支撑 workflow 级排障 +- 无法直接回答"这轮慢在等用户、hook,还是 tool 真执行" +- 无法把 subagent 运行过程还原为可阅读的 trace 子树 +- hook 问题被折叠进 tool span,定位边界不清 +- 在 Jaeger / Tempo / ARMS 上的树比 claude-code 更平、更难读 + +--- + +## claude-code 方案复用分析 + +> 基于 2026-05-13 对 claude-code 源码的深度对比 + +### claude-code 的 tracing 架构 + +claude-code 在 `src/utils/telemetry/sessionTracing.ts` 中实现了一个**统一的、基于双 ALS 的 span 管理系统**: + +``` + interactionContext (ALS) toolContext (ALS) + │ │ + ▼ ▼ + ┌─────────────────────┐ ┌─────────────────────┐ + │ interaction span │ │ tool span │ + │ (session root) │ │ (child of intxn) │ + └─────────────────────┘ └─────────────────────┘ + ▲ parent of ▲ parent of + │ │ + ┌───────┴───────┐ ┌──────────┼──────────┐ + │ │ │ │ │ + llm_request tool blocked execution hook + _on_user +``` + +**核心机制:** + +| 机制 | 实现 | +| ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| 双 ALS | `interactionContext` 存当前 interaction span;`toolContext` 存当前 tool span | +| parent 解析 | 每种 span 类型硬编码从哪个 ALS 取 parent:`llm_request`/`tool` 取 `interactionContext`;`blocked_on_user`/`execution`/`hook` 取 `toolContext`;`hook` 有 fallback 到 `interactionContext` | +| 生命周期 | enterWith 注入 → span 运行 → enterWith(undefined) 清除 | +| 查找 span | 非 ALS 存储的 span(如 blocked_on_user)通过 `activeSpans` Map 按 `span.type` 反查 | +| 内存管理 | ALS 持有的 span 用 WeakRef;非 ALS 持有的 span 用 strongRef 防 GC;TTL 30min 自动清理 | + +**claude-code tool span 完整生命周期** (`toolExecution.ts`): + +``` +startToolSpan(name, attrs) // → toolContext.enterWith(spanCtx) + startToolBlockedOnUserSpan() // → parent = toolContext.getStore() + [permission resolution / user prompt] + endToolBlockedOnUserSpan(decision, source) + startToolExecutionSpan() // → parent = toolContext.getStore() + [tool.call()] + endToolExecutionSpan({ success }) +endToolSpan(result) // → toolContext.enterWith(undefined) +``` + +**claude-code hook span** (`hooks.ts`): + +``` +startHookSpan(event, name, count, defs) // → parent = toolContext ?? interactionContext + [parallel hook execution] +endHookSpan(span, { success, blocking, ... }) +``` + +### qwen-code 现有架构 vs claude-code + +#### 根本差异:两套断裂的 span 创建路径 + +这是 qwen-code 当前最关键的架构问题: + +| 层 | 文件 | 用法 | parent 解析 | +| ------------------ | -------------------- | ------------------------------------------------------------------------------------------- | --------------------------------------------------------- | +| session-tracing 层 | `session-tracing.ts` | `startInteractionSpan` / `startLLMRequestSpan` / `startToolSpan` / `startToolExecutionSpan` | 显式从 `interactionContext` ALS 取 parent | +| tracer 层 | `tracer.ts` | `withSpan` / `startSpanWithContext` | 从 `context.active()` 取 parent,fallback 到 session root | + +**runtime 实际调用情况:** + +- `startInteractionSpan` → **已接入** (`client.ts` line 956),写入 `interactionContext` ALS +- `startLLMRequestSpan` / `endLLMRequestSpan` → **未接入**,runtime 用的是 `withSpan('api.generateContent', ...)` (在 `loggingContentGenerator.ts`) +- `startToolSpan` / `endToolSpan` → **未接入**,runtime 用的是 `withSpan('tool.${name}', ...)` (在 `coreToolScheduler.ts`) +- `startToolExecutionSpan` / `endToolExecutionSpan` → **未接入** + +**后果:** + +`withSpan` 的 `getParentContext()` 先检查 `context.active()`(OTel 原生 context),找不到活跃 span 时回退到 session root context。它**完全不读取 `interactionContext` ALS**。 + +因此 interaction span 和 LLM/tool spans 变成了 session root 下的**平级 sibling**,而不是 parent-child 树: + +``` +session-root + ├── interaction (来自 session-tracing, 写入了 interactionContext ALS) + ├── api.generateContent (来自 withSpan, 不读 interactionContext → 挂到 session root) + ├── tool.Bash (来自 withSpan, 同上) + └── tool.Read (来自 withSpan, 同上) +``` + +**而 claude-code 中,只有一套 span 创建路径(sessionTracing.ts),所有 span 都走同一套 ALS → OTel context 转换逻辑,所以树是完整的。** + +#### 逐项复用评估 + +##### 1. 双 ALS + 显式 parent 解析 — 可复用,是核心修复 + +| 维度 | claude-code | qwen-code | +| ------------ | ----------------------------------------------------- | -------------------------------------------- | +| ALS 数量 | 2 (`interactionContext` + `toolContext`) | 1 (`interactionContext`,无 `toolContext`) | +| parent 解析 | 每种 span 类型显式指定从哪个 ALS 取 parent | `withSpan` 统一走 `context.active()` | +| context 注入 | `trace.setSpan(otelContext.active(), parentCtx.span)` | `withSpan` 内部由 `startActiveSpan` 隐式注入 | + +**复用方案:** + +qwen-code 的 `session-tracing.ts` 已经实现了与 claude-code **几乎相同的 parent 解析模式**: + +```typescript +// qwen-code session-tracing.ts (已有但未用) +export function startLLMRequestSpan(model, promptId): Span { + const parentCtx = interactionContext.getStore(); + const ctx = parentCtx + ? trace.setSpan(otelContext.active(), parentCtx.span) + : otelContext.active(); + // ... +} +``` + +这段代码与 claude-code 的 `startLLMRequestSpan` 逻辑**完全一致**。 + +**核心修复路径:废弃 runtime 中的 `withSpan('api.*')` / `withSpan('tool.*')` 调用,改为调用 session-tracing 的 typed helpers。** 不需要重写 session-tracing 层——它的 API 已经就绪。 + +需要新增的只有: + +- 增加 `toolContext` ALS(仿 claude-code) +- 增加 `blocked_on_user` 和 `hook` span 类型及 helper 函数 + +##### 2. tool.blocked_on_user — 需要适配审批流差异 + +| 维度 | claude-code | qwen-code | +| ------------- | ------------------------------------------ | -------------------------------------------------------------------------- | +| 审批位置 | 在 `toolExecution.ts` 内,tool span 内部 | 在 `coreToolScheduler._schedule()` 内,tool span 之前 | +| 审批模式 | 同步等待 `resolveHookPermissionDecision()` | 状态机驱动:`validating` → `awaiting_approval` → `scheduled` → `executing` | +| span 覆盖范围 | tool span 包含 blocked + execution | tool span(`withSpan`) 只包含 execution(从 `executeSingleToolCall` 开始) | + +**关键差异:** qwen-code 的 `executeSingleToolCall` 入口检查 `toolCall.status !== 'scheduled'` 才继续——也就是说调用到这里时审批已经完成。Tool span 的 `withSpan` 包不住审批等待。 + +**适配方案(两种):** + +**方案 A — 前移 tool span 起点(推荐):** + +将 `startToolSpan` 调用从 `executeSingleToolCall` 移到 `_schedule` 中审批检查之前,使 tool span 覆盖完整生命周期。在进入 `awaiting_approval` 状态时 `startToolBlockedOnUserSpan`,在审批完成(`scheduled`)时 `endToolBlockedOnUserSpan`。 + +``` +_schedule(): + startToolSpan(name) // ← 新增 + startToolBlockedOnUserSpan() // ← 新增,进入 awaiting_approval 时 + [状态机等待] + endToolBlockedOnUserSpan(decision) // ← 新增,进入 scheduled 时 +executeSingleToolCall(): + startToolExecutionSpan() // ← 接入已有 helper + [hook + execute] + endToolExecutionSpan() + endToolSpan() // ← 需要在 finally 中 +``` + +**方案 B — 保持 tool span 位置不变,单独追踪审批:** + +在 `_schedule` 中独立创建 `approval_wait` span(不作为 tool 的 child),挂到 interaction 下。好处是改动更小,坏处是与 claude-code 模型不一致、trace 树可读性差。 + +**建议采用方案 A**,因为: + +- 与 claude-code 的 trace 树结构一致 +- trace 上一个 tool 节点就能看到"等了多久 + 执行了多久" +- 状态机驱动的特性只影响 span start/end 的触发时机,不影响 parent-child 建模 + +##### 3. hook span — 可直接复用 + +| 维度 | claude-code | qwen-code | +| ------------- | ----------------------------------- | -------------------------------------------------------------------- | +| hook 执行入口 | `executeHooks()` in `hooks.ts` | `firePreToolUseHook`/`firePostToolUseHook` via `hookEventHandler.ts` | +| 现有记录方式 | OTel span + Perfetto span | `HookCallEvent` → `QwenLogger` (无 OTel) | +| parent | `toolContext ?? interactionContext` | — | + +**复用方案:** + +1. 在 `session-tracing.ts` 新增 `startHookSpan` / `endHookSpan`(parent = `toolContext ?? interactionContext`,与 claude-code 一致) +2. 在 `coreToolScheduler.ts` 的 `executeSingleToolCall` 中,pre/post hook 调用前后分别 start/end hook span +3. 保留现有 `logHookCall` 事件记录(两套并行,不互斥) + +改动量低,不影响现有 hook 逻辑。 + +##### 4. tool.execution — 已有 helper,只需接线 + +qwen-code 的 `startToolExecutionSpan(parentToolSpan)` / `endToolExecutionSpan(span, metadata)` 已经完整实现,只需在 `executeSingleToolCall` 中调用: + +```typescript +// coreToolScheduler.ts executeSingleToolCall 内部 +const toolSpan = startToolSpan(toolName, attrs); +// ... hook pre ... +const execSpan = startToolExecutionSpan(toolSpan); +try { + // ... invocation.execute() ... + endToolExecutionSpan(execSpan, { success: true }); +} catch (e) { + endToolExecutionSpan(execSpan, { success: false, error: e.message }); +} +// ... hook post ... +endToolSpan(toolSpan); +``` + +注意:qwen-code 的 `startToolExecutionSpan` 接收显式 `parentToolSpan` 参数,而 claude-code 的是从 `toolContext` ALS 隐式获取。这不影响功能,只是风格差异。如果引入 `toolContext` ALS,可以统一改为隐式获取。 + +##### 5. subagent trace tree — 双方都不完整,不建议直接复用 + +| 维度 | claude-code | qwen-code | +| --------------- | ----------------------------------------------------------------------- | ---------------------------------------------------- | +| OTel trace 传播 | **无** — subagent 的 interaction 是新 root | **无** — subagent 无显式 trace 传播 | +| 身份关联 | Perfetto metadata(agent process/thread)+ `teammateContextStorage` ALS | `subagentNameContext` ALS + `SubagentExecutionEvent` | +| 并发隔离 | OTel ALS 有泄漏风险(`enterWith` 是进程级,并发 subagent 会互覆盖) | 同样的风险 | + +claude-code 在 subagent OTel tracing 上**自己也没解决好**: + +- `interactionContext.enterWith()` 是进程级的,并发 subagent 会覆盖彼此的 ALS 值 +- 真正的 agent 层级树只存在于 Perfetto(一个 Anthropic 内部 feature-flagged 的系统),不在 OTel 中 + +**建议:** + +- 短期:沿用 qwen-code 现有的 `subagentNameContext` + 事件日志方案 +- 中期:在 subagent 启动时创建一个 `subagent` span(parent = 当前 toolContext),并用 `context.with()` 而非 `enterWith()` 来隔离并发 subagent 的 OTel context +- 这是需要独立设计的工作项,不建议直接照搬 claude-code + +##### 6. LLM request span — 路径明确 + +qwen-code 当前在 `loggingContentGenerator.ts` 中用 `withSpan('api.generateContent', ...)` 和 `startSpanWithContext('api.generateContentStream', ...)`。 + +改为调用 `startLLMRequestSpan` / `endLLMRequestSpan`(session-tracing 层已有实现)即可。streaming 场景需要注意: + +- `startLLMRequestSpan` 返回 `Span` 对象 +- 需要手动传入 `endLLMRequestSpan(span, metadata)` 终结 +- 这与 `startSpanWithContext` 的手动管理模式兼容 + +### 复用总结 + +| 改造项 | 可复用程度 | 改动量 | 优先级 | +| ------------------------------------------------------------------------- | ------------------------------------- | --------------------------------------------- | ------ | +| 统一 span 创建路径(废弃 runtime `withSpan`,用 session-tracing helpers) | **核心修复** — 解决 parent-child 断裂 | 中(~5 个调用点) | P0 | +| 新增 `toolContext` ALS | 直接照搬 claude-code 模式 | 低(session-tracing.ts 内部) | P0 | +| tool.blocked_on_user span | 方案 A 需适配状态机 | 中(\_schedule + executeSingleToolCall 协调) | P1 | +| tool.execution 接线 | helper 已有,只需调用 | 低(executeSingleToolCall 内 3 行) | P1 | +| hook span | 新增 helper + 调用点 | 低 | P1 | +| LLM request span 切换 | 替换 withSpan 为 typed helper | 低(2 个调用点) | P1 | +| subagent trace tree | **不建议直接复用** — 需独立设计 | 高 | P2 | + +### 推荐实施顺序 + +``` +Phase 1 — 修复 trace 树结构 (P0) +├── 1a. session-tracing.ts 新增 toolContext ALS + blocked_on_user / hook span helpers +├── 1b. loggingContentGenerator.ts: withSpan → startLLMRequestSpan/endLLMRequestSpan +└── 1c. coreToolScheduler.ts: withSpan → startToolSpan/endToolSpan + +Phase 2 — 补齐 workflow span (P1) +├── 2a. coreToolScheduler._schedule: blocked_on_user span 接入 +├── 2b. coreToolScheduler.executeSingleToolCall: tool.execution span 接入 +└── 2c. hook pre/post 调用处: hook span 接入 + +Phase 3 — Subagent trace tree (P2) +├── 3a. 设计 context.with() 隔离方案(替代 enterWith) +├── 3b. subagent 启动时创建 subagent root span +└── 3c. 并发 subagent 场景验证 +``` diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 044625c51e..43608db476 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -15,7 +15,6 @@ import { } from 'vitest'; import type { Content, GenerateContentResponse, Part } from '@google/genai'; -import { SpanStatusCode } from '@opentelemetry/api'; import { GeminiClient, SendMessageType } from './client.js'; import { findCompressSplitPoint } from '../services/chatCompressionService.js'; import { @@ -167,29 +166,9 @@ const mockUiTelemetryService = vi.hoisted(() => ({ reset: vi.fn(), addEvent: vi.fn(), })); -const clientSpanCalls = vi.hoisted( - (): Array<{ - name: string; - attributes: Record; - statuses: Array<{ code: number; message?: string }>; - }> => [], -); -const mockWithSpan = vi.hoisted(() => vi.fn()); - vi.mock('../telemetry/tracer.js', () => ({ API_CALL_ABORTED_SPAN_STATUS_MESSAGE: 'API call aborted', API_CALL_FAILED_SPAN_STATUS_MESSAGE: 'API call failed', - safeSetStatus: ( - span: { setStatus: (status: { code: number; message?: string }) => void }, - status: { code: number; message?: string }, - ) => { - try { - span.setStatus(status); - } catch { - // Match production best-effort telemetry behavior. - } - }, - withSpan: mockWithSpan, })); vi.mock('../telemetry/index.js', async (importOriginal) => { @@ -356,32 +335,6 @@ describe('Gemini Client (client.ts)', () => { }; beforeEach(async () => { vi.resetAllMocks(); - clientSpanCalls.length = 0; - mockWithSpan.mockImplementation( - async ( - name: string, - attributes: Record, - fn: (span: { - setStatus: ReturnType; - setAttribute: ReturnType; - end: ReturnType; - }) => Promise, - ) => { - const spanCall = { - name, - attributes, - statuses: [] as Array<{ code: number; message?: string }>, - }; - clientSpanCalls.push(spanCall); - return fn({ - setStatus: vi.fn((status: { code: number; message?: string }) => { - spanCall.statuses.push(status); - }), - setAttribute: vi.fn(), - end: vi.fn(), - }); - }, - ); vi.mocked(uiTelemetryService.setLastPromptTokenCount).mockClear(); // Default: createContentGenerator rejects (simulates test env without auth). @@ -4494,15 +4447,6 @@ Other open files: }), 'btw-prompt-id', ); - expect(clientSpanCalls.at(-1)).toEqual( - expect.objectContaining({ - name: 'client.generateContent', - attributes: { - model: DEFAULT_QWEN_FLASH_MODEL, - prompt_id: 'btw-prompt-id', - }, - }), - ); }); it('should prefer an explicit prompt id override over the current context', async () => { @@ -4530,15 +4474,6 @@ Other open files: }), 'override-prompt-id', ); - expect(clientSpanCalls.at(-1)).toEqual( - expect.objectContaining({ - name: 'client.generateContent', - attributes: { - model: DEFAULT_QWEN_FLASH_MODEL, - prompt_id: 'override-prompt-id', - }, - }), - ); }); it('should use config system prompt override when provided', async () => { @@ -4642,7 +4577,7 @@ Other open files: ); }); - it('sets a generic span status when content generation fails', async () => { + it('propagates error when content generation fails', async () => { const contents = [{ role: 'user', parts: [{ text: 'hello' }] }]; const abortSignal = new AbortController().signal; mockGenerateContentFn.mockRejectedValueOnce( @@ -4657,15 +4592,9 @@ Other open files: DEFAULT_QWEN_FLASH_MODEL, ), ).rejects.toThrow('raw upstream 500 with sensitive details'); - - const spanCall = clientSpanCalls.at(-1); - expect(spanCall?.statuses).toEqual([ - { code: SpanStatusCode.ERROR, message: 'API call failed' }, - ]); - expect(JSON.stringify(spanCall?.statuses)).not.toContain('raw upstream'); }); - it('sets a generic aborted span status when content generation is aborted', async () => { + it('propagates error when content generation is aborted', async () => { const contents = [{ role: 'user', parts: [{ text: 'hello' }] }]; const abortController = new AbortController(); abortController.abort(); @@ -4681,12 +4610,6 @@ Other open files: DEFAULT_QWEN_FLASH_MODEL, ), ).rejects.toThrow('raw abort reason with sensitive details'); - - const spanCall = clientSpanCalls.at(-1); - expect(spanCall?.statuses).toEqual([ - { code: SpanStatusCode.ERROR, message: 'API call aborted' }, - ]); - expect(JSON.stringify(spanCall?.statuses)).not.toContain('raw abort'); }); // Note: there is currently no "fallback mode" model routing; the model used diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index ff9a76fc5e..c6e1660b7c 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -12,7 +12,6 @@ import type { PartListUnion, Tool, } from '@google/genai'; -import { SpanStatusCode } from '@opentelemetry/api'; // Config import { ApprovalMode, type Config } from '../config/config.js'; @@ -105,12 +104,6 @@ import { createHookOutput, SessionStartSource } from '../hooks/types.js'; import { ideContextStore } from '../ide/ideContext.js'; import { type File, type IdeContext } from '../ide/types.js'; import { PermissionMode, type StopHookOutput } from '../hooks/types.js'; -import { - API_CALL_ABORTED_SPAN_STATUS_MESSAGE, - API_CALL_FAILED_SPAN_STATUS_MESSAGE, - safeSetStatus, - withSpan, -} from '../telemetry/tracer.js'; const MAX_TURNS = 100; @@ -1684,91 +1677,73 @@ export class GeminiClient { const promptId = promptIdOverride ?? promptIdContext.getStore() ?? this.lastPromptId!; - return withSpan( - 'client.generateContent', - { model, prompt_id: promptId }, - async (span) => { - let currentAttemptModel: string = model; + let currentAttemptModel: string = model; - try { - const userMemory = this.config.getUserMemory(); - const finalSystemInstruction = generationConfig.systemInstruction - ? getCustomSystemPrompt( - generationConfig.systemInstruction, - userMemory, - ) - : this.getMainSessionSystemInstruction(); - - const requestConfig: GenerateContentConfig = { - abortSignal, - ...generationConfig, - systemInstruction: finalSystemInstruction, - }; + try { + const userMemory = this.config.getUserMemory(); + const finalSystemInstruction = generationConfig.systemInstruction + ? getCustomSystemPrompt(generationConfig.systemInstruction, userMemory) + : this.getMainSessionSystemInstruction(); + + const requestConfig: GenerateContentConfig = { + abortSignal, + ...generationConfig, + systemInstruction: finalSystemInstruction, + }; - // When the requested model differs from the main model (e.g. fast model - // side queries for session recap / title / summary), resolve the target - // model's own ContentGeneratorConfig so that per-model settings like - // extra_body, samplingParams, and reasoning are not inherited from the - // main model's config. The retry authType is resolved alongside so that - // provider-specific checks (e.g. QWEN_OAUTH quota detection) reference - // the target model's provider. - const { - contentGenerator, - retryAuthType, + // When the requested model differs from the main model (e.g. fast model + // side queries for session recap / title / summary), resolve the target + // model's own ContentGeneratorConfig so that per-model settings like + // extra_body, samplingParams, and reasoning are not inherited from the + // main model's config. The retry authType is resolved alongside so that + // provider-specific checks (e.g. QWEN_OAUTH quota detection) reference + // the target model's provider. + const { + contentGenerator, + retryAuthType, + model: requestModel, + } = await this.config.getBaseLlmClient().resolveForModel(model); + + const apiCall = () => { + currentAttemptModel = requestModel; + + return contentGenerator.generateContent( + { model: requestModel, - } = await this.config.getBaseLlmClient().resolveForModel(model); - - const apiCall = () => { - currentAttemptModel = requestModel; - - return contentGenerator.generateContent( - { - model: requestModel, - config: requestConfig, - contents, - }, - promptId, - ); - }; - const result = await retryWithBackoff(apiCall, { - authType: retryAuthType, - persistentMode: isUnattendedMode(), - signal: abortSignal, - heartbeatFn: (info) => { - process.stderr.write( - `[qwen-code] Waiting for API capacity... attempt ${info.attempt}, retry in ${Math.ceil(info.remainingMs / 1000)}s\n`, - ); - }, - }); - return result; - } catch (error: unknown) { - if (abortSignal.aborted) { - safeSetStatus(span, { - code: SpanStatusCode.ERROR, - message: API_CALL_ABORTED_SPAN_STATUS_MESSAGE, - }); - throw error; - } - - safeSetStatus(span, { - code: SpanStatusCode.ERROR, - message: API_CALL_FAILED_SPAN_STATUS_MESSAGE, - }); - await reportError( - error, - `Error generating content via API with model ${currentAttemptModel}.`, - { - requestContents: contents, - requestConfig: generationConfig, - }, - 'generateContent-api', - ); - throw new Error( - `Failed to generate content with model ${currentAttemptModel}: ${getErrorMessage(error)}`, + config: requestConfig, + contents, + }, + promptId, + ); + }; + const result = await retryWithBackoff(apiCall, { + authType: retryAuthType, + persistentMode: isUnattendedMode(), + signal: abortSignal, + heartbeatFn: (info) => { + process.stderr.write( + `[qwen-code] Waiting for API capacity... attempt ${info.attempt}, retry in ${Math.ceil(info.remainingMs / 1000)}s\n`, ); - } - }, - ); + }, + }); + return result; + } catch (error: unknown) { + if (abortSignal.aborted) { + throw error; + } + await reportError( + error, + `Error generating content via API with model ${currentAttemptModel}.`, + { + requestContents: contents, + requestConfig: generationConfig, + }, + 'generateContent-api', + ); + throw new Error( + `Failed to generate content with model ${currentAttemptModel}: ${getErrorMessage(error)}`, + ); + } } /** diff --git a/packages/core/src/core/coreToolScheduler.test.ts b/packages/core/src/core/coreToolScheduler.test.ts index 64b825fa27..cce73bb851 100644 --- a/packages/core/src/core/coreToolScheduler.test.ts +++ b/packages/core/src/core/coreToolScheduler.test.ts @@ -79,65 +79,85 @@ vi.mock('../telemetry/tracer.js', () => ({ // Match production best-effort telemetry behavior. } }, - withSpan: vi.fn( - async ( - name: string, - attributes: Record, - fn: (span: { - setStatus: (status: { code: number; message?: string }) => void; - setAttribute: (key: string, value: string | number | boolean) => void; - end: () => void; - }) => Promise, - options?: { autoOkOnSuccess?: boolean }, - ) => { - const autoOkOnSuccess = options?.autoOkOnSuccess ?? true; - const record: ToolSpanRecord = { - name, - attributes, - statusCalls: [], - spanAttributes: {}, - ended: false, - }; - toolSpanRecords.push(record); - let statusSet = false; - const span = { - setStatus(status: { code: number; message?: string }) { - statusSet = true; - if (shouldThrowToolSpanSetStatus.value) { - throw new Error('setStatus failed'); - } - record.statusCalls.push(status); - }, - setAttribute(key: string, value: string | number | boolean) { - if (shouldThrowToolSpanSetAttribute.value) { - throw new Error('setAttribute failed'); - } - record.spanAttributes[key] = value; - }, - end() { - record.ended = true; - }, - }; +})); - try { - const result = await fn(span); - if (autoOkOnSuccess && !statusSet) { - record.statusCalls.push({ code: 1 }); - } - return result; - } catch (error) { - if (!statusSet) { - record.statusCalls.push({ - code: 2, - message: error instanceof Error ? error.message : String(error), - }); - } - throw error; - } finally { - record.ended = true; +function createMockToolSpan( + name: string, + attributes: Record, +): ToolSpanRecord & { + setStatus: (status: { code: number; message?: string }) => void; + setAttribute: (key: string, value: string | number | boolean) => void; + setAttributes: (attrs: Record) => void; + end: () => void; + spanContext: () => { spanId: string; traceId: string; traceFlags: number }; +} { + const record: ToolSpanRecord = { + name, + attributes, + statusCalls: [], + spanAttributes: {}, + ended: false, + }; + toolSpanRecords.push(record); + const spanId = Math.random().toString(16).slice(2, 18).padEnd(16, '0'); + return Object.assign(record, { + setStatus(status: { code: number; message?: string }) { + if (shouldThrowToolSpanSetStatus.value) { + throw new Error('setStatus failed'); + } + record.statusCalls.push(status); + }, + setAttribute(key: string, value: string | number | boolean) { + if (shouldThrowToolSpanSetAttribute.value) { + throw new Error('setAttribute failed'); + } + record.spanAttributes[key] = value; + }, + setAttributes(attrs: Record) { + Object.assign(record.spanAttributes, attrs); + }, + end() { + record.ended = true; + }, + spanContext: () => ({ spanId, traceId: '0'.repeat(32), traceFlags: 0 }), + }); +} + +vi.mock('../telemetry/session-tracing.js', () => ({ + startToolSpan: vi.fn( + (name: string, attrs?: Record) => + createMockToolSpan(`tool.${name}`, { tool_name: name, ...attrs }), + ), + endToolSpan: vi.fn( + ( + span: ToolSpanRecord & ReturnType, + metadata?: { success?: boolean; error?: string }, + ) => { + if (metadata) { + const status = + metadata.success !== false + ? { code: 1 } + : { code: 2, message: metadata.error ?? 'tool error' }; + span.statusCalls.push(status); } + span.ended = true; }, ), + runInToolSpanContext: vi.fn((_span: unknown, fn: () => T): T => fn()), + startToolExecutionSpan: vi.fn(() => createMockToolSpan('tool.execution', {})), + endToolExecutionSpan: vi.fn( + ( + span: ReturnType, + _metadata?: { success?: boolean; error?: string }, + ) => { + span.ended = true; + }, + ), + startInteractionSpan: vi.fn(), + endInteractionSpan: vi.fn(), + startLLMRequestSpan: vi.fn(), + endLLMRequestSpan: vi.fn(), + clearSessionTracingForTesting: vi.fn(), })); vi.mock('fs/promises', () => ({ @@ -2940,7 +2960,9 @@ describe('CoreToolScheduler telemetry spans', () => { }); function getLastToolSpan(): ToolSpanRecord { - const spanRecord = toolSpanRecords.at(-1); + const spanRecord = toolSpanRecords.findLast( + (r) => r.name.startsWith('tool.') && r.name !== 'tool.execution', + ); if (!spanRecord) { throw new Error('tool span was not created'); } @@ -3275,7 +3297,7 @@ describe('CoreToolScheduler telemetry spans', () => { ); }); - it('leaves cancellation spans with no explicit status (autoOkOnSuccess: false)', async () => { + it('marks cancellation spans with UNSET status', async () => { const abortController = new AbortController(); const { spanRecord, completedCalls } = await runSingleTool({ abortController, @@ -3289,9 +3311,7 @@ describe('CoreToolScheduler telemetry spans', () => { }); expect(completedCalls[0].status).toBe('cancelled'); - // autoOkOnSuccess: false prevents withSpan from auto-setting OK; - // setToolSpanCancelled only sets the failure_kind attribute, not a status. - expect(spanRecord.statusCalls).toEqual([]); + expect(spanRecord.statusCalls).toEqual([{ code: SpanStatusCode.UNSET }]); expect(spanRecord.spanAttributes['tool.failure_kind']).toBe('cancelled'); expect(spanRecord.ended).toBe(true); }); @@ -3311,9 +3331,9 @@ describe('CoreToolScheduler telemetry spans', () => { }); expect(completedCalls[0].status).toBe('cancelled'); - // No status set — autoOkOnSuccess: false, and setToolSpanCancelled - // only sets the attribute (which fails here, caught internally). - expect(spanRecord.statusCalls).toEqual([]); + // setAttribute throws, but safeSetStatus still attempts setStatus. + // Since throwSpanSetAttribute only affects setAttribute, setStatus succeeds. + expect(spanRecord.statusCalls).toEqual([{ code: SpanStatusCode.UNSET }]); expect(spanRecord.spanAttributes).not.toHaveProperty('tool.failure_kind'); expect(spanRecord.ended).toBe(true); }); @@ -3333,9 +3353,8 @@ describe('CoreToolScheduler telemetry spans', () => { }); expect(completedCalls[0].status).toBe('cancelled'); - // setToolSpanCancelled no longer calls setStatus, so throwSpanSetStatus - // only affects the safeSetStatus(span, OK) in the success path (not hit). - // With autoOkOnSuccess: false, withSpan does not attempt setStatus either. + // setToolSpanCancelled calls safeSetStatus which catches the throw. + // Status call is attempted but swallowed by safeSetStatus. expect(spanRecord.statusCalls).toEqual([]); expect(spanRecord.spanAttributes['tool.failure_kind']).toBe('cancelled'); expect(spanRecord.ended).toBe(true); @@ -3352,7 +3371,7 @@ describe('CoreToolScheduler telemetry spans', () => { expect(spanRecord.ended).toBe(true); }); - it('leaves successful tool calls to be marked OK by withSpan', async () => { + it('marks successful tool calls with OK status via endToolSpan', async () => { const { spanRecord, completedCalls } = await runSingleTool(); expect(completedCalls[0].status).toBe('success'); @@ -3360,6 +3379,60 @@ describe('CoreToolScheduler telemetry spans', () => { expect(spanRecord.spanAttributes).not.toHaveProperty('tool.failure_kind'); expect(spanRecord.ended).toBe(true); }); + + // tool.execution sub-span lifecycle assertions — + // ensure the sub-span is started/ended on every meaningful path so that + // future regressions (e.g. dropping the sub-span call or mis-marking a + // failed result as success) fail loudly. + + function getExecutionSpan(): ToolSpanRecord | undefined { + return toolSpanRecords.find((r) => r.name === 'tool.execution'); + } + + it('execution sub-span: started and ended (success: true) on success', async () => { + await runSingleTool(); + const exec = getExecutionSpan(); + expect(exec).toBeDefined(); + expect(exec!.ended).toBe(true); + }); + + it('execution sub-span: ended (success: false) when ToolResult.error is set', async () => { + await runSingleTool({ + execute: vi.fn().mockResolvedValue({ + llmContent: 'failed', + returnDisplay: 'failed', + error: { + message: 'tool failed', + type: ToolErrorType.EXECUTION_FAILED, + }, + }), + }); + const exec = getExecutionSpan(); + expect(exec).toBeDefined(); + expect(exec!.ended).toBe(true); + }); + + it('execution sub-span: ended on thrown invocation exception', async () => { + await runSingleTool({ + execute: vi.fn().mockRejectedValue(new Error('boom')), + }); + const exec = getExecutionSpan(); + expect(exec).toBeDefined(); + expect(exec!.ended).toBe(true); + }); + + it('execution sub-span: NOT created when pre-hook denies execution', async () => { + const messageBus = { + request: vi.fn().mockResolvedValueOnce({ + type: MessageBusType.HOOK_EXECUTION_RESPONSE, + correlationId: 'pre-hook', + success: true, + output: { decision: 'block', reason: 'denied' }, + }), + }; + await runSingleTool({ messageBus, disableHooks: false }); + expect(getExecutionSpan()).toBeUndefined(); + }); }); // Integration tests for the fire* functions diff --git a/packages/core/src/core/coreToolScheduler.ts b/packages/core/src/core/coreToolScheduler.ts index d7cd4ef300..622a07a78a 100644 --- a/packages/core/src/core/coreToolScheduler.ts +++ b/packages/core/src/core/coreToolScheduler.ts @@ -75,8 +75,15 @@ import levenshtein from 'fast-levenshtein'; import { getPlanModeSystemReminder } from './prompts.js'; import { ShellToolInvocation } from '../tools/shell.js'; import { IdeClient } from '../ide/ide-client.js'; -import { safeSetStatus, withSpan } from '../telemetry/tracer.js'; +import { safeSetStatus } from '../telemetry/tracer.js'; import { SpanStatusCode, type Span } from '@opentelemetry/api'; +import { + startToolSpan, + endToolSpan, + runInToolSpanContext, + startToolExecutionSpan, + endToolExecutionSpan, +} from '../telemetry/index.js'; const TOOL_FAILURE_KIND_ATTRIBUTE = 'tool.failure_kind'; const TOOL_FAILURE_KIND_PRE_HOOK_BLOCKED = 'pre_hook_blocked'; @@ -131,9 +138,9 @@ function setToolSpanCancelled(span: Span): void { } catch { // OTel errors must not block the cancellation status update. } - // No explicit span status — cancellation is neither OK nor ERROR. - // The caller uses withSpan({ autoOkOnSuccess: false }) so withSpan - // will not auto-set OK, and the span ends with the default UNSET status. + safeSetStatus(span, { + code: SpanStatusCode.UNSET, + }); } async function safelyFirePostToolUseFailureHook( @@ -1852,424 +1859,434 @@ export class CoreToolScheduler { const scheduledCall = toolCall; const { callId, name: toolName } = scheduledCall.request; - return withSpan( - `tool.${toolName}`, - { tool_name: toolName, call_id: callId }, - async (span: Span) => { - const invocation = scheduledCall.invocation; - const toolInput = scheduledCall.request.args as Record; + const toolSpan = startToolSpan(toolName, { + tool_name: toolName, + call_id: callId, + }); + try { + await runInToolSpanContext(toolSpan, () => + this._executeToolCallBody(scheduledCall, signal, toolSpan), + ); + } finally { + endToolSpan(toolSpan); + } + } - // Normalize shell-escaped path params so hooks operate on actual filesystem - // paths, matching the normalization done in tool validation. - for (const key of PATH_ARG_KEYS) { - if (typeof toolInput[key] === 'string') { - toolInput[key] = unescapePath(String(toolInput[key]).trim()); + private async _executeToolCallBody( + scheduledCall: ScheduledToolCall, + signal: AbortSignal, + span: Span, + ): Promise { + const { callId, name: toolName } = scheduledCall.request; + const invocation = scheduledCall.invocation; + const toolInput = scheduledCall.request.args as Record; + + // Normalize shell-escaped path params so hooks operate on actual filesystem + // paths, matching the normalization done in tool validation. + for (const key of PATH_ARG_KEYS) { + if (typeof toolInput[key] === 'string') { + toolInput[key] = unescapePath(String(toolInput[key]).trim()); + } + } + + // Generate unique tool_use_id for hook tracking + const toolUseId = generateToolUseId(); + + // Get MessageBus for hook execution + const messageBus = this.config.getMessageBus() as MessageBus | undefined; + const hooksEnabled = !this.config.getDisableAllHooks(); + + // PreToolUse Hook + if (hooksEnabled && messageBus) { + // Convert ApprovalMode to permission_mode string for hooks + const permissionMode = this.config.getApprovalMode(); + const preHookResult = await firePreToolUseHook( + messageBus, + toolName, + toolInput, + toolUseId, + permissionMode, + ); + + if (!preHookResult.shouldProceed) { + // Hook blocked the execution + const blockMessage = + preHookResult.blockReason || 'Tool execution blocked by hook'; + const errorResponse = createErrorResponse( + scheduledCall.request, + new Error(blockMessage), + ToolErrorType.EXECUTION_DENIED, + ); + this.setStatusInternal(callId, 'error', errorResponse); + setToolSpanFailure( + span, + TOOL_FAILURE_KIND_PRE_HOOK_BLOCKED, + TOOL_SPAN_STATUS_PRE_HOOK_BLOCKED, + ); + return; + } + } + + this.setStatusInternal(callId, 'executing'); + + const liveOutputCallback = scheduledCall.tool.canUpdateOutput + ? (outputChunk: ToolResultDisplay) => { + if (this.outputUpdateHandler) { + this.outputUpdateHandler(callId, outputChunk); } + this.toolCalls = this.toolCalls.map((tc) => + tc.request.callId === callId && tc.status === 'executing' + ? { ...tc, liveOutput: outputChunk } + : tc, + ); + this.notifyToolCallsUpdate(); } + : undefined; + + const shellExecutionConfig = this.config.getShellExecutionConfig(); + + // TODO: Refactor to remove special casing for ShellToolInvocation. + // Introduce a generic callbacks object for the execute method to handle + // things like `onPid` and `onLiveOutput`. This will make the scheduler + // agnostic to the invocation type. + // + // Start the execution sub-span BEFORE invocation.execute() so its + // synchronous setup (shell command preprocessing, child_process.spawn, + // etc.) is bracketed by the span. We don't manually activate the span + // as OTel context here because the surrounding tool span is already + // active via runInToolSpanContext, and tool implementations don't + // currently emit nested OTel spans of their own — the span boundary + // is purely for timing/attribution. + const execSpan = startToolExecutionSpan(); + // try wraps both invocation.execute() and the await so synchronous + // throws (e.g. shell setup failure) flow into the same catch as async + // rejections — otherwise execSpan leaks unended and failure hooks + // are skipped. + try { + let promise: Promise; + if (invocation instanceof ShellToolInvocation) { + const setPidCallback = (pid: number) => { + this.toolCalls = this.toolCalls.map((tc) => + tc.request.callId === callId && tc.status === 'executing' + ? { ...tc, pid } + : tc, + ); + this.notifyToolCallsUpdate(); + }; + // Stash the promote AbortController on the executing tool call so + // a UI surface (Ctrl+B keybind) can find the foreground shell's + // promote trigger by callId. + const setPromoteAbortControllerCallback = (ac: AbortController) => { + this.toolCalls = this.toolCalls.map((tc) => + tc.request.callId === callId && tc.status === 'executing' + ? { ...tc, promoteAbortController: ac } + : tc, + ); + this.notifyToolCallsUpdate(); + }; + promise = invocation.execute( + signal, + liveOutputCallback, + shellExecutionConfig, + setPidCallback, + setPromoteAbortControllerCallback, + ); + } else { + promise = invocation.execute( + signal, + liveOutputCallback, + shellExecutionConfig, + ); + } - // Generate unique tool_use_id for hook tracking - const toolUseId = generateToolUseId(); + const toolResult: ToolResult = await promise; + endToolExecutionSpan(execSpan, { + success: toolResult.error === undefined, + }); + if (signal.aborted) { + // PostToolUseFailure Hook + if (hooksEnabled && messageBus) { + const failureHookResult = await safelyFirePostToolUseFailureHook( + messageBus, + toolUseId, + toolName, + toolInput, + 'User cancelled tool execution.', + true, + this.config.getApprovalMode(), + ); - // Get MessageBus for hook execution - const messageBus = this.config.getMessageBus() as - | MessageBus - | undefined; - const hooksEnabled = !this.config.getDisableAllHooks(); + // Append additional context from hook if provided + let cancelMessage = 'User cancelled tool execution.'; + if (failureHookResult.additionalContext) { + cancelMessage += `\n\n${failureHookResult.additionalContext}`; + } + this.setStatusInternal(callId, 'cancelled', cancelMessage); + } else { + this.setStatusInternal( + callId, + 'cancelled', + 'User cancelled tool execution.', + ); + } + setToolSpanCancelled(span); + return; // Both code paths should return here + } + + if (toolResult.error === undefined) { + let content = toolResult.llmContent; + const contentLength = + typeof content === 'string' ? content.length : undefined; - // PreToolUse Hook + // PostToolUse Hook if (hooksEnabled && messageBus) { - // Convert ApprovalMode to permission_mode string for hooks + const toolResponse = { + llmContent: content, + returnDisplay: toolResult.returnDisplay, + }; const permissionMode = this.config.getApprovalMode(); - const preHookResult = await firePreToolUseHook( + const postHookResult = await firePostToolUseHook( messageBus, toolName, toolInput, + toolResponse, toolUseId, permissionMode, ); - if (!preHookResult.shouldProceed) { - // Hook blocked the execution - const blockMessage = - preHookResult.blockReason || 'Tool execution blocked by hook'; + // Append additional context from hook if provided + if (postHookResult.additionalContext) { + content = appendAdditionalContext( + content, + postHookResult.additionalContext, + ); + } + + // Check if hook requested to stop execution + if (postHookResult.shouldStop) { + const stopMessage = + postHookResult.stopReason || 'Execution stopped by hook'; const errorResponse = createErrorResponse( scheduledCall.request, - new Error(blockMessage), + new Error(stopMessage), ToolErrorType.EXECUTION_DENIED, ); this.setStatusInternal(callId, 'error', errorResponse); setToolSpanFailure( span, - TOOL_FAILURE_KIND_PRE_HOOK_BLOCKED, - TOOL_SPAN_STATUS_PRE_HOOK_BLOCKED, + TOOL_FAILURE_KIND_POST_HOOK_STOPPED, + TOOL_SPAN_STATUS_POST_HOOK_STOPPED, ); return; } } - this.setStatusInternal(callId, 'executing'); - - const liveOutputCallback = scheduledCall.tool.canUpdateOutput - ? (outputChunk: ToolResultDisplay) => { - if (this.outputUpdateHandler) { - this.outputUpdateHandler(callId, outputChunk); - } - this.toolCalls = this.toolCalls.map((tc) => - tc.request.callId === callId && tc.status === 'executing' - ? { ...tc, liveOutput: outputChunk } - : tc, - ); - this.notifyToolCallsUpdate(); - } - : undefined; - - const shellExecutionConfig = this.config.getShellExecutionConfig(); - - // TODO: Refactor to remove special casing for ShellToolInvocation. - // Introduce a generic callbacks object for the execute method to handle - // things like `onPid` and `onLiveOutput`. This will make the scheduler - // agnostic to the invocation type. - let promise: Promise; - if (invocation instanceof ShellToolInvocation) { - const setPidCallback = (pid: number) => { - this.toolCalls = this.toolCalls.map((tc) => - tc.request.callId === callId && tc.status === 'executing' - ? { ...tc, pid } - : tc, - ); - this.notifyToolCallsUpdate(); - }; - // Stash the promote AbortController on the executing tool call so - // a UI surface (Ctrl+B keybind) can find the foreground shell's - // promote trigger by callId. - const setPromoteAbortControllerCallback = (ac: AbortController) => { - this.toolCalls = this.toolCalls.map((tc) => - tc.request.callId === callId && tc.status === 'executing' - ? { ...tc, promoteAbortController: ac } - : tc, - ); - this.notifyToolCallsUpdate(); - }; - promise = invocation.execute( - signal, - liveOutputCallback, - shellExecutionConfig, - setPidCallback, - setPromoteAbortControllerCallback, - ); - } else { - promise = invocation.execute( - signal, - liveOutputCallback, - shellExecutionConfig, - ); - } - - try { - const toolResult: ToolResult = await promise; - if (signal.aborted) { - // PostToolUseFailure Hook - if (hooksEnabled && messageBus) { - const failureHookResult = await safelyFirePostToolUseFailureHook( - messageBus, - toolUseId, - toolName, - toolInput, - 'User cancelled tool execution.', - true, - this.config.getApprovalMode(), - ); + // Collect filesystem paths the tool just touched. Different tools + // use different parameter names: `file_path` (read/edit/write), + // `path` (ls, glob), `filePath` (grep, lsp), and `paths` + // (ripGrep array form). Conditional rules and skill activation + // both key off the same path set, so inspect the union — and + // gate the inspection on a tool-name allowlist (see + // FS_PATH_TOOL_NAMES) so MCP / non-FS tools that reuse those + // parameter names with different semantics never enter the + // activation pipeline. + const inputPaths = extractToolFilePaths(toolName, toolInput); + const resultPaths = + isFilesystemPathTool(toolName) && + Array.isArray(toolResult.resultFilePaths) + ? toolResult.resultFilePaths + : []; + const candidatePaths = Array.from( + new Set([...inputPaths.map((p) => unescapePath(p)), ...resultPaths]), + ); - // Append additional context from hook if provided - let cancelMessage = 'User cancelled tool execution.'; - if (failureHookResult.additionalContext) { - cancelMessage += `\n\n${failureHookResult.additionalContext}`; - } - this.setStatusInternal(callId, 'cancelled', cancelMessage); - } else { - this.setStatusInternal( - callId, - 'cancelled', - 'User cancelled tool execution.', - ); - } - setToolSpanCancelled(span); - return; // Both code paths should return here + if (candidatePaths.length > 0) { + const rulesRegistry = this.config.getConditionalRulesRegistry(); + const skillManager = this.config.getSkillManager(); + + // Collect every reminder block produced by this tool call, then + // emit them as a single `` envelope at the end. + // The previous version emitted one envelope per matching rule + // PLUS one for skill activation — a multi-path tool could + // produce N+1 envelopes, diluting the model's attention. One + // wrapper / one append also lets us share the breakout-prevention + // sanitization step (closing-tag scrub) in one place. + const reminderBlocks: string[] = []; + + for (const candidatePath of candidatePaths) { + // Inject conditional rules at most once per session per rule + // file. The registry tracks dedup internally. + const rulesCtx = rulesRegistry?.matchAndConsume(candidatePath); + if (rulesCtx) reminderBlocks.push(rulesCtx); } - if (toolResult.error === undefined) { - let content = toolResult.llmContent; - const contentLength = - typeof content === 'string' ? content.length : undefined; - - // PostToolUse Hook - if (hooksEnabled && messageBus) { - const toolResponse = { - llmContent: content, - returnDisplay: toolResult.returnDisplay, - }; - const permissionMode = this.config.getApprovalMode(); - const postHookResult = await firePostToolUseHook( - messageBus, - toolName, - toolInput, - toolResponse, - toolUseId, - permissionMode, + // Skill activation runs in a single batch over all candidate + // paths so `notifyChangeListeners` (and therefore + // `SkillTool.refreshSkills` / `geminiClient.setTools()`) fires + // exactly once for this tool call, regardless of how many + // paths produced new activations. The await is load-bearing: + // matchAndActivateByPaths only resolves after the listener + // chain settles, so the activation reminder we append below + // never lands in a turn where is still + // stale. + const activatedSkills = + await skillManager?.matchAndActivateByPaths(candidatePaths); + if (activatedSkills && activatedSkills.length > 0) { + // Subagents share the parent's SkillManager but may have a + // restricted toolsList that excludes SkillTool entirely. + // Telling such a context "skill X is now available via the + // Skill tool" is misleading — the subagent can't invoke it + // and would waste a turn trying. Gate the reminder on + // whether the active tool registry actually exposes + // SkillTool to the model. + const hasSkillTool = !!this.toolRegistry.getTool(ToolNames.SKILL); + if (hasSkillTool) { + // Escape skill names defensively: validateSkillName already + // excludes `<>&` for parsed file-based skills, but + // extension skills (extension.skills array) bypass that + // validator. A crafted extension name would otherwise + // close the envelope early. + const names = activatedSkills.map(escapeXml).join(', '); + reminderBlocks.push( + `The following skill(s) are now available via the Skill tool based on the file you just accessed: ${names}. Use them if relevant to the task.`, ); - - // Append additional context from hook if provided - if (postHookResult.additionalContext) { - content = appendAdditionalContext( - content, - postHookResult.additionalContext, - ); - } - - // Check if hook requested to stop execution - if (postHookResult.shouldStop) { - const stopMessage = - postHookResult.stopReason || 'Execution stopped by hook'; - const errorResponse = createErrorResponse( - scheduledCall.request, - new Error(stopMessage), - ToolErrorType.EXECUTION_DENIED, - ); - this.setStatusInternal(callId, 'error', errorResponse); - setToolSpanFailure( - span, - TOOL_FAILURE_KIND_POST_HOOK_STOPPED, - TOOL_SPAN_STATUS_POST_HOOK_STOPPED, - ); - return; - } - } - - // Collect filesystem paths the tool just touched. Different tools - // use different parameter names: `file_path` (read/edit/write), - // `path` (ls, glob), `filePath` (grep, lsp), and `paths` - // (ripGrep array form). Conditional rules and skill activation - // both key off the same path set, so inspect the union — and - // gate the inspection on a tool-name allowlist (see - // FS_PATH_TOOL_NAMES) so MCP / non-FS tools that reuse those - // parameter names with different semantics never enter the - // activation pipeline. - const inputPaths = extractToolFilePaths(toolName, toolInput); - const resultPaths = - isFilesystemPathTool(toolName) && - Array.isArray(toolResult.resultFilePaths) - ? toolResult.resultFilePaths - : []; - const candidatePaths = Array.from( - new Set([ - ...inputPaths.map((p) => unescapePath(p)), - ...resultPaths, - ]), - ); - - if (candidatePaths.length > 0) { - const rulesRegistry = this.config.getConditionalRulesRegistry(); - const skillManager = this.config.getSkillManager(); - - // Collect every reminder block produced by this tool call, then - // emit them as a single `` envelope at the end. - // The previous version emitted one envelope per matching rule - // PLUS one for skill activation — a multi-path tool could - // produce N+1 envelopes, diluting the model's attention. One - // wrapper / one append also lets us share the breakout-prevention - // sanitization step (escapeSystemReminderTags) in one place. - const reminderBlocks: string[] = []; - - for (const candidatePath of candidatePaths) { - // Inject conditional rules at most once per session per rule - // file. The registry tracks dedup internally. - const rulesCtx = rulesRegistry?.matchAndConsume(candidatePath); - if (rulesCtx) reminderBlocks.push(rulesCtx); - } - - // Skill activation runs in a single batch over all candidate - // paths so `notifyChangeListeners` (and therefore - // `SkillTool.refreshSkills` / `geminiClient.setTools()`) fires - // exactly once for this tool call, regardless of how many - // paths produced new activations. The await is load-bearing: - // matchAndActivateByPaths only resolves after the listener - // chain settles, so the activation reminder we append below - // never lands in a turn where is still - // stale. - const activatedSkills = - await skillManager?.matchAndActivateByPaths(candidatePaths); - if (activatedSkills && activatedSkills.length > 0) { - // Subagents share the parent's SkillManager but may have a - // restricted toolsList that excludes SkillTool entirely. - // Telling such a context "skill X is now available via the - // Skill tool" is misleading — the subagent can't invoke it - // and would waste a turn trying. Gate the reminder on - // whether the active tool registry actually exposes - // SkillTool to the model. - const hasSkillTool = !!this.toolRegistry.getTool( - ToolNames.SKILL, - ); - if (hasSkillTool) { - // Escape skill names defensively: validateSkillName already - // excludes `<>&` for parsed file-based skills, but - // extension skills (extension.skills array) bypass that - // validator. A crafted extension name would otherwise - // close the envelope early. - const names = activatedSkills.map(escapeXml).join(', '); - reminderBlocks.push( - `The following skill(s) are now available via the Skill tool based on the file you just accessed: ${names}. Use them if relevant to the task.`, - ); - } - } - - if (reminderBlocks.length > 0) { - // Final tag scrub on the joined body — defense in depth - // against rules whose markdown body contains a - // `` open/close sequence (literal or - // obfuscated with whitespace / zero-width / control - // chars). Full XML escaping would mangle code blocks in - // rule bodies; the shared targeted scrub keeps markdown - // readable while neutralizing envelope-breakout - // attempts. Mirrors the IDE-context scrub via the same - // `escapeSystemReminderTags` helper. - const body = escapeSystemReminderTags( - reminderBlocks.join('\n\n'), - ); - content = appendAdditionalContext( - content, - `\n${body}\n`, - ); - } } + } - const response = convertToFunctionResponse( - toolName, - callId, + if (reminderBlocks.length > 0) { + const body = escapeSystemReminderTags(reminderBlocks.join('\n\n')); + content = appendAdditionalContext( content, + `\n${body}\n`, ); - const successResponse: ToolCallResponseInfo = { - callId, - responseParts: response, - resultDisplay: toolResult.returnDisplay, - error: undefined, - errorType: undefined, - contentLength, - // Propagate modelOverride from skill tools. Use `in` to distinguish - // "skill returned undefined (inherit)" from "non-skill tool (no field)". - ...('modelOverride' in toolResult - ? { modelOverride: toolResult.modelOverride } - : {}), - }; - this.setStatusInternal(callId, 'success', successResponse); - safeSetStatus(span, { code: SpanStatusCode.OK }); - } else { - // It is a failure - // PostToolUseFailure Hook - let errorMessage = toolResult.error.message; - if (hooksEnabled && messageBus) { - const failureHookResult = await safelyFirePostToolUseFailureHook( - messageBus, - toolUseId, - toolName, - toolInput, - toolResult.error.message, - false, - this.config.getApprovalMode(), - ); + } + } - // Append additional context from hook if provided - if (failureHookResult.additionalContext) { - errorMessage += `\n\n${failureHookResult.additionalContext}`; - } - } + const response = convertToFunctionResponse(toolName, callId, content); + const successResponse: ToolCallResponseInfo = { + callId, + responseParts: response, + resultDisplay: toolResult.returnDisplay, + error: undefined, + errorType: undefined, + contentLength, + // Propagate modelOverride from skill tools. Use `in` to distinguish + // "skill returned undefined (inherit)" from "non-skill tool (no field)". + ...('modelOverride' in toolResult + ? { modelOverride: toolResult.modelOverride } + : {}), + }; + this.setStatusInternal(callId, 'success', successResponse); + safeSetStatus(span, { code: SpanStatusCode.OK }); + } else { + // It is a failure + // PostToolUseFailure Hook + let errorMessage = toolResult.error.message; + if (hooksEnabled && messageBus) { + const failureHookResult = await safelyFirePostToolUseFailureHook( + messageBus, + toolUseId, + toolName, + toolInput, + toolResult.error.message, + false, + this.config.getApprovalMode(), + ); - const error = new Error(errorMessage); - const errorResponse = createErrorResponse( - scheduledCall.request, - error, - toolResult.error.type, - ); - this.setStatusInternal(callId, 'error', errorResponse); - setToolSpanFailure( - span, - TOOL_FAILURE_KIND_TOOL_ERROR, - TOOL_SPAN_STATUS_TOOL_ERROR, - ); + // Append additional context from hook if provided + if (failureHookResult.additionalContext) { + errorMessage += `\n\n${failureHookResult.additionalContext}`; } - } catch (executionError: unknown) { - const errorMessage = - executionError instanceof Error - ? executionError.message - : String(executionError); + } - if (signal.aborted) { - // PostToolUseFailure Hook (user interrupt) - if (hooksEnabled && messageBus) { - const failureHookResult = await safelyFirePostToolUseFailureHook( - messageBus, - toolUseId, - toolName, - toolInput, - 'User cancelled tool execution.', - true, - this.config.getApprovalMode(), - ); + const error = new Error(errorMessage); + const errorResponse = createErrorResponse( + scheduledCall.request, + error, + toolResult.error.type, + ); + this.setStatusInternal(callId, 'error', errorResponse); + setToolSpanFailure( + span, + TOOL_FAILURE_KIND_TOOL_ERROR, + TOOL_SPAN_STATUS_TOOL_ERROR, + ); + } + } catch (executionError: unknown) { + const errorMessage = + executionError instanceof Error + ? executionError.message + : String(executionError); + endToolExecutionSpan(execSpan, { + success: false, + error: TOOL_SPAN_STATUS_TOOL_EXCEPTION, + }); - // Append additional context from hook if provided - let cancelMessage = 'User cancelled tool execution.'; - if (failureHookResult.additionalContext) { - cancelMessage += `\n\n${failureHookResult.additionalContext}`; - } - this.setStatusInternal(callId, 'cancelled', cancelMessage); - } else { - this.setStatusInternal( - callId, - 'cancelled', - 'User cancelled tool execution.', - ); - } - setToolSpanCancelled(span); - return; - } else { - // PostToolUseFailure Hook - let exceptionErrorMessage = errorMessage; - if (hooksEnabled && messageBus) { - const failureHookResult = await safelyFirePostToolUseFailureHook( - messageBus, - toolUseId, - toolName, - toolInput, - errorMessage, - false, - this.config.getApprovalMode(), - ); + if (signal.aborted) { + // PostToolUseFailure Hook (user interrupt) + if (hooksEnabled && messageBus) { + const failureHookResult = await safelyFirePostToolUseFailureHook( + messageBus, + toolUseId, + toolName, + toolInput, + 'User cancelled tool execution.', + true, + this.config.getApprovalMode(), + ); - // Append additional context from hook if provided - if (failureHookResult.additionalContext) { - exceptionErrorMessage += `\n\n${failureHookResult.additionalContext}`; - } - } - this.setStatusInternal( - callId, - 'error', - createErrorResponse( - scheduledCall.request, - executionError instanceof Error - ? new Error(exceptionErrorMessage) - : new Error(String(executionError)), - ToolErrorType.UNHANDLED_EXCEPTION, - ), - ); - setToolSpanFailure( - span, - TOOL_FAILURE_KIND_TOOL_EXCEPTION, - TOOL_SPAN_STATUS_TOOL_EXCEPTION, - ); + // Append additional context from hook if provided + let cancelMessage = 'User cancelled tool execution.'; + if (failureHookResult.additionalContext) { + cancelMessage += `\n\n${failureHookResult.additionalContext}`; } + this.setStatusInternal(callId, 'cancelled', cancelMessage); + } else { + this.setStatusInternal( + callId, + 'cancelled', + 'User cancelled tool execution.', + ); } - }, - { autoOkOnSuccess: false }, - ); + setToolSpanCancelled(span); + return; + } else { + // PostToolUseFailure Hook + let exceptionErrorMessage = errorMessage; + if (hooksEnabled && messageBus) { + const failureHookResult = await safelyFirePostToolUseFailureHook( + messageBus, + toolUseId, + toolName, + toolInput, + errorMessage, + false, + this.config.getApprovalMode(), + ); + + // Append additional context from hook if provided + if (failureHookResult.additionalContext) { + exceptionErrorMessage += `\n\n${failureHookResult.additionalContext}`; + } + } + this.setStatusInternal( + callId, + 'error', + createErrorResponse( + scheduledCall.request, + executionError instanceof Error + ? new Error(exceptionErrorMessage) + : new Error(String(executionError)), + ToolErrorType.UNHANDLED_EXCEPTION, + ), + ); + setToolSpanFailure( + span, + TOOL_FAILURE_KIND_TOOL_EXCEPTION, + TOOL_SPAN_STATUS_TOOL_EXCEPTION, + ); + } + } } private async checkAndNotifyCompletion(): Promise { diff --git a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts index 859c0094e8..58e08c68af 100644 --- a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts +++ b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.test.ts @@ -95,14 +95,21 @@ vi.mock('@opentelemetry/api', async (importOriginal) => { }; }); -vi.mock('../../telemetry/tracer.js', () => { +vi.mock('../../telemetry/tracer.js', () => ({ + API_CALL_FAILED_SPAN_STATUS_MESSAGE: 'API call failed', +})); + +vi.mock('../../telemetry/index.js', () => { function createSpan( name: string, attributes: Record, ) { const record = { name, - attributes, + attributes: { ...attributes } as Record< + string, + string | number | boolean + >, statuses: [] as Array<{ code: number; message?: string }>, ended: false, }; @@ -115,7 +122,12 @@ vi.mock('../../telemetry/tracer.js', () => { } record.statuses.push(status); }, - setAttribute: vi.fn(), + setAttribute(key: string, value: string | number | boolean) { + record.attributes[key] = value; + }, + setAttributes(attrs: Record) { + Object.assign(record.attributes, attrs); + }, end() { record.ended = true; }, @@ -127,79 +139,42 @@ vi.mock('../../telemetry/tracer.js', () => { }; } - function runWithActive(label: string, fn: () => T): T { - const previous = activeOtelContext.current; - activeOtelContext.current = label; - try { - const result = fn(); - if (result instanceof Promise) { - return result.finally(() => { - activeOtelContext.current = previous; - }) as T; - } - activeOtelContext.current = previous; - return result; - } catch (error) { - activeOtelContext.current = previous; - throw error; - } - } - return { - API_CALL_FAILED_SPAN_STATUS_MESSAGE: 'API call failed', - safeSetStatus: ( - span: { setStatus: (status: { code: number; message?: string }) => void }, - status: { code: number; message?: string }, - ) => { - try { - span.setStatus(status); - } catch { - // Match production best-effort telemetry behavior. - } - }, - withSpan: vi.fn( - async ( - name: string, - attributes: Record, - fn: (span: ReturnType) => Promise, + startLLMRequestSpan: vi.fn((model: string, promptId: string) => + createSpan('qwen-code.llm_request', { + model, + prompt_id: promptId, + }), + ), + endLLMRequestSpan: vi.fn( + ( + span: ReturnType, + metadata?: { + success: boolean; + inputTokens?: number; + outputTokens?: number; + durationMs?: number; + error?: string; + }, ) => { - const span = createSpan(name, attributes); - let statusSet = false; - const wrappedSpan = { - ...span, - setStatus(status: { code: number; message?: string }) { - statusSet = true; - return span.setStatus(status); - }, - }; try { - const result = await fn(wrappedSpan); - if (!statusSet) { - span.setStatus({ code: 1 }); + if (metadata) { + if (metadata.success) { + span.setStatus({ code: 1 }); // OK + } else { + span.setStatus({ + code: 2, + message: metadata.error ?? 'unknown error', + }); // ERROR + } } - return result; - } catch (error) { - if (!statusSet) { - span.setStatus({ - code: 2, - message: error instanceof Error ? error.message : String(error), - }); - } - throw error; - } finally { + span.end(); + } catch { + // Match production best-effort behavior. span.end(); } }, ), - startSpanWithContext: vi.fn( - (name: string, attributes: Record) => { - const span = createSpan(name, attributes); - return { - span, - runInContext: (fn: () => T): T => runWithActive(name, fn), - }; - }, - ), }; }); @@ -290,20 +265,20 @@ const createResponse = ( const getStreamSpanRecord = () => { const spanRecord = loggingSpanRecords.find( - (record) => record.name === 'api.generateContentStream', + (record) => record.name === 'qwen-code.llm_request', ); if (!spanRecord) { - throw new Error('api.generateContentStream span was not created'); + throw new Error('qwen-code.llm_request span was not created'); } return spanRecord; }; const getGenerateContentSpanRecord = () => { const spanRecord = loggingSpanRecords.find( - (record) => record.name === 'api.generateContent', + (record) => record.name === 'qwen-code.llm_request', ); if (!spanRecord) { - throw new Error('api.generateContent span was not created'); + throw new Error('qwen-code.llm_request span was not created'); } return spanRecord; }; @@ -461,14 +436,69 @@ describe('LoggingContentGenerator', () => { await generator.generateContent(request, 'prompt-span'); const spanRecord = getGenerateContentSpanRecord(); - expect(spanRecord.attributes).toEqual({ + expect(spanRecord.attributes).toMatchObject({ model: 'test-model', prompt_id: 'prompt-span', + 'llm_request.stream': false, }); expect(spanRecord.statuses).toEqual([{ code: SpanStatusCode.OK }]); expect(spanRecord.ended).toBe(true); }); + it('marks non-stream LLM span with llm_request.stream=false', async () => { + const wrapped = createWrappedGenerator( + vi + .fn() + .mockResolvedValue( + createResponse('resp', 'test-model', [{ text: 'ok' }]), + ), + vi.fn(), + ); + const generator = new LoggingContentGenerator(wrapped, createConfig(), { + model: 'test-model', + authType: AuthType.USE_OPENAI, + enableOpenAILogging: false, + }); + const request = { + model: 'test-model', + contents: 'Hello', + } as unknown as GenerateContentParameters; + + await generator.generateContent(request, 'prompt-stream-attr'); + + const spanRecord = getGenerateContentSpanRecord(); + expect(spanRecord.attributes['llm_request.stream']).toBe(false); + }); + + it('marks streaming LLM span with llm_request.stream=true', async () => { + const streamFn = vi.fn().mockResolvedValue( + (async function* () { + yield createResponse('resp-1', 'test-model', [{ text: 'ok' }]); + })(), + ); + const wrapped = createWrappedGenerator(vi.fn(), streamFn); + const generator = new LoggingContentGenerator(wrapped, createConfig(), { + model: 'test-model', + authType: AuthType.USE_OPENAI, + enableOpenAILogging: false, + }); + const request = { + model: 'test-model', + contents: 'Hello', + } as unknown as GenerateContentParameters; + + const stream = await generator.generateContentStream( + request, + 'prompt-stream-attr', + ); + for await (const _ of stream) { + // consume + } + + const spanRecord = getStreamSpanRecord(); + expect(spanRecord.attributes['llm_request.stream']).toBe(true); + }); + it('preserves non-stream success when response and OpenAI logging fail', async () => { vi.mocked(logApiResponse).mockImplementationOnce(() => { throw new Error('response-log-fail'); @@ -790,7 +820,7 @@ describe('LoggingContentGenerator', () => { }); it('preserves stream success when the OK status update fails', async () => { - loggingSpanNamesWithSetStatusFailure.add('api.generateContentStream'); + loggingSpanNamesWithSetStatusFailure.add('qwen-code.llm_request'); const response = createResponse('resp-status', 'model-stream', [ { text: 'ok' }, ]); @@ -858,7 +888,7 @@ describe('LoggingContentGenerator', () => { // Consume stream to trigger cleanup. } - expect(activeContextDuringWrappedCall).toBe('api.generateContentStream'); + expect(activeContextDuringWrappedCall).toBe('qwen-code.llm_request'); }); it('logs stream setup errors before leaving the stream span context', async () => { @@ -889,7 +919,7 @@ describe('LoggingContentGenerator', () => { ).rejects.toThrow('setup-fail'); expect(logApiError).toHaveBeenCalledTimes(1); - expect(activeContextDuringApiError).toBe('api.generateContentStream'); + expect(activeContextDuringApiError).toBe('qwen-code.llm_request'); expect(spanEndedDuringApiError).toBe(false); const spanRecord = getStreamSpanRecord(); @@ -1002,7 +1032,7 @@ describe('LoggingContentGenerator', () => { }); it('preserves stream errors when the error status update fails', async () => { - loggingSpanNamesWithSetStatusFailure.add('api.generateContentStream'); + loggingSpanNamesWithSetStatusFailure.add('qwen-code.llm_request'); const response1 = createResponse('resp-1', 'model-stream', [ { text: 'partial' }, ]); diff --git a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts index 14ffad8156..c5330bce8d 100644 --- a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts +++ b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts @@ -20,13 +20,7 @@ import { type FinishReason, } from '@google/genai'; import type OpenAI from 'openai'; -import { - SpanStatusCode, - context, - trace, - type Context, - type Span, -} from '@opentelemetry/api'; +import { context, trace, type Context, type Span } from '@opentelemetry/api'; import { ApiRequestEvent, ApiResponseEvent, @@ -56,11 +50,10 @@ import { getErrorType, } from '../../utils/errors.js'; import { - API_CALL_FAILED_SPAN_STATUS_MESSAGE, - safeSetStatus, - startSpanWithContext, - withSpan, -} from '../../telemetry/tracer.js'; + startLLMRequestSpan, + endLLMRequestSpan, +} from '../../telemetry/index.js'; +import { API_CALL_FAILED_SPAN_STATUS_MESSAGE } from '../../telemetry/tracer.js'; const debugLogger = createDebugLogger('LOGGING_CONTENT_GENERATOR'); @@ -210,88 +203,101 @@ export class LoggingContentGenerator implements ContentGenerator { req: GenerateContentParameters, userPromptId: string, ): Promise { - return withSpan( - 'api.generateContent', - { model: req.model, prompt_id: userPromptId }, - async (span) => { - const startTime = Date.now(); - const isInternal = isInternalPromptId(userPromptId); - const session = this.startCaptureSession(); - try { - if (!isInternal) { - this.logApiRequest( - this.toContents(req.contents), - req.model, - userPromptId, - ); - } - const response = await session.wrap(() => - this.wrapped.generateContent(req, userPromptId), + const llmSpan = startLLMRequestSpan(req.model, userPromptId); + try { + llmSpan.setAttribute('llm_request.stream', false); + } catch { + /* best-effort */ + } + // Capture span context so the API call and logging activate it via + // context.with(). Without this, nested OTel spans (HTTP instrumentation, + // log-bridge spans) parent to session root instead of llm_request. + const spanContext = trace.setSpan(context.active(), llmSpan); + + const startTime = Date.now(); + const isInternal = isInternalPromptId(userPromptId); + const session = this.startCaptureSession(); + try { + const response = await context.with(spanContext, async () => { + if (!isInternal) { + this.logApiRequest( + this.toContents(req.contents), + req.model, + userPromptId, ); - const durationMs = Date.now() - startTime; - const responseText = isInternal - ? undefined - : this.extractResponseText(response); - this.safelyLogApiResponse( - response.responseId ?? '', - durationMs, - response.modelVersion || req.model, + } + const result = await session.wrap(() => + this.wrapped.generateContent(req, userPromptId), + ); + const durationMs = Date.now() - startTime; + const responseText = isInternal + ? undefined + : this.extractResponseText(result); + this.safelyLogApiResponse( + result.responseId ?? '', + durationMs, + result.modelVersion || req.model, + userPromptId, + result.usageMetadata, + responseText, + ); + try { + await this.safelyLogOpenAIInteraction( + await session.resolve(req), + result, + undefined, userPromptId, - response.usageMetadata, - responseText, ); - try { - await this.safelyLogOpenAIInteraction( - await session.resolve(req), - response, - undefined, - userPromptId, - ); - } catch (loggingError) { - debugLogger.warn('Failed to log OpenAI interaction:', loggingError); - } - return response; - } catch (error) { - const durationMs = Date.now() - startTime; - this.safelyLogApiError( - '', - durationMs, + } catch (loggingError) { + debugLogger.warn('Failed to log OpenAI interaction:', loggingError); + } + return result; + }); + endLLMRequestSpan(llmSpan, { + success: true, + inputTokens: response.usageMetadata?.promptTokenCount, + outputTokens: response.usageMetadata?.candidatesTokenCount, + durationMs: Date.now() - startTime, + }); + return response; + } catch (error) { + const durationMs = Date.now() - startTime; + await context.with(spanContext, async () => { + this.safelyLogApiError('', durationMs, error, req.model, userPromptId); + try { + await this.safelyLogOpenAIInteraction( + await session.resolve(req), + undefined, error, - req.model, userPromptId, ); - try { - await this.safelyLogOpenAIInteraction( - await session.resolve(req), - undefined, - error, - userPromptId, - ); - } catch (loggingError) { - debugLogger.warn('Failed to log OpenAI interaction:', loggingError); - } - safeSetStatus(span, { - code: SpanStatusCode.ERROR, - message: API_CALL_FAILED_SPAN_STATUS_MESSAGE, - }); - throw error; + } catch (loggingError) { + debugLogger.warn('Failed to log OpenAI interaction:', loggingError); } - }, - ); + }); + endLLMRequestSpan(llmSpan, { + success: false, + durationMs, + error: API_CALL_FAILED_SPAN_STATUS_MESSAGE, + }); + throw error; + } } async generateContentStream( req: GenerateContentParameters, userPromptId: string, ): Promise> { - const { span, runInContext } = startSpanWithContext( - 'api.generateContentStream', - { model: req.model, prompt_id: userPromptId }, - ); + const llmSpan = startLLMRequestSpan(req.model, userPromptId); + try { + llmSpan.setAttribute('llm_request.stream', true); + } catch { + /* best-effort */ + } // Capture the span context so the stream wrapper can activate it // during iteration — not just during generator creation. - const spanContext = trace.setSpan(context.active(), span); + const spanContext = trace.setSpan(context.active(), llmSpan); const startTime = Date.now(); const isInternal = isInternalPromptId(userPromptId); @@ -299,7 +305,7 @@ export class LoggingContentGenerator implements ContentGenerator { let stream: AsyncGenerator; try { - stream = await runInContext(async () => { + stream = await context.with(spanContext, async () => { if (!isInternal) { this.logApiRequest( this.toContents(req.contents), @@ -312,19 +318,15 @@ export class LoggingContentGenerator implements ContentGenerator { ); }); } catch (error) { - safeSetStatus(span, { - code: SpanStatusCode.ERROR, - message: API_CALL_FAILED_SPAN_STATUS_MESSAGE, - }); const durationMs = Date.now() - startTime; - runInContext(() => + context.with(spanContext, () => this.safelyLogApiError('', durationMs, error, req.model, userPromptId), ); - try { - span.end(); - } catch { - // OTel errors must not mask the original API error - } + endLLMRequestSpan(llmSpan, { + success: false, + durationMs, + error: API_CALL_FAILED_SPAN_STATUS_MESSAGE, + }); try { await this.safelyLogOpenAIInteraction( await session.resolve(req), @@ -347,14 +349,14 @@ export class LoggingContentGenerator implements ContentGenerator { } } - return runInContext(() => + return context.with(spanContext, () => this.loggingStreamWrapper( stream, startTime, userPromptId, req.model, resolvedRequest, - span, + llmSpan, spanContext, ), ); @@ -402,8 +404,12 @@ export class LoggingContentGenerator implements ContentGenerator { let firstResponseId = ''; let firstModelVersion = ''; let lastUsageMetadata: GenerateContentResponseUsageMetadata | undefined; - let terminalStatusAttempted = false; - let spanEnded = false; + let errorOccurred = false; + // Tracks whether the idle timeout fired and ended the span. If so, + // a resumed-after-timeout consumer must not call endLLMRequestSpan + // again (the helper would no-op, but more importantly we skip the + // redundant work and avoid resetting the timer further). + let spanEndedByTimeout = false; // Helper to run code within the span context during iteration. // This ensures debug log lines emitted during stream processing @@ -419,20 +425,20 @@ export class LoggingContentGenerator implements ContentGenerator { let spanEndTimeout: ReturnType | undefined; const resetSpanTimeout = span ? () => { - if (spanEnded) return; + if (spanEndedByTimeout) return; if (spanEndTimeout !== undefined) clearTimeout(spanEndTimeout); spanEndTimeout = setTimeout(() => { try { - safeSetStatus(span, { - code: SpanStatusCode.ERROR, - message: 'Stream span timed out (idle)', - }); - spanEnded = true; span.setAttribute('stream.timed_out', true); - span.end(); } catch { // OTel errors must not interrupt the consumer. } + endLLMRequestSpan(span, { + success: false, + durationMs: Date.now() - startTime, + error: 'Stream span timed out (idle)', + }); + spanEndedByTimeout = true; }, STREAM_IDLE_TIMEOUT_MS); spanEndTimeout.unref(); } @@ -485,11 +491,8 @@ export class LoggingContentGenerator implements ContentGenerator { userPromptId, ), ); - terminalStatusAttempted = true; - if (span) { - safeSetStatus(span, { code: SpanStatusCode.OK }); - } } catch (error) { + errorOccurred = true; const durationMs = Date.now() - startTime; runInSpan(() => this.safelyLogApiError( @@ -508,29 +511,25 @@ export class LoggingContentGenerator implements ContentGenerator { userPromptId, ), ); - terminalStatusAttempted = true; - if (span) { - safeSetStatus(span, { - code: SpanStatusCode.ERROR, - message: API_CALL_FAILED_SPAN_STATUS_MESSAGE, - }); - } throw error; } finally { if (spanEndTimeout !== undefined) { clearTimeout(spanEndTimeout); } - if (!spanEnded) { - if (!terminalStatusAttempted) { - if (span) { - safeSetStatus(span, { code: SpanStatusCode.OK }); - } - } - try { - span?.end(); - } catch { - // OTel errors must not mask the original API error - } + // If the idle timeout already ended the span, skip the redundant + // endLLMRequestSpan call. The helper itself would no-op due to its + // own ended guard, but we want to avoid pretending the final token + // counts were recorded — they weren't, the span is the timeout one. + if (span && !spanEndedByTimeout) { + endLLMRequestSpan(span, { + success: !errorOccurred, + inputTokens: lastUsageMetadata?.promptTokenCount, + outputTokens: lastUsageMetadata?.candidatesTokenCount, + durationMs: Date.now() - startTime, + error: errorOccurred + ? API_CALL_FAILED_SPAN_STATUS_MESSAGE + : undefined, + }); } } } diff --git a/packages/core/src/telemetry/index.ts b/packages/core/src/telemetry/index.ts index 421338b319..a5a3385ed7 100644 --- a/packages/core/src/telemetry/index.ts +++ b/packages/core/src/telemetry/index.ts @@ -143,6 +143,7 @@ export { endLLMRequestSpan, startToolSpan, endToolSpan, + runInToolSpanContext, startToolExecutionSpan, endToolExecutionSpan, clearSessionTracingForTesting, diff --git a/packages/core/src/telemetry/session-tracing.test.ts b/packages/core/src/telemetry/session-tracing.test.ts index ceb0fdaba5..b2b10f4972 100644 --- a/packages/core/src/telemetry/session-tracing.test.ts +++ b/packages/core/src/telemetry/session-tracing.test.ts @@ -95,6 +95,7 @@ vi.mock('@opentelemetry/api', async () => { }, context: { active: () => ({}), + with: (_ctx: unknown, fn: () => T): T => fn(), }, }; }); @@ -107,6 +108,7 @@ import { endLLMRequestSpan, startToolSpan, endToolSpan, + runInToolSpanContext, startToolExecutionSpan, endToolExecutionSpan, clearSessionTracingForTesting, @@ -348,11 +350,11 @@ describe('session-tracing', () => { expect(mockSpans[0]!.statuses[0]!.message).toBe('command failed'); }); - it('defaults to OK when success is undefined', () => { + it('does not set status when no metadata is passed', () => { const span = startToolSpan('Read'); endToolSpan(span); - expect(mockSpans[0]!.statuses[0]!.code).toBe(SpanStatusCode.OK); + expect(mockSpans[0]!.statuses).toHaveLength(0); }); it('concurrent tool spans are isolated', () => { @@ -388,12 +390,17 @@ describe('session-tracing', () => { }); describe('tool execution sub-spans', () => { - it('creates a tool execution span as child of tool span', () => { + it('creates a tool execution span as child of tool span via runInToolSpanContext', () => { const toolSpan = startToolSpan('Bash'); - const execSpan = startToolExecutionSpan(toolSpan); + + let execSpan!: ReturnType; + runInToolSpanContext(toolSpan, () => { + execSpan = startToolExecutionSpan(); + }); expect(mockSpans).toHaveLength(2); expect(mockSpans[1]!.name).toBe('qwen-code.tool.execution'); + expect(mockSpans[1]!.parentContext).toBeDefined(); endToolExecutionSpan(execSpan, { success: true }); endToolSpan(toolSpan, { success: true }); @@ -403,11 +410,73 @@ describe('session-tracing', () => { it('returns NOOP span when SDK is not initialized', () => { mockState.sdkInitialized = false; - const toolSpan = startToolSpan('Bash'); - const execSpan = startToolExecutionSpan(toolSpan); + startToolSpan('Bash'); + const execSpan = startToolExecutionSpan(); expect(execSpan.spanContext().traceId).toBe('0'.repeat(32)); }); + + it('falls back gracefully when no tool span is active', () => { + const execSpan = startToolExecutionSpan(); + + expect(mockSpans).toHaveLength(1); + expect(mockSpans[0]!.name).toBe('qwen-code.tool.execution'); + + endToolExecutionSpan(execSpan, { success: true }); + expect(mockSpans[0]!.ended).toBe(true); + }); + }); + + describe('toolContext ALS lifecycle', () => { + it('runInToolSpanContext scopes toolContext via run(), not enterWith', () => { + const toolSpan = startToolSpan('Bash'); + + let execSpanInsideContext: ReturnType; + + runInToolSpanContext(toolSpan, () => { + execSpanInsideContext = startToolExecutionSpan(); + }); + const execSpanOutsideContext = startToolExecutionSpan(); + + // Inside context: should have parent + const insideRecord = mockSpans.find( + (s) => + s.name === 'qwen-code.tool.execution' && + (s.parentContext as Record)?.['__parentSpan'], + ); + expect(insideRecord).toBeDefined(); + + // Outside context: should NOT have tool parent + const outsideRecord = mockSpans.filter( + (s) => s.name === 'qwen-code.tool.execution', + ); + expect(outsideRecord).toHaveLength(2); + const noParent = outsideRecord.find( + (s) => !(s.parentContext as Record)?.['__parentSpan'], + ); + expect(noParent).toBeDefined(); + + endToolExecutionSpan(execSpanInsideContext!, { success: true }); + endToolExecutionSpan(execSpanOutsideContext!, { success: true }); + endToolSpan(toolSpan, { success: true }); + }); + + it('endToolSpan without metadata preserves pre-set status', () => { + const toolSpan = startToolSpan('Bash'); + // Simulate setToolSpanFailure calling setStatus directly + ( + toolSpan as unknown as MockSpanRecord & { + setStatus: (s: { code: number; message?: string }) => void; + } + ).setStatus({ code: SpanStatusCode.ERROR, message: 'hook blocked' }); + + endToolSpan(toolSpan); + + // endToolSpan should NOT have added another status + const toolRecord = mockSpans.find((s) => s.name === 'qwen-code.tool'); + expect(toolRecord!.statuses).toHaveLength(1); + expect(toolRecord!.statuses[0]!.code).toBe(SpanStatusCode.ERROR); + }); }); describe('clearSessionTracingForTesting', () => { diff --git a/packages/core/src/telemetry/session-tracing.ts b/packages/core/src/telemetry/session-tracing.ts index 513ed1b1bc..08fef40d08 100644 --- a/packages/core/src/telemetry/session-tracing.ts +++ b/packages/core/src/telemetry/session-tracing.ts @@ -22,6 +22,10 @@ import { SPAN_TOOL_EXECUTION, } from './constants.js'; import { isTelemetrySdkInitialized } from './sdk.js'; +import { getSessionContext } from './session-context.js'; +import { createDebugLogger } from '../utils/debugLogger.js'; + +const debugLogger = createDebugLogger('SESSION_TRACING'); type InteractionStatus = 'ok' | 'error' | 'cancelled'; @@ -53,7 +57,13 @@ interface SpanContext { startTime: number; attributes: Record; ended?: boolean; - type: 'interaction' | 'llm_request' | 'tool' | 'tool.execution'; + type: + | 'interaction' + | 'llm_request' + | 'tool' + | 'tool.execution' + | 'tool.blocked_on_user' + | 'hook'; } const NOOP_SPAN = trace.wrapSpanContext({ @@ -63,6 +73,7 @@ const NOOP_SPAN = trace.wrapSpanContext({ }); const interactionContext = new AsyncLocalStorage(); +const toolContext = new AsyncLocalStorage(); const activeSpans = new Map>(); const strongSpans = new Map(); @@ -183,9 +194,13 @@ export function startLLMRequestSpan(model: string, promptId: string): Span { } const parentCtx = interactionContext.getStore(); + // Fall back to session root context (deterministic traceId from sessionId) + // for side-query LLM calls (auto-title, recap, etc.) that run outside an + // interaction. Without this, those spans start a fresh trace and lose + // correlation with the session. const ctx = parentCtx ? trace.setSpan(otelContext.active(), parentCtx.span) - : otelContext.active(); + : (getSessionContext() ?? otelContext.active()); const attributes: Attributes = { 'qwen-code.model': model, @@ -222,30 +237,43 @@ export function endLLMRequestSpan( spanCtx.ended = true; - const duration = metadata?.durationMs ?? Date.now() - spanCtx.startTime; - const endAttributes: Attributes = { duration_ms: duration }; + try { + const duration = metadata?.durationMs ?? Date.now() - spanCtx.startTime; + const endAttributes: Attributes = { duration_ms: duration }; - if (metadata) { - if (metadata.inputTokens !== undefined) - endAttributes['input_tokens'] = metadata.inputTokens; - if (metadata.outputTokens !== undefined) - endAttributes['output_tokens'] = metadata.outputTokens; - endAttributes['success'] = metadata.success; - if (metadata.error !== undefined) endAttributes['error'] = metadata.error; - } + if (metadata) { + if (metadata.inputTokens !== undefined) + endAttributes['input_tokens'] = metadata.inputTokens; + if (metadata.outputTokens !== undefined) + endAttributes['output_tokens'] = metadata.outputTokens; + endAttributes['success'] = metadata.success; + if (metadata.error !== undefined) endAttributes['error'] = metadata.error; + } - span.setAttributes(endAttributes); + span.setAttributes(endAttributes); - if (metadata === undefined || metadata.success) { - span.setStatus({ code: SpanStatusCode.OK }); - } else { - span.setStatus({ - code: SpanStatusCode.ERROR, - message: metadata.error ?? 'unknown error', - }); + if (metadata === undefined || metadata.success) { + span.setStatus({ code: SpanStatusCode.OK }); + } else { + span.setStatus({ + code: SpanStatusCode.ERROR, + message: metadata.error ?? 'unknown error', + }); + } + } catch (error) { + debugLogger.warn( + `Failed to update LLM request span attributes/status: ${error instanceof Error ? error.message : String(error)}`, + ); + } + // span.end() must run even if attribute/status updates threw, + // otherwise the span leaks (never exported, never cleared from activeSpans). + try { + span.end(); + } catch (error) { + debugLogger.warn( + `Failed to end LLM request span: ${error instanceof Error ? error.message : String(error)}`, + ); } - - span.end(); activeSpans.delete(spanId); strongSpans.delete(spanId); } @@ -261,9 +289,10 @@ export function startToolSpan( } const parentCtx = interactionContext.getStore(); + // Same session-root fallback as startLLMRequestSpan. const ctx = parentCtx ? trace.setSpan(otelContext.active(), parentCtx.span) - : otelContext.active(); + : (getSessionContext() ?? otelContext.active()); const attributes: Attributes = { 'tool.name': toolName, @@ -289,6 +318,29 @@ export function startToolSpan( return span; } +/** + * Runs a callback within the tool span's AsyncLocalStorage context AND + * OpenTelemetry context. Use this instead of enterWith() to scope the + * context to a single async call tree — safe for concurrent tool calls. + * + * Setting the OTel context ensures any nested OTel spans/logs emitted + * during the callback (HTTP instrumentation, hooks, log-bridge spans) + * inherit the tool span as parent. + */ +export function runInToolSpanContext(span: Span, fn: () => T): T { + const spanId = getSpanId(span); + const spanCtx = activeSpans.get(spanId)?.deref(); + if (!spanCtx) return fn(); + const otelCtxWithSpan = trace.setSpan(otelContext.active(), span); + return toolContext.run(spanCtx, () => otelContext.with(otelCtxWithSpan, fn)); +} + +/** + * When metadata is omitted, span status is NOT set — callers on failure paths + * must pre-set status via setToolSpanFailure/setToolSpanCancelled before calling + * this. This asymmetry with endLLMRequestSpan (which defaults to OK) is intentional: + * tool spans have multiple failure modes that set status before endToolSpan runs. + */ export function endToolSpan(span: Span, metadata?: ToolSpanMetadata): void { const spanId = getSpanId(span); const spanCtx = activeSpans.get(spanId)?.deref(); @@ -296,39 +348,61 @@ export function endToolSpan(span: Span, metadata?: ToolSpanMetadata): void { spanCtx.ended = true; - const duration = Date.now() - spanCtx.startTime; - const endAttributes: Attributes = { duration_ms: duration }; + try { + const duration = Date.now() - spanCtx.startTime; + const endAttributes: Attributes = { duration_ms: duration }; - if (metadata) { - if (metadata.success !== undefined) - endAttributes['success'] = metadata.success; - if (metadata.error !== undefined) endAttributes['error'] = metadata.error; - } + if (metadata) { + if (metadata.success !== undefined) + endAttributes['success'] = metadata.success; + if (metadata.error !== undefined) endAttributes['error'] = metadata.error; + } - spanCtx.span.setAttributes(endAttributes); + spanCtx.span.setAttributes(endAttributes); - if (metadata?.success !== false) { - spanCtx.span.setStatus({ code: SpanStatusCode.OK }); - } else { - spanCtx.span.setStatus({ - code: SpanStatusCode.ERROR, - message: metadata?.error ?? 'tool error', - }); + if (metadata) { + if (metadata.success !== false) { + spanCtx.span.setStatus({ code: SpanStatusCode.OK }); + } else { + spanCtx.span.setStatus({ + code: SpanStatusCode.ERROR, + message: metadata.error ?? 'tool error', + }); + } + } + } catch (error) { + debugLogger.warn( + `Failed to update tool span attributes/status: ${error instanceof Error ? error.message : String(error)}`, + ); + } + // span.end() must run even if attribute/status updates threw. + try { + spanCtx.span.end(); + } catch (error) { + debugLogger.warn( + `Failed to end tool span: ${error instanceof Error ? error.message : String(error)}`, + ); } - - spanCtx.span.end(); activeSpans.delete(spanId); strongSpans.delete(spanId); } // --- Tool Execution Sub-Spans --- -export function startToolExecutionSpan(parentToolSpan: Span): Span { +export function startToolExecutionSpan(): Span { if (!isTelemetrySdkInitialized()) { return NOOP_SPAN; } - const ctx = trace.setSpan(otelContext.active(), parentToolSpan); + const parentCtx = toolContext.getStore(); + if (!parentCtx) { + debugLogger.warn( + 'startToolExecutionSpan called outside runInToolSpanContext — span will not be parented to tool span', + ); + } + const ctx = parentCtx + ? trace.setSpan(otelContext.active(), parentCtx.span) + : (getSessionContext() ?? otelContext.active()); const span = getTracer().startSpan( SPAN_TOOL_EXECUTION, @@ -362,27 +436,39 @@ export function endToolExecutionSpan( spanCtx.ended = true; - const duration = Date.now() - spanCtx.startTime; - const endAttributes: Attributes = { duration_ms: duration }; + try { + const duration = Date.now() - spanCtx.startTime; + const endAttributes: Attributes = { duration_ms: duration }; - if (metadata) { - if (metadata.success !== undefined) - endAttributes['success'] = metadata.success; - if (metadata.error !== undefined) endAttributes['error'] = metadata.error; - } + if (metadata) { + if (metadata.success !== undefined) + endAttributes['success'] = metadata.success; + if (metadata.error !== undefined) endAttributes['error'] = metadata.error; + } - spanCtx.span.setAttributes(endAttributes); + spanCtx.span.setAttributes(endAttributes); - if (metadata?.success !== false) { - spanCtx.span.setStatus({ code: SpanStatusCode.OK }); - } else { - spanCtx.span.setStatus({ - code: SpanStatusCode.ERROR, - message: metadata?.error ?? 'tool execution error', - }); + if (metadata?.success !== false) { + spanCtx.span.setStatus({ code: SpanStatusCode.OK }); + } else { + spanCtx.span.setStatus({ + code: SpanStatusCode.ERROR, + message: metadata?.error ?? 'tool execution error', + }); + } + } catch (error) { + debugLogger.warn( + `Failed to update tool execution span attributes/status: ${error instanceof Error ? error.message : String(error)}`, + ); + } + // span.end() must run even if attribute/status updates threw. + try { + spanCtx.span.end(); + } catch (error) { + debugLogger.warn( + `Failed to end tool execution span: ${error instanceof Error ? error.message : String(error)}`, + ); } - - spanCtx.span.end(); activeSpans.delete(spanId); strongSpans.delete(spanId); } @@ -393,6 +479,7 @@ export function clearSessionTracingForTesting(): void { activeSpans.clear(); strongSpans.clear(); interactionContext.enterWith(undefined); + toolContext.enterWith(undefined); interactionSequence = 0; lastInteractionCtx = undefined; }