Skip to content

Commit 609e32b

Browse files
committed
feat(telemetry): unify span creation paths for hierarchical trace tree (#3731 P3 Phase 1)
Replace disconnected withSpan/startSpanWithContext calls in runtime with session-tracing typed helpers so LLM and tool spans become children of the interaction span instead of siblings under the session root.

- Add toolContext ALS with runInToolSpanContext() for concurrent-safe tool span scoping (uses AsyncLocalStorage.run, not enterWith)
- Wire startLLMRequestSpan/endLLMRequestSpan in loggingContentGenerator for both streaming and non-streaming paths
- Wire startToolSpan/endToolSpan + startToolExecutionSpan/endToolExecutionSpan in coreToolScheduler with proper try/finally lifecycle
- Remove redundant withSpan('client.generateContent') wrapper from client.ts
- Fix endToolSpan to not override pre-set status when metadata is omitted
- Change startToolExecutionSpan to read parent from toolContext ALS
- Update tests for new span creation APIs and remove dead test infrastructure
1 parent d343e2c commit 609e32b

9 files changed

Lines changed: 1152 additions & 809 deletions

File tree

docs/design/workflow-tracing-gaps.md

Lines changed: 376 additions & 0 deletions
Large diffs are not rendered by default.

packages/core/src/core/client.test.ts

Lines changed: 2 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@ import {
1515
} from 'vitest';
1616

1717
import type { Content, GenerateContentResponse, Part } from '@google/genai';
18-
import { SpanStatusCode } from '@opentelemetry/api';
1918
import { GeminiClient, SendMessageType } from './client.js';
2019
import { findCompressSplitPoint } from '../services/chatCompressionService.js';
2120
import {
@@ -161,29 +160,9 @@ const mockUiTelemetryService = vi.hoisted(() => ({
161160
reset: vi.fn(),
162161
addEvent: vi.fn(),
163162
}));
164-
const clientSpanCalls = vi.hoisted(
165-
(): Array<{
166-
name: string;
167-
attributes: Record<string, string | number | boolean>;
168-
statuses: Array<{ code: number; message?: string }>;
169-
}> => [],
170-
);
171-
const mockWithSpan = vi.hoisted(() => vi.fn());
172-
173163
vi.mock('../telemetry/tracer.js', () => ({
174164
API_CALL_ABORTED_SPAN_STATUS_MESSAGE: 'API call aborted',
175165
API_CALL_FAILED_SPAN_STATUS_MESSAGE: 'API call failed',
176-
safeSetStatus: (
177-
span: { setStatus: (status: { code: number; message?: string }) => void },
178-
status: { code: number; message?: string },
179-
) => {
180-
try {
181-
span.setStatus(status);
182-
} catch {
183-
// Match production best-effort telemetry behavior.
184-
}
185-
},
186-
withSpan: mockWithSpan,
187166
}));
188167

189168
vi.mock('../telemetry/index.js', async (importOriginal) => {
@@ -329,32 +308,6 @@ describe('Gemini Client (client.ts)', () => {
329308
};
330309
beforeEach(async () => {
331310
vi.resetAllMocks();
332-
clientSpanCalls.length = 0;
333-
mockWithSpan.mockImplementation(
334-
async (
335-
name: string,
336-
attributes: Record<string, string | number | boolean>,
337-
fn: (span: {
338-
setStatus: ReturnType<typeof vi.fn>;
339-
setAttribute: ReturnType<typeof vi.fn>;
340-
end: ReturnType<typeof vi.fn>;
341-
}) => Promise<unknown>,
342-
) => {
343-
const spanCall = {
344-
name,
345-
attributes,
346-
statuses: [] as Array<{ code: number; message?: string }>,
347-
};
348-
clientSpanCalls.push(spanCall);
349-
return fn({
350-
setStatus: vi.fn((status: { code: number; message?: string }) => {
351-
spanCall.statuses.push(status);
352-
}),
353-
setAttribute: vi.fn(),
354-
end: vi.fn(),
355-
});
356-
},
357-
);
358311
vi.mocked(uiTelemetryService.setLastPromptTokenCount).mockClear();
359312

360313
// Default: createContentGenerator rejects (simulates test env without auth).
@@ -3250,15 +3203,6 @@ Other open files:
32503203
}),
32513204
'btw-prompt-id',
32523205
);
3253-
expect(clientSpanCalls.at(-1)).toEqual(
3254-
expect.objectContaining({
3255-
name: 'client.generateContent',
3256-
attributes: {
3257-
model: DEFAULT_QWEN_FLASH_MODEL,
3258-
prompt_id: 'btw-prompt-id',
3259-
},
3260-
}),
3261-
);
32623206
});
32633207

32643208
it('should prefer an explicit prompt id override over the current context', async () => {
@@ -3286,15 +3230,6 @@ Other open files:
32863230
}),
32873231
'override-prompt-id',
32883232
);
3289-
expect(clientSpanCalls.at(-1)).toEqual(
3290-
expect.objectContaining({
3291-
name: 'client.generateContent',
3292-
attributes: {
3293-
model: DEFAULT_QWEN_FLASH_MODEL,
3294-
prompt_id: 'override-prompt-id',
3295-
},
3296-
}),
3297-
);
32983233
});
32993234

33003235
it('should use config system prompt override when provided', async () => {
@@ -3398,7 +3333,7 @@ Other open files:
33983333
);
33993334
});
34003335

3401-
it('sets a generic span status when content generation fails', async () => {
3336+
it('propagates error when content generation fails', async () => {
34023337
const contents = [{ role: 'user', parts: [{ text: 'hello' }] }];
34033338
const abortSignal = new AbortController().signal;
34043339
mockGenerateContentFn.mockRejectedValueOnce(
@@ -3413,15 +3348,9 @@ Other open files:
34133348
DEFAULT_QWEN_FLASH_MODEL,
34143349
),
34153350
).rejects.toThrow('raw upstream 500 with sensitive details');
3416-
3417-
const spanCall = clientSpanCalls.at(-1);
3418-
expect(spanCall?.statuses).toEqual([
3419-
{ code: SpanStatusCode.ERROR, message: 'API call failed' },
3420-
]);
3421-
expect(JSON.stringify(spanCall?.statuses)).not.toContain('raw upstream');
34223351
});
34233352

3424-
it('sets a generic aborted span status when content generation is aborted', async () => {
3353+
it('propagates error when content generation is aborted', async () => {
34253354
const contents = [{ role: 'user', parts: [{ text: 'hello' }] }];
34263355
const abortController = new AbortController();
34273356
abortController.abort();
@@ -3437,12 +3366,6 @@ Other open files:
34373366
DEFAULT_QWEN_FLASH_MODEL,
34383367
),
34393368
).rejects.toThrow('raw abort reason with sensitive details');
3440-
3441-
const spanCall = clientSpanCalls.at(-1);
3442-
expect(spanCall?.statuses).toEqual([
3443-
{ code: SpanStatusCode.ERROR, message: 'API call aborted' },
3444-
]);
3445-
expect(JSON.stringify(spanCall?.statuses)).not.toContain('raw abort');
34463369
});
34473370

34483371
// Note: there is currently no "fallback mode" model routing; the model used

packages/core/src/core/client.ts

Lines changed: 62 additions & 87 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@ import type {
1212
PartListUnion,
1313
Tool,
1414
} from '@google/genai';
15-
import { SpanStatusCode } from '@opentelemetry/api';
1615

1716
// Config
1817
import { ApprovalMode, type Config } from '../config/config.js';
@@ -101,12 +100,6 @@ import { createHookOutput } from '../hooks/types.js';
101100
import { ideContextStore } from '../ide/ideContext.js';
102101
import { type File, type IdeContext } from '../ide/types.js';
103102
import type { StopHookOutput } from '../hooks/types.js';
104-
import {
105-
API_CALL_ABORTED_SPAN_STATUS_MESSAGE,
106-
API_CALL_FAILED_SPAN_STATUS_MESSAGE,
107-
safeSetStatus,
108-
withSpan,
109-
} from '../telemetry/tracer.js';
110103

111104
const MAX_TURNS = 100;
112105

@@ -1493,89 +1486,71 @@ export class GeminiClient {
14931486
const promptId =
14941487
promptIdOverride ?? promptIdContext.getStore() ?? this.lastPromptId!;
14951488

1496-
return withSpan(
1497-
'client.generateContent',
1498-
{ model, prompt_id: promptId },
1499-
async (span) => {
1500-
let currentAttemptModel: string = model;
1489+
let currentAttemptModel: string = model;
15011490

1502-
try {
1503-
const userMemory = this.config.getUserMemory();
1504-
const finalSystemInstruction = generationConfig.systemInstruction
1505-
? getCustomSystemPrompt(
1506-
generationConfig.systemInstruction,
1507-
userMemory,
1508-
)
1509-
: this.getMainSessionSystemInstruction();
1510-
1511-
const requestConfig: GenerateContentConfig = {
1512-
abortSignal,
1513-
...generationConfig,
1514-
systemInstruction: finalSystemInstruction,
1515-
};
1516-
1517-
// When the requested model differs from the main model (e.g. fast model
1518-
// side queries for session recap / title / summary), resolve the target
1519-
// model's own ContentGeneratorConfig so that per-model settings like
1520-
// extra_body, samplingParams, and reasoning are not inherited from the
1521-
// main model's config. The retry authType is resolved alongside so that
1522-
// provider-specific checks (e.g. QWEN_OAUTH quota detection) reference
1523-
// the target model's provider.
1524-
const { contentGenerator, retryAuthType } = await this.config
1525-
.getBaseLlmClient()
1526-
.resolveForModel(model);
1527-
1528-
const apiCall = () => {
1529-
currentAttemptModel = model;
1530-
1531-
return contentGenerator.generateContent(
1532-
{
1533-
model,
1534-
config: requestConfig,
1535-
contents,
1536-
},
1537-
promptId,
1538-
);
1539-
};
1540-
const result = await retryWithBackoff(apiCall, {
1541-
authType: retryAuthType,
1542-
persistentMode: isUnattendedMode(),
1543-
signal: abortSignal,
1544-
heartbeatFn: (info) => {
1545-
process.stderr.write(
1546-
`[qwen-code] Waiting for API capacity... attempt ${info.attempt}, retry in ${Math.ceil(info.remainingMs / 1000)}s\n`,
1547-
);
1548-
},
1549-
});
1550-
return result;
1551-
} catch (error: unknown) {
1552-
if (abortSignal.aborted) {
1553-
safeSetStatus(span, {
1554-
code: SpanStatusCode.ERROR,
1555-
message: API_CALL_ABORTED_SPAN_STATUS_MESSAGE,
1556-
});
1557-
throw error;
1558-
}
1491+
try {
1492+
const userMemory = this.config.getUserMemory();
1493+
const finalSystemInstruction = generationConfig.systemInstruction
1494+
? getCustomSystemPrompt(generationConfig.systemInstruction, userMemory)
1495+
: this.getMainSessionSystemInstruction();
1496+
1497+
const requestConfig: GenerateContentConfig = {
1498+
abortSignal,
1499+
...generationConfig,
1500+
systemInstruction: finalSystemInstruction,
1501+
};
15591502

1560-
safeSetStatus(span, {
1561-
code: SpanStatusCode.ERROR,
1562-
message: API_CALL_FAILED_SPAN_STATUS_MESSAGE,
1563-
});
1564-
await reportError(
1565-
error,
1566-
`Error generating content via API with model ${currentAttemptModel}.`,
1567-
{
1568-
requestContents: contents,
1569-
requestConfig: generationConfig,
1570-
},
1571-
'generateContent-api',
1572-
);
1573-
throw new Error(
1574-
`Failed to generate content with model ${currentAttemptModel}: ${getErrorMessage(error)}`,
1503+
// When the requested model differs from the main model (e.g. fast model
1504+
// side queries for session recap / title / summary), resolve the target
1505+
// model's own ContentGeneratorConfig so that per-model settings like
1506+
// extra_body, samplingParams, and reasoning are not inherited from the
1507+
// main model's config. The retry authType is resolved alongside so that
1508+
// provider-specific checks (e.g. QWEN_OAUTH quota detection) reference
1509+
// the target model's provider.
1510+
const { contentGenerator, retryAuthType } = await this.config
1511+
.getBaseLlmClient()
1512+
.resolveForModel(model);
1513+
1514+
const apiCall = () => {
1515+
currentAttemptModel = model;
1516+
1517+
return contentGenerator.generateContent(
1518+
{
1519+
model,
1520+
config: requestConfig,
1521+
contents,
1522+
},
1523+
promptId,
1524+
);
1525+
};
1526+
const result = await retryWithBackoff(apiCall, {
1527+
authType: retryAuthType,
1528+
persistentMode: isUnattendedMode(),
1529+
signal: abortSignal,
1530+
heartbeatFn: (info) => {
1531+
process.stderr.write(
1532+
`[qwen-code] Waiting for API capacity... attempt ${info.attempt}, retry in ${Math.ceil(info.remainingMs / 1000)}s\n`,
15751533
);
1576-
}
1577-
},
1578-
);
1534+
},
1535+
});
1536+
return result;
1537+
} catch (error: unknown) {
1538+
if (abortSignal.aborted) {
1539+
throw error;
1540+
}
1541+
await reportError(
1542+
error,
1543+
`Error generating content via API with model ${currentAttemptModel}.`,
1544+
{
1545+
requestContents: contents,
1546+
requestConfig: generationConfig,
1547+
},
1548+
'generateContent-api',
1549+
);
1550+
throw new Error(
1551+
`Failed to generate content with model ${currentAttemptModel}: ${getErrorMessage(error)}`,
1552+
);
1553+
}
15791554
}
15801555

15811556
/**

0 commit comments

Comments
 (0)