Skip to content

Commit a74ec39

Browse files
authored
Agent Debug: Converge debug data with JSONL as single source of truth (#4954)
* converge debug data * Feedback updates * add ver field * exp update * subagent fix * Few fixes and test * settings update * msg update
1 parent a956daf commit a74ec39

File tree

11 files changed

+1695
-495
lines changed

11 files changed

+1695
-495
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4631,6 +4631,7 @@
46314631
"type": "boolean",
46324632
"default": false,
46334633
"markdownDescription": "%github.copilot.config.chat.agentDebugLog.enabled%",
4634+
"deprecationMessage": "%github.copilot.config.chat.agentDebugLog.enabled.deprecated%",
46344635
"tags": [
46354636
"advanced",
46364637
"experimental"

package.nls.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,8 +373,9 @@
373373
"github.copilot.config.editRecording.enabled": "Enable edit recording for analysis.",
374374
"github.copilot.config.inlineChat.selectionRatioThreshold": "Threshold at which to switch editing strategies for inline chat. When a selection portion of code matches a parse tree node, only that is presented to the language model. This speeds up response times but might have lower quality results. Requires having a parse tree for the document and the `inlineChat.enableV2` setting. Values must be between 0 and 1, where 0 means off and 1 means the selection perfectly matches a parse tree node.",
375375
"github.copilot.config.debug.requestLogger.maxEntries": "Maximum number of entries to keep in the request logger for debugging purposes.",
376-
"github.copilot.config.chat.agentDebugLog.enabled": "When enabled, collect agent request information (tool calls, LLM requests, token usage, and errors) for viewing and troubleshooting in VS Code. Requires window reload to take effect.",
377-
"github.copilot.config.chat.agentDebugLog.fileLogging.enabled": "Enable writing chat debug events to JSONL files on disk for diagnostics. When disabled, the built-in `troubleshoot` skill is also disabled. Requires window reload to take effect.",
376+
"github.copilot.config.chat.agentDebugLog.enabled": "Deprecated: use `github.copilot.chat.agentDebugLog.fileLogging.enabled` instead.",
377+
"github.copilot.config.chat.agentDebugLog.enabled.deprecated": "This setting has been merged into `github.copilot.chat.agentDebugLog.fileLogging.enabled`. Please use this setting instead.",
378+
"github.copilot.config.chat.agentDebugLog.fileLogging.enabled": "Enable agent debug logging: write chat debug events (tool calls, LLM requests, token usage, errors) to JSONL files for the debug panel and troubleshoot skill. Requires window reload to take effect.",
378379
"github.copilot.config.chat.agentDebugLog.fileLogging.flushIntervalMs": "How often (in milliseconds) buffered debug log entries are flushed to disk. Lower values provide more up-to-date logs at the cost of more frequent disk writes.",
379380
"github.copilot.config.chat.agentDebugLog.fileLogging.maxRetainedSessionLogs": "Maximum number of chat debug session log directories to retain on disk. Each chat session produces one directory. Older session logs are automatically deleted when this limit is exceeded.",
380381
"github.copilot.config.chat.agentDebugLog.fileLogging.maxSessionLogSizeMB": "Maximum size in megabytes for a single chat debug session log file. When the log exceeds this size, older entries are truncated to retain the most recent data. Defaults to 100 MB.",

src/extension/chat/vscode-node/chatDebugFileLoggerService.ts

Lines changed: 270 additions & 57 deletions
Large diffs are not rendered by default.

src/extension/chat/vscode-node/test/chatDebugFileLoggerService.spec.ts

Lines changed: 197 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ class TestConfigurationService {
147147
getConfig(key: { defaultValue: unknown }) { return key.defaultValue; }
148148
getExperimentBasedConfig(key: { defaultValue: unknown }) {
149149
if (key === ConfigKey.Advanced.ChatDebugFileLogging) {
150-
return true; // Enable file logging for tests
150+
return true; // Enable debug logging for tests
151151
}
152152
return key.defaultValue;
153153
}
@@ -348,10 +348,11 @@ describe('ChatDebugFileLoggerService', () => {
348348
expect(childPath!.fsPath).toContain('parent-session');
349349
expect(childPath!.fsPath).toContain('title-title-child-id.jsonl');
350350

351-
// Child should have the LLM request entry
351+
// Child should have the session_start + LLM request entry
352352
const childEntries = await readLogEntries('title-child-id');
353-
expect(childEntries).toHaveLength(1);
354-
expect(childEntries[0].type).toBe('llm_request');
353+
expect(childEntries).toHaveLength(2);
354+
expect(childEntries[0].type).toBe('session_start');
355+
expect(childEntries[1].type).toBe('llm_request');
355356
});
356357

357358
it('restarts flush timer when flushIntervalMs config changes at runtime', async () => {
@@ -522,4 +523,196 @@ describe('ChatDebugFileLoggerService', () => {
522523
const content = await fs.promises.readFile(modelsPath, 'utf-8');
523524
expect(content).toBe('"sentinel"');
524525
});
526+
527+
it('readEntries returns entries from flushed JSONL and unflushed buffer', async () => {
528+
await service.startSession('session-read');
529+
otelService.fireSpan(makeToolCallSpan('session-read', 'tool_a'));
530+
await service.flush('session-read');
531+
532+
// Fire another span without flushing — it should be in the buffer
533+
otelService.fireSpan(makeToolCallSpan('session-read', 'tool_b'));
534+
535+
const entries = await service.readEntries('session-read');
536+
const toolEntries = entries.filter(e => e.type === 'tool_call');
537+
expect(toolEntries.length).toBe(2);
538+
expect(toolEntries[0].name).toBe('tool_a');
539+
expect(toolEntries[1].name).toBe('tool_b');
540+
});
541+
542+
it('readEntries returns empty array for unknown session', async () => {
543+
const entries = await service.readEntries('nonexistent-session');
544+
expect(entries).toEqual([]);
545+
});
546+
547+
it('readTailEntries returns last N entries from a flushed session', async () => {
548+
await service.startSession('session-tail');
549+
for (let i = 0; i < 5; i++) {
550+
otelService.fireSpan(makeToolCallSpan('session-tail', `tool_${i}`));
551+
}
552+
await service.flush('session-tail');
553+
554+
const entries = await service.readTailEntries('session-tail', 2);
555+
const toolEntries = entries.filter(e => e.type === 'tool_call');
556+
// Should return the last 2 tool entries
557+
expect(toolEntries.length).toBe(2);
558+
expect(toolEntries[0].name).toBe('tool_3');
559+
expect(toolEntries[1].name).toBe('tool_4');
560+
});
561+
562+
it('streamEntries calls onEntry for each parsed line', async () => {
563+
await service.startSession('session-stream');
564+
otelService.fireSpan(makeToolCallSpan('session-stream', 'tool_x'));
565+
otelService.fireSpan(makeChatSpan('session-stream', 'gpt-4o', 100, 50));
566+
await service.flush('session-stream');
567+
568+
const types: string[] = [];
569+
await service.streamEntries('session-stream', entry => {
570+
types.push(entry.type);
571+
});
572+
expect(types).toContain('tool_call');
573+
expect(types).toContain('llm_request');
574+
});
575+
576+
it('onDidEmitEntry fires for each buffered entry', async () => {
577+
await service.startSession('session-emit');
578+
const emitted: Array<{ sessionId: string; type: string }> = [];
579+
const sub = service.onDidEmitEntry(({ sessionId, entry }) => {
580+
emitted.push({ sessionId, type: entry.type });
581+
});
582+
583+
otelService.fireSpan(makeToolCallSpan('session-emit', 'tool_y'));
584+
585+
sub.dispose();
586+
587+
const toolEvents = emitted.filter(e => e.type === 'tool_call');
588+
expect(toolEvents.length).toBe(1);
589+
expect(toolEvents[0].sessionId).toBe('session-emit');
590+
});
591+
592+
// ── Subagent / child session tests ──
593+
594+
it('startChildSession with parentToolSpanId sets parentSpanId on child_session_ref', async () => {
595+
await service.startSession('parent-1');
596+
otelService.fireSpan(makeToolCallSpan('parent-1', 'read_file'));
597+
598+
service.startChildSession('child-1', 'parent-1', 'runSubagent-Explore', 'tool-span-42');
599+
600+
// Fire a span for the child to trigger _ensureSession → child_session_ref
601+
const childSpan = makeChatSpan('child-1', 'claude-haiku', 100, 20);
602+
otelService.fireSpan(childSpan);
603+
604+
await service.flush('parent-1');
605+
await service.flush('child-1');
606+
607+
const parentEntries = await readLogEntries('parent-1');
608+
const ref = parentEntries.find(e => e.type === 'child_session_ref');
609+
expect(ref).toBeDefined();
610+
expect(ref!.parentSpanId).toBe('tool-span-42');
611+
expect((ref!.attrs as Record<string, unknown>).label).toBe('runSubagent-Explore');
612+
expect((ref!.attrs as Record<string, unknown>).childSessionId).toBe('child-1');
613+
});
614+
615+
it('startChildSession without parentToolSpanId omits parentSpanId', async () => {
616+
await service.startSession('parent-2');
617+
otelService.fireSpan(makeToolCallSpan('parent-2', 'read_file'));
618+
619+
service.startChildSession('child-2', 'parent-2', 'title');
620+
621+
const childSpan = makeChatSpan('child-2', 'gpt-4o-mini', 50, 10);
622+
otelService.fireSpan(childSpan);
623+
624+
await service.flush('parent-2');
625+
await service.flush('child-2');
626+
627+
const parentEntries = await readLogEntries('parent-2');
628+
const ref = parentEntries.find(e => e.type === 'child_session_ref');
629+
expect(ref).toBeDefined();
630+
expect(ref!.parentSpanId).toBeUndefined();
631+
});
632+
633+
it('child session JSONL is written under parent directory', async () => {
634+
await service.startSession('parent-dir');
635+
otelService.fireSpan(makeToolCallSpan('parent-dir', 'read_file'));
636+
637+
service.startChildSession('child-dir', 'parent-dir', 'runSubagent-default', 'tool-span-1');
638+
639+
otelService.fireSpan(makeChatSpan('child-dir', 'claude-opus', 500, 100));
640+
641+
await service.flush('parent-dir');
642+
await service.flush('child-dir');
643+
644+
const childLogPath = service.getLogPath('child-dir');
645+
expect(childLogPath).toBeDefined();
646+
expect(childLogPath!.fsPath).toContain('parent-dir');
647+
expect(childLogPath!.fsPath).toContain('runSubagent-default-child-dir.jsonl');
648+
});
649+
650+
it('child_session_ref includes childLogFile for direct file read fallback', async () => {
651+
await service.startSession('parent-file');
652+
otelService.fireSpan(makeToolCallSpan('parent-file', 'read_file'));
653+
654+
service.startChildSession('child-file', 'parent-file', 'runSubagent-Explore', 'tool-span-2');
655+
656+
otelService.fireSpan(makeChatSpan('child-file', 'claude-haiku', 100, 20));
657+
658+
await service.flush('parent-file');
659+
await service.flush('child-file');
660+
661+
const parentEntries = await readLogEntries('parent-file');
662+
const ref = parentEntries.find(e => e.type === 'child_session_ref');
663+
expect(ref).toBeDefined();
664+
expect((ref!.attrs as Record<string, unknown>).childLogFile).toBe('runSubagent-Explore-child-file.jsonl');
665+
expect((ref!.attrs as Record<string, unknown>).childSessionId).toBe('child-file');
666+
});
667+
668+
it('readEntries returns child session entries', async () => {
669+
await service.startSession('parent-read');
670+
otelService.fireSpan(makeToolCallSpan('parent-read', 'read_file'));
671+
672+
service.startChildSession('child-read', 'parent-read', 'runSubagent-default');
673+
674+
otelService.fireSpan(makeToolCallSpan('child-read', 'file_search'));
675+
otelService.fireSpan(makeChatSpan('child-read', 'claude-haiku', 200, 50));
676+
677+
await service.flush('parent-read');
678+
await service.flush('child-read');
679+
680+
const childEntries = await service.readEntries('child-read');
681+
const types = childEntries.map(e => e.type);
682+
expect(types).toContain('session_start');
683+
expect(types).toContain('tool_call');
684+
expect(types).toContain('llm_request');
685+
expect(childEntries.length).toBe(3);
686+
});
687+
688+
it('multiple child sessions under same parent each get their own file', async () => {
689+
await service.startSession('parent-multi');
690+
otelService.fireSpan(makeToolCallSpan('parent-multi', 'read_file'));
691+
692+
service.startChildSession('child-a', 'parent-multi', 'runSubagent-Explore', 'tool-a');
693+
service.startChildSession('child-b', 'parent-multi', 'runSubagent-default', 'tool-b');
694+
695+
otelService.fireSpan(makeChatSpan('child-a', 'claude-haiku', 100, 20));
696+
otelService.fireSpan(makeChatSpan('child-b', 'claude-haiku', 150, 30));
697+
698+
await service.flush('parent-multi');
699+
await service.flush('child-a');
700+
await service.flush('child-b');
701+
702+
// Parent should have two child_session_ref entries
703+
const parentEntries = await readLogEntries('parent-multi');
704+
const refs = parentEntries.filter(e => e.type === 'child_session_ref');
705+
expect(refs).toHaveLength(2);
706+
707+
const labels = refs.map(r => (r.attrs as Record<string, unknown>).label);
708+
expect(labels).toContain('runSubagent-Explore');
709+
expect(labels).toContain('runSubagent-default');
710+
711+
// Each child has its own log path
712+
const pathA = service.getLogPath('child-a');
713+
const pathB = service.getLogPath('child-b');
714+
expect(pathA).toBeDefined();
715+
expect(pathB).toBeDefined();
716+
expect(pathA!.fsPath).not.toBe(pathB!.fsPath);
717+
});
525718
});

src/extension/intents/node/toolCallingLoop.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import * as l10n from '@vscode/l10n';
77
import { Raw } from '@vscode/prompt-tsx';
88
import type { CancellationToken, ChatRequest, ChatResponseProgressPart, ChatResponseReferencePart, ChatResponseStream, ChatResult, LanguageModelToolInformation, Progress } from 'vscode';
99
import { IAuthenticationChatUpgradeService } from '../../../platform/authentication/common/authenticationUpgrade';
10+
import { IChatDebugFileLoggerService } from '../../../platform/chat/common/chatDebugFileLoggerService';
1011
import { IChatHookService, SessionStartHookInput, SessionStartHookOutput, StopHookInput, StopHookOutput, SubagentStartHookInput, SubagentStartHookOutput, SubagentStopHookInput, SubagentStopHookOutput } from '../../../platform/chat/common/chatHookService';
1112
import { FetchStreamSource, IResponsePart } from '../../../platform/chat/common/chatMLFetcher';
1213
import { CanceledResult, ChatFetchResponseType, ChatResponse } from '../../../platform/chat/common/commonTypes';
@@ -727,6 +728,8 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
727728

728729
// Get the VS Code chat session ID from the CapturingToken (same mechanism as old debug panel)
729730
const chatSessionId = getCurrentCapturingToken()?.chatSessionId;
731+
const parentChatSessionId = getCurrentCapturingToken()?.parentChatSessionId;
732+
const debugLogLabel = getCurrentCapturingToken()?.debugLogLabel;
730733

731734
return this._otelService.startActiveSpan(
732735
`invoke_agent ${agentName}`,
@@ -739,6 +742,8 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
739742
[GenAiAttr.CONVERSATION_ID]: this.options.conversation.sessionId,
740743
[CopilotChatAttr.SESSION_ID]: this.options.conversation.sessionId,
741744
...(chatSessionId ? { [CopilotChatAttr.CHAT_SESSION_ID]: chatSessionId } : {}),
745+
...(parentChatSessionId ? { [CopilotChatAttr.PARENT_CHAT_SESSION_ID]: parentChatSessionId } : {}),
746+
...(debugLogLabel ? { [CopilotChatAttr.DEBUG_LOG_LABEL]: debugLogLabel } : {}),
742747
...(customModeName ? { 'copilot_chat.mode_name': customModeName } : {}),
743748
...workspaceMetadataToOTelAttributes(resolveWorkspaceOTelMetadata(this._gitService)),
744749
},
@@ -747,6 +752,17 @@ export abstract class ToolCallingLoop<TOptions extends IToolCallingLoopOptions =
747752
async (span) => {
748753
const otelStartTime = Date.now();
749754

755+
// Register this session as a child of its parent so that debug
756+
// log entries are routed to a dedicated child JSONL file.
757+
// parentChatSessionId is only set on subagent requests
758+
// (see CapturingToken setup in defaultIntentRequestHandler).
759+
if (parentChatSessionId && chatSessionId) {
760+
const childLabel = debugLogLabel ?? `runSubagent-${agentName}`;
761+
this._instantiationService.invokeFunction(accessor =>
762+
accessor.get(IChatDebugFileLoggerService).startChildSession(
763+
chatSessionId, parentChatSessionId, childLabel, parentTraceContext?.spanId));
764+
}
765+
750766
// Emit session start event and metric for top-level agent invocations (not subagents)
751767
if (!parentTraceContext) {
752768
GenAiMetrics.incrementSessionCount(this._otelService);

src/extension/tools/vscode-node/toolsService.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ export class ToolsService extends BaseToolsService {
123123
const isMcpTool = String(name).includes('mcp_');
124124
const toolInfo = this.tools.find(t => t.name === String(name));
125125
const chatSessionId = getCurrentCapturingToken()?.chatSessionId;
126+
const parentChatSessionId = getCurrentCapturingToken()?.parentChatSessionId;
127+
const debugLogLabel = getCurrentCapturingToken()?.debugLogLabel;
126128
const parentTraceContext = (options as { parentTraceContext?: { traceId: string; spanId: string } }).parentTraceContext;
127129
const span = this._otelService.startSpan(`execute_tool ${name}`, {
128130
kind: SpanKind.INTERNAL,
@@ -134,6 +136,8 @@ export class ToolsService extends BaseToolsService {
134136
[GenAiAttr.TOOL_CALL_ID]: (options as { chatStreamToolCallId?: string }).chatStreamToolCallId ?? '',
135137
...(toolInfo?.description ? { [GenAiAttr.TOOL_DESCRIPTION]: toolInfo.description } : {}),
136138
...(chatSessionId ? { [CopilotChatAttr.CHAT_SESSION_ID]: chatSessionId } : {}),
139+
...(parentChatSessionId ? { [CopilotChatAttr.PARENT_CHAT_SESSION_ID]: parentChatSessionId } : {}),
140+
...(debugLogLabel ? { [CopilotChatAttr.DEBUG_LOG_LABEL]: debugLogLabel } : {}),
137141
},
138142
});
139143
// Always capture tool call arguments for the debug panel

0 commit comments

Comments
 (0)