Skip to content

Commit 99a90cb

Browse files
authored
fix(copilot): Correct shell commands and skills/agents display (#527)
1 parent d54f21d commit 99a90cb

2 files changed

Lines changed: 246 additions & 3 deletions

File tree

src/providers/copilot.ts

Lines changed: 130 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ import { join, basename, dirname, posix, win32 } from 'path'
6060
import { existsSync } from 'fs'
6161
import { readSessionFile } from '../fs-utils.js'
6262
import { calculateCost } from '../models.js'
63+
import { extractBashCommands } from '../bash-utils.js'
6364
import type {
6465
Provider,
6566
SessionSource,
@@ -139,12 +140,17 @@ const modelDisplayEntries = Object.entries(modelDisplayNames).sort(
139140
(a, b) => b[0].length - a[0].length
140141
)
141142

143+
// Tool names that represent shell/bash execution. When the AI calls one of
144+
// these, we extract the `arguments.command` string into bashCommands[].
145+
const BASH_TOOL_NAMES = new Set(['bash', 'run_in_terminal', 'runInTerminal', 'runCommand'])
146+
142147
// ---------------------------------------------------------------------------
143148
// Types for JSONL session state events (unchanged from original)
144149
// ---------------------------------------------------------------------------
145150
type ToolRequest = {
146151
toolName?: string // older format
147152
name?: string // newer format (copilot-agent)
153+
arguments?: Record<string, unknown>
148154
}
149155

150156
type SessionStartData = {
@@ -169,11 +175,18 @@ type AssistantMessageData = {
169175
toolRequests?: ToolRequest[]
170176
}
171177

178+
type SubagentSelectedData = {
179+
agentName: string
180+
agentDisplayName?: string
181+
tools?: string[]
182+
}
183+
172184
type CopilotEvent =
173185
| { type: 'session.start'; data: SessionStartData; timestamp?: string }
174186
| { type: 'session.model_change'; data: ModelChangeData; timestamp?: string }
175187
| { type: 'user.message'; data: UserMessageData; timestamp?: string }
176188
| { type: 'assistant.message'; data: AssistantMessageData; timestamp?: string }
189+
| { type: 'subagent.selected'; data: SubagentSelectedData; timestamp?: string }
177190

178191
// ---------------------------------------------------------------------------
179192
// Types for OTel span rows from agent-traces.db
@@ -196,6 +209,8 @@ interface SpanAttributes {
196209
'gen_ai.conversation.id'?: string
197210
'gen_ai.agent.name'?: string
198211
'gen_ai.tool.name'?: string
212+
'gen_ai.tool.call.arguments'?: string
213+
'copilot_chat.parent_chat_session_id'?: string
199214
'github.copilot.chat.turn.id'?: string
200215
[key: string]: unknown
201216
}
@@ -319,6 +334,51 @@ function epochToISO(epoch: number): string {
319334
return new Date(ms).toISOString()
320335
}
321336

337+
/**
 * Extract the shell command string from an OTel execute_tool span's
 * `gen_ai.tool.call.arguments` attribute.
 *
 * The attribute is expected to be a JSON-encoded argument object
 * (e.g. `{"command":"ls -la"}`); only its `command` field is read.
 *
 * @param raw - Raw attribute value. Anything other than a non-blank string
 *   is rejected.
 * @returns The `command` string, or `null` when the attribute is absent,
 *   blank, not valid JSON, not a JSON object, or has no string `command`
 *   field — so callers can skip shell-command extraction cleanly.
 */
function parseToolCommand(raw: unknown): string | null {
  if (typeof raw !== 'string' || raw.trim() === '') return null

  let parsed: unknown
  try {
    parsed = JSON.parse(raw)
  } catch {
    // Malformed JSON is an expected condition for this best-effort
    // attribute; report "no command" instead of failing the whole parse.
    return null
  }

  // JSON.parse may legitimately yield null, a primitive or an array. Only a
  // plain object can carry a `command` field, so narrow explicitly rather
  // than relying on a property access throwing.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    return null
  }

  const command = (parsed as Record<string, unknown>)['command']
  return typeof command === 'string' ? command : null
}
354+
355+
// Bash reserved words that can appear as the first token of a statement
// segment but are not commands, so they must never be reported as bash
// commands. Covers the control-flow keywords plus the grouping / negation
// reserved words (`{`, `}`, `!`), which lead segments such as `{ echo a; }`
// or `! grep -q x file`.
const OTEL_SHELL_KEYWORDS: ReadonlySet<string> = new Set([
  'if', 'then', 'else', 'elif', 'fi',
  'for', 'while', 'until', 'do', 'done',
  'case', 'esac', 'select', 'function', 'in', 'time', 'coproc',
  '{', '}', '!',
])
362+
363+
/**
 * Normalise an OTEL shell command and extract the command names it runs.
 *
 * Unlike the Copilot CLI / VS Code JSONL logs — which record a single command
 * per tool call (e.g. `cd x && python3 y`) — the OTEL store records the FULL
 * multi-line script the agent ran (heredocs, for/if blocks, newline-separated
 * statements). The shared extractBashCommands helper only splits on
 * `;`/`&&`/`|` and has no concept of shell keywords, so those scripts leak
 * control-flow words (`for`, `do`, `if`, `then`, …) and collapse
 * newline-separated statements.
 *
 * Normalising here — rather than in the shared helper — keeps every other
 * provider's behaviour unchanged. The steps are:
 *   1. join backslash-continued lines, so one logical command spread over
 *      several physical lines stays a single statement;
 *   2. turn every remaining newline into `;` so each statement is its own
 *      segment;
 *   3. drop the entries listed in OTEL_SHELL_KEYWORDS from the result.
 *
 * NOTE: heredoc bodies are not stripped; their lines are still treated as
 * statements.
 *
 * @param command - Raw (possibly multi-line) shell script text.
 * @returns Command names reported by extractBashCommands, minus shell keywords.
 */
function extractOtelBashCommands(command: string): string[] {
  const normalized = command
    // A newline preceded by an ODD number of backslashes is a line
    // continuation: drop the escaping backslash and the newline. An even run
    // is just escaped backslashes followed by a real statement break, so it
    // is left for the next step.
    .replace(/(\\+)\r?\n/g, (match: string, slashes: string) =>
      slashes.length % 2 === 1 ? `${slashes.slice(1)} ` : match
    )
    .replace(/\r?\n/g, '; ')
  return extractBashCommands(normalized).filter(c => !OTEL_SHELL_KEYWORDS.has(c))
}
381+
322382
// ---------------------------------------------------------------------------
323383
// Helpers for JSONL / transcript parsing
324384
// ---------------------------------------------------------------------------
@@ -386,6 +446,9 @@ function createJsonlParser(
386446
let isTranscript = false
387447
let currentModel = ''
388448
let pendingUserMessage = ''
449+
// Track the active subagent for this session (from subagent.selected events).
450+
// Resets when a new subagent is selected.
451+
let currentSubagentType: string | undefined
389452

390453
// First pass: detect format and infer transcript model if needed.
391454
for (const line of lines) {
@@ -429,6 +492,11 @@ function createJsonlParser(
429492
continue
430493
}
431494

495+
if (event.type === 'subagent.selected') {
496+
currentSubagentType = (event.data as SubagentSelectedData).agentName
497+
continue
498+
}
499+
432500
if (event.type === 'user.message') {
433501
pendingUserMessage = (event.data as UserMessageData).content ?? ''
434502
continue
@@ -459,6 +527,18 @@ function createJsonlParser(
459527
})
460528
.filter((t): t is string => t !== null)
461529

530+
// Extract base command names from bash-type tool requests, routing the
531+
// raw command through the shared extractBashCommands helper so chained
532+
// commands are normalised the same way as every other provider
533+
// (see bash-utils.ts, parser.ts, forge.ts, grok.ts, etc.).
534+
const bashCommands = toolRequests.flatMap((t) => {
535+
if (typeof t !== 'object' || t === null) return []
536+
const name = (t.name ?? t.toolName) ?? ''
537+
if (!BASH_TOOL_NAMES.has(name)) return []
538+
const cmd = t.arguments?.['command']
539+
return typeof cmd === 'string' ? extractBashCommands(cmd) : []
540+
})
541+
462542
// Copilot JSONL only logs outputTokens; inputTokens are NOT available.
463543
// Cost will be lower than actual API cost. This is the original
464544
// behaviour — OTel data (below) replaces it when available.
@@ -477,7 +557,8 @@ function createJsonlParser(
477557
webSearchRequests: 0,
478558
costUSD,
479559
tools,
480-
bashCommands: [],
560+
bashCommands,
561+
subagentTypes: currentSubagentType ? [currentSubagentType] : undefined,
481562
timestamp: event.timestamp ?? '',
482563
speed: 'standard' as const,
483564
deduplicationKey: dedupKey,
@@ -577,8 +658,23 @@ function createOtelParser(
577658
traceIdArr
578659
)
579660

580-
// Collect tool names from execute_tool spans for each trace
661+
// Collect tool names, shell commands and subagent names from the
662+
// execute_tool / invoke_agent spans for each trace. These mirror the
663+
// metadata the JSONL path captures, so the OTel source stays
664+
// equivalent (tools + bashCommands + subagentTypes are all first-class
665+
// call metadata per types.ts).
666+
//
667+
// Subagent attribution: VS Code records a subagent run as an
668+
// invoke_agent span carrying copilot_chat.parent_chat_session_id. The
669+
// root turn agent (gen_ai.agent.name = 'GitHub Copilot Chat') has NO
670+
// parent session and is intentionally excluded, otherwise it would
671+
// surface as a bogus 'GitHub Copilot Chat' entry in the agents view.
672+
// A subagent's invoke_agent span lives in the same trace as that
673+
// subagent's own chat spans, so attributing the agent name per-trace
674+
// labels exactly the subagent's calls.
581675
const toolsByTrace = new Map<string, string[]>()
676+
const bashByTrace = new Map<string, string[]>()
677+
const subagentsByTrace = new Map<string, string[]>()
582678
const chatSpanIds: string[] = []
583679
const spanMetaById = new Map<string, { trace_id: string; start_time_ms: number; response_model: string | null }>()
584680

@@ -588,6 +684,7 @@ function createOtelParser(
588684

589685
if (opName === 'chat') {
590686
chatSpanIds.push(span.span_id)
687+
continue
591688
}
592689

593690
if (opName === 'execute_tool') {
@@ -598,6 +695,33 @@ function createOtelParser(
598695
const existing = toolsByTrace.get(span.trace_id) ?? []
599696
existing.push(normalizeTool(rawToolName))
600697
toolsByTrace.set(span.trace_id, existing)
698+
699+
// For shell tools, extract command names via the OTEL-specific
700+
// normaliser (handles the full multi-line scripts the OTEL store
701+
// records; see extractOtelBashCommands).
702+
if (BASH_TOOL_NAMES.has(rawToolName)) {
703+
const command = parseToolCommand(attrs['gen_ai.tool.call.arguments'])
704+
if (command) {
705+
const bash = bashByTrace.get(span.trace_id) ?? []
706+
bash.push(...extractOtelBashCommands(command))
707+
bashByTrace.set(span.trace_id, bash)
708+
}
709+
}
710+
}
711+
continue
712+
}
713+
714+
// Genuine subagent invocation: an invoke_agent span with a parent
715+
// chat session. The root turn agent ('GitHub Copilot Chat') has no
716+
// parent session and is skipped to avoid a bogus agents-view entry.
717+
if (opName === 'invoke_agent') {
718+
const attrs = loadSpanAttributesFromTable(db, span.span_id)
719+
const parentSession = attrs['copilot_chat.parent_chat_session_id']
720+
const agentName = attrs['gen_ai.agent.name'] as string | undefined
721+
if (parentSession && agentName) {
722+
const subs = subagentsByTrace.get(span.trace_id) ?? []
723+
subs.push(agentName)
724+
subagentsByTrace.set(span.trace_id, subs)
601725
}
602726
}
603727
}
@@ -639,6 +763,8 @@ function createOtelParser(
639763
}
640764

641765
const tools = toolsByTrace.get(spanMetadata.trace_id) ?? []
766+
const bashCommands = bashByTrace.get(spanMetadata.trace_id) ?? []
767+
const subagentTypes = subagentsByTrace.get(spanMetadata.trace_id)
642768
const timestamp = epochToISO(spanMetadata.start_time_ms)
643769

644770
// calculateCost with FULL token data — this is the key improvement.
@@ -665,7 +791,8 @@ function createOtelParser(
665791
webSearchRequests: 0,
666792
costUSD,
667793
tools,
668-
bashCommands: [],
794+
bashCommands,
795+
subagentTypes: subagentTypes && subagentTypes.length > 0 ? subagentTypes : undefined,
669796
timestamp,
670797
speed: 'standard' as const,
671798
deduplicationKey: dedupKey,

tests/providers/copilot.test.ts

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -816,4 +816,120 @@ describe('copilot provider - OTel cache token parsing', () => {
816816
expect(calls).toHaveLength(1)
817817
expect(calls[0]!.inputTokens).toBe(100)
818818
})
819+
820+
it('attributes genuine subagents but excludes the root agent', async () => {
821+
if (!isSqliteAvailable()) return
822+
823+
createOtelDb(dbPath)
824+
825+
// Root agent turn: chat span + invoke_agent WITHOUT a parent session.
826+
insertSpan(dbPath, {
827+
spanId: 'span-root-chat', traceId: 'trace-root', operationName: 'chat', startTimeMs: 1000,
828+
attrs: {
829+
'gen_ai.conversation.id': 'conv-h',
830+
'gen_ai.response.model': 'gpt-4.1',
831+
'gen_ai.usage.input_tokens': 400,
832+
'gen_ai.usage.output_tokens': 60,
833+
'gen_ai.usage.cache_read.input_tokens': 0,
834+
'gen_ai.usage.cache_creation.input_tokens': 0,
835+
},
836+
})
837+
insertSpan(dbPath, {
838+
spanId: 'span-root-agent', traceId: 'trace-root', operationName: 'invoke_agent', startTimeMs: 1010,
839+
attrs: {
840+
'gen_ai.conversation.id': 'conv-h',
841+
'gen_ai.agent.name': 'GitHub Copilot Chat',
842+
},
843+
})
844+
845+
// Genuine subagent: its own trace holds the subagent's chat span plus an
846+
// invoke_agent span carrying copilot_chat.parent_chat_session_id.
847+
insertSpan(dbPath, {
848+
spanId: 'span-sub-chat', traceId: 'trace-sub', operationName: 'chat', startTimeMs: 2000,
849+
attrs: {
850+
'gen_ai.conversation.id': 'conv-h',
851+
'gen_ai.response.model': 'claude-haiku-4.5',
852+
'gen_ai.usage.input_tokens': 250,
853+
'gen_ai.usage.output_tokens': 30,
854+
'gen_ai.usage.cache_read.input_tokens': 0,
855+
'gen_ai.usage.cache_creation.input_tokens': 0,
856+
},
857+
})
858+
insertSpan(dbPath, {
859+
spanId: 'span-sub-agent', traceId: 'trace-sub', operationName: 'invoke_agent', startTimeMs: 2010,
860+
attrs: {
861+
'gen_ai.conversation.id': 'conv-h',
862+
'gen_ai.agent.name': 'Explore',
863+
'copilot_chat.parent_chat_session_id': 'conv-h',
864+
},
865+
})
866+
867+
const provider = createCopilotProvider('/nonexistent/jsonl', '/nonexistent/ws')
868+
const sources = await provider.discoverSessions()
869+
const src = sources.find(s => s.path.startsWith(dbPath))
870+
expect(src).toBeDefined()
871+
872+
const calls: ParsedProviderCall[] = []
873+
for await (const call of provider.createSessionParser(src!, new Set()).parse()) {
874+
calls.push(call)
875+
}
876+
877+
expect(calls).toHaveLength(2)
878+
const rootCall = calls.find(c => c.model === 'gpt-4.1')!
879+
const subCall = calls.find(c => c.model === 'claude-haiku-4.5')!
880+
881+
// Root agent must NOT surface as a subagent
882+
expect(rootCall.subagentTypes ?? []).not.toContain('GitHub Copilot Chat')
883+
expect(rootCall.subagentTypes ?? []).toHaveLength(0)
884+
885+
// Genuine subagent is attributed to its own call
886+
expect(subCall.subagentTypes).toEqual(['Explore'])
887+
})
888+
889+
it('normalises multi-line OTel shell scripts, dropping control-flow keywords', async () => {
890+
if (!isSqliteAvailable()) return
891+
892+
createOtelDb(dbPath)
893+
insertSpan(dbPath, {
894+
spanId: 'span-sh-chat', traceId: 'trace-sh', operationName: 'chat', startTimeMs: 1000,
895+
attrs: {
896+
'gen_ai.conversation.id': 'conv-sh',
897+
'gen_ai.response.model': 'gpt-4.1',
898+
'gen_ai.usage.input_tokens': 100,
899+
'gen_ai.usage.output_tokens': 10,
900+
'gen_ai.usage.cache_read.input_tokens': 0,
901+
'gen_ai.usage.cache_creation.input_tokens': 0,
902+
},
903+
})
904+
// A full multi-line script with control flow and newline-separated commands,
905+
// exactly as the OTel store records it.
906+
insertSpan(dbPath, {
907+
spanId: 'span-sh-tool', traceId: 'trace-sh', operationName: 'execute_tool', startTimeMs: 1500,
908+
attrs: {
909+
'gen_ai.tool.name': 'run_in_terminal',
910+
'gen_ai.tool.call.arguments': JSON.stringify({
911+
command: 'for f in *.ts; do\n echo "$f"\ndone\ngit status\nnpm test',
912+
}),
913+
},
914+
})
915+
916+
const provider = createCopilotProvider('/nonexistent/jsonl', '/nonexistent/ws')
917+
const sources = await provider.discoverSessions()
918+
const src = sources.find(s => s.path.startsWith(dbPath))
919+
expect(src).toBeDefined()
920+
921+
const calls: ParsedProviderCall[] = []
922+
for await (const call of provider.createSessionParser(src!, new Set()).parse()) {
923+
calls.push(call)
924+
}
925+
926+
expect(calls).toHaveLength(1)
927+
const bash = calls[0]!.bashCommands
928+
// Real commands separated by newlines/`;` are captured
929+
expect(bash).toEqual(expect.arrayContaining(['echo', 'git', 'npm']))
930+
// Control-flow keywords are NOT reported as commands
931+
for (const kw of ['for', 'do', 'done']) {
932+
expect(bash).not.toContain(kw)
933+
}
934+
})
819935
})

0 commit comments

Comments
 (0)