Skip to content

Commit 010440e

Browse files
Copilot and pelikhan authored
Add GitHub Copilot agent detection and specialized log parser to audit command (#3913)
* Initial plan

* Initial exploration of audit command and log parsing system

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>

* Add GitHub Copilot agent detection and specialized log parser

- Created copilot_agent.go with CopilotAgentDetector for heuristic detection
- Implemented ParseCopilotAgentLogMetrics for parsing agent-specific logs
- Updated extractLogMetrics to automatically detect and use agent parser
- Modified parseLogFileWithEngine to support agent detection
- Added comprehensive test coverage for detection and parsing
- Updated existing tests to support new function signature

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>

* Fix detection logic: aw_info.json indicates agentic workflow, not GitHub Copilot agent

- Changed IsGitHubCopilotAgent to return false when aw_info.json exists
- Removed hasAgentWorkflowPattern function (no longer needed)
- Updated detection logic: presence of aw_info.json means it's an agentic workflow
- Updated tests to reflect correct behavior
- GitHub Copilot agent runs (from cloud) do NOT have aw_info.json

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>

* Add workflow path detection for GitHub Copilot agent runs

- Added WorkflowPath field to WorkflowRun struct
- Updated audit command to fetch workflow path from GitHub API
- Added hasAgentWorkflowPath heuristic using workflow ID "copilot-swe-agent"
- Created NewCopilotAgentDetectorWithPath constructor for path-based detection
- Updated extractLogMetrics to accept optional workflow path parameter
- Added tests for workflow path detection (copilot-swe-agent.yml/yaml)
- Workflow ID is most reliable hint from GitHub API

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>

* Addressing PR comments

Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
Co-authored-by: Peli de Halleux <pelikhan@users.noreply.github.com>
1 parent 455625e commit 010440e

7 files changed

Lines changed: 760 additions & 14 deletions

File tree

pkg/cli/audit.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ func AuditWorkflowRun(runInfo RunURLInfo, outputDir string, verbose bool, parse
213213
}
214214

215215
// Extract metrics from logs
216-
metrics, err := extractLogMetrics(runOutputDir, verbose)
216+
metrics, err := extractLogMetrics(runOutputDir, verbose, run.WorkflowPath)
217217
if err != nil {
218218
if verbose {
219219
fmt.Fprintln(os.Stderr, console.FormatWarningMessage(fmt.Sprintf("Failed to extract metrics: %v", err)))
@@ -386,7 +386,7 @@ func fetchWorkflowRunMetadata(runInfo RunURLInfo, verbose bool) (WorkflowRun, er
386386
args = append(args,
387387
endpoint,
388388
"--jq",
389-
"{databaseId: .id, number: .run_number, url: .html_url, status: .status, conclusion: .conclusion, workflowName: .name, createdAt: .created_at, startedAt: .run_started_at, updatedAt: .updated_at, event: .event, headBranch: .head_branch, headSha: .head_sha, displayTitle: .display_title}",
389+
"{databaseId: .id, number: .run_number, url: .html_url, status: .status, conclusion: .conclusion, workflowName: .name, workflowPath: .path, createdAt: .created_at, startedAt: .run_started_at, updatedAt: .updated_at, event: .event, headBranch: .head_branch, headSha: .head_sha, displayTitle: .display_title}",
390390
)
391391

392392
if verbose {

pkg/cli/copilot_agent.go

Lines changed: 328 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,328 @@
1+
package cli
2+
3+
import (
4+
"fmt"
5+
"os"
6+
"path/filepath"
7+
"regexp"
8+
"strings"
9+
10+
"github.com/githubnext/gh-aw/pkg/console"
11+
"github.com/githubnext/gh-aw/pkg/logger"
12+
"github.com/githubnext/gh-aw/pkg/workflow"
13+
)
14+
15+
// copilotAgentLog is the package-level logger used by the Copilot agent
// detection and log-parsing code in this file; it is created with the
// "cli:copilot_agent" identifier.
var copilotAgentLog = logger.New("cli:copilot_agent")
16+
17+
// CopilotAgentDetector bundles the inputs consumed by the heuristics that
// decide whether a workflow run was executed by the GitHub Copilot agent.
type CopilotAgentDetector struct {
	// runDir is the directory whose files the heuristics inspect.
	runDir string
	// verbose enables informational messages on stderr when a heuristic matches.
	verbose bool
	// workflowPath is the optional workflow file path from the GitHub API
	// (e.g., .github/workflows/copilot-swe-agent.yml); empty when not provided.
	workflowPath string
}
23+
24+
// NewCopilotAgentDetector creates a new detector for GitHub Copilot agent runs
25+
func NewCopilotAgentDetector(runDir string, verbose bool) *CopilotAgentDetector {
26+
return &CopilotAgentDetector{
27+
runDir: runDir,
28+
verbose: verbose,
29+
}
30+
}
31+
32+
// NewCopilotAgentDetectorWithPath creates a detector with workflow path hint
33+
func NewCopilotAgentDetectorWithPath(runDir string, verbose bool, workflowPath string) *CopilotAgentDetector {
34+
return &CopilotAgentDetector{
35+
runDir: runDir,
36+
verbose: verbose,
37+
workflowPath: workflowPath,
38+
}
39+
}
40+
41+
// IsGitHubCopilotAgent uses heuristics to determine if this run was executed by GitHub Copilot agent
42+
// (not the Copilot CLI engine or agentic workflows)
43+
func (d *CopilotAgentDetector) IsGitHubCopilotAgent() bool {
44+
copilotAgentLog.Printf("Detecting if run is GitHub Copilot agent: %s", d.runDir)
45+
46+
// If aw_info.json exists, this is an agentic workflow, NOT a GitHub Copilot agent run
47+
awInfoPath := filepath.Join(d.runDir, "aw_info.json")
48+
if _, err := os.Stat(awInfoPath); err == nil {
49+
copilotAgentLog.Print("Found aw_info.json - this is an agentic workflow, not a GitHub Copilot agent")
50+
return false
51+
}
52+
53+
// Heuristic 1: Check workflow path if provided (most reliable hint from GitHub API)
54+
if d.hasAgentWorkflowPath() {
55+
copilotAgentLog.Print("Detected copilot-swe-agent in workflow path")
56+
return true
57+
}
58+
59+
// Heuristic 2: Check for agent-specific log patterns
60+
if d.hasAgentLogPatterns() {
61+
copilotAgentLog.Print("Detected agent log patterns")
62+
return true
63+
}
64+
65+
// Heuristic 3: Check for agent-specific artifacts
66+
if d.hasAgentArtifacts() {
67+
copilotAgentLog.Print("Detected agent artifacts")
68+
return true
69+
}
70+
71+
copilotAgentLog.Print("No GitHub Copilot agent indicators found")
72+
return false
73+
}
74+
75+
// hasAgentWorkflowPath checks if the workflow path indicates a Copilot agent run
76+
// The workflow ID is always "copilot-swe-agent" for GitHub Copilot agent runs
77+
func (d *CopilotAgentDetector) hasAgentWorkflowPath() bool {
78+
if d.workflowPath == "" {
79+
return false
80+
}
81+
82+
// Extract the workflow filename from the path
83+
// E.g., .github/workflows/copilot-swe-agent.yml -> copilot-swe-agent
84+
filename := filepath.Base(d.workflowPath)
85+
workflowID := strings.TrimSuffix(filename, filepath.Ext(filename))
86+
87+
// GitHub Copilot agent runs always use "copilot-swe-agent" as the workflow ID
88+
if workflowID == "copilot-swe-agent" {
89+
if d.verbose {
90+
fmt.Fprintln(os.Stderr, console.FormatInfoMessage(
91+
fmt.Sprintf("Detected GitHub Copilot agent from workflow path: %s (ID: %s)", d.workflowPath, workflowID)))
92+
}
93+
return true
94+
}
95+
96+
return false
97+
}
98+
99+
// hasAgentLogPatterns checks log files for patterns specific to GitHub Copilot agent
100+
func (d *CopilotAgentDetector) hasAgentLogPatterns() bool {
101+
// Patterns that indicate GitHub Copilot agent (not Copilot CLI)
102+
agentPatterns := []*regexp.Regexp{
103+
regexp.MustCompile(`(?i)github.*copilot.*agent`),
104+
regexp.MustCompile(`(?i)copilot-swe-agent`),
105+
regexp.MustCompile(`(?i)@github/copilot-swe-agent`),
106+
regexp.MustCompile(`(?i)agent.*task.*execution`),
107+
regexp.MustCompile(`(?i)copilot.*agent.*v\d+\.\d+`),
108+
}
109+
110+
found := false
111+
// Check log files for agent-specific patterns
112+
_ = filepath.Walk(d.runDir, func(path string, info os.FileInfo, err error) error {
113+
if err != nil || info == nil || info.IsDir() {
114+
return nil
115+
}
116+
117+
fileName := strings.ToLower(info.Name())
118+
if strings.HasSuffix(fileName, ".log") || strings.HasSuffix(fileName, ".txt") {
119+
// Read first 10KB of log file to check patterns
120+
content, err := readLogHeader(path, 10240)
121+
if err != nil {
122+
return nil
123+
}
124+
125+
for _, pattern := range agentPatterns {
126+
if pattern.MatchString(content) {
127+
if d.verbose {
128+
fmt.Fprintln(os.Stderr, console.FormatInfoMessage(
129+
fmt.Sprintf("Found agent pattern in %s: %s", filepath.Base(path), pattern.String())))
130+
}
131+
found = true
132+
return filepath.SkipAll // Stop walking, we found a match
133+
}
134+
}
135+
}
136+
137+
return nil
138+
})
139+
140+
return found
141+
}
142+
143+
// hasAgentArtifacts checks for artifacts specific to GitHub Copilot agent runs
144+
func (d *CopilotAgentDetector) hasAgentArtifacts() bool {
145+
// Check for agent-specific artifact patterns
146+
agentArtifacts := []string{
147+
"copilot-agent-output",
148+
"agent-task-result",
149+
"copilot-swe-agent-output",
150+
}
151+
152+
for _, artifactName := range agentArtifacts {
153+
artifactPath := filepath.Join(d.runDir, artifactName)
154+
if _, err := os.Stat(artifactPath); err == nil {
155+
if d.verbose {
156+
fmt.Fprintln(os.Stderr, console.FormatInfoMessage(
157+
fmt.Sprintf("Found agent artifact: %s", artifactName)))
158+
}
159+
return true
160+
}
161+
}
162+
163+
return false
164+
}
165+
166+
// readLogHeader returns up to the first maxBytes bytes of the file at path.
//
// A single Read call is allowed to return fewer bytes than requested, so the
// file is read in a loop until the buffer is full or the file ends; otherwise
// the header could be silently truncated. A non-positive maxBytes yields an
// empty string (the file must still be openable).
//
// An error is returned when the file cannot be opened, or when nothing at all
// could be read (this includes an empty file, which reports the read error).
// If some bytes were read before an error occurred, those bytes are returned
// with a nil error.
func readLogHeader(path string, maxBytes int) (string, error) {
	file, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer file.Close()

	// Guard against make() panicking on a negative length.
	if maxBytes <= 0 {
		return "", nil
	}

	buffer := make([]byte, maxBytes)
	total := 0
	for total < maxBytes {
		n, readErr := file.Read(buffer[total:])
		total += n
		if readErr != nil {
			if total == 0 {
				return "", readErr
			}
			// Best effort: keep whatever was read before the error/EOF.
			break
		}
		if n == 0 {
			// No progress and no error; stop rather than spin.
			break
		}
	}

	return string(buffer[:total]), nil
}
182+
183+
// ParseCopilotAgentLogMetrics extracts metrics from GitHub Copilot agent logs
184+
// This is different from Copilot CLI logs and requires specialized parsing
185+
func ParseCopilotAgentLogMetrics(logContent string, verbose bool) workflow.LogMetrics {
186+
copilotAgentLog.Printf("Parsing GitHub Copilot agent log metrics: %d bytes", len(logContent))
187+
188+
var metrics workflow.LogMetrics
189+
var maxTokenUsage int
190+
191+
lines := strings.Split(logContent, "\n")
192+
toolCallMap := make(map[string]*workflow.ToolCallInfo)
193+
var currentSequence []string
194+
turns := 0
195+
196+
// GitHub Copilot agent log patterns
197+
// These patterns are designed to match the specific log format of the agent
198+
turnPattern := regexp.MustCompile(`(?i)task.*iteration|agent.*turn|step.*\d+`)
199+
toolCallPattern := regexp.MustCompile(`(?i)tool.*call|executing.*tool|calling.*(\w+)`)
200+
errorPattern := regexp.MustCompile(`(?i)error|exception|failed`)
201+
warningPattern := regexp.MustCompile(`(?i)warning|warn`)
202+
203+
for lineNum, line := range lines {
204+
// Skip empty lines
205+
if strings.TrimSpace(line) == "" {
206+
continue
207+
}
208+
209+
// Count turns based on agent iteration patterns
210+
if turnPattern.MatchString(line) {
211+
turns++
212+
// Start of a new turn, save previous sequence if any
213+
if len(currentSequence) > 0 {
214+
metrics.ToolSequences = append(metrics.ToolSequences, currentSequence)
215+
currentSequence = []string{}
216+
}
217+
}
218+
219+
// Extract tool calls from agent logs
220+
if matches := toolCallPattern.FindStringSubmatch(line); len(matches) > 1 {
221+
toolName := extractToolName(line)
222+
if toolName != "" {
223+
// Track tool call
224+
if _, exists := toolCallMap[toolName]; !exists {
225+
toolCallMap[toolName] = &workflow.ToolCallInfo{
226+
Name: toolName,
227+
CallCount: 0,
228+
}
229+
}
230+
toolCallMap[toolName].CallCount++
231+
232+
// Add to current sequence
233+
currentSequence = append(currentSequence, toolName)
234+
235+
if verbose {
236+
copilotAgentLog.Printf("Found tool call: %s", toolName)
237+
}
238+
}
239+
}
240+
241+
// Try to extract token usage from JSON format if available
242+
jsonMetrics := workflow.ExtractJSONMetrics(line, verbose)
243+
if jsonMetrics.TokenUsage > 0 || jsonMetrics.EstimatedCost > 0 {
244+
if jsonMetrics.TokenUsage > maxTokenUsage {
245+
maxTokenUsage = jsonMetrics.TokenUsage
246+
}
247+
if jsonMetrics.EstimatedCost > 0 {
248+
metrics.EstimatedCost += jsonMetrics.EstimatedCost
249+
}
250+
}
251+
252+
// Collect errors and warnings
253+
lowerLine := strings.ToLower(line)
254+
if errorPattern.MatchString(lowerLine) && !strings.Contains(lowerLine, "no error") {
255+
message := extractErrorMessage(line)
256+
if message != "" {
257+
metrics.Errors = append(metrics.Errors, workflow.LogError{
258+
Line: lineNum + 1,
259+
Type: "error",
260+
Message: message,
261+
})
262+
}
263+
}
264+
if warningPattern.MatchString(lowerLine) {
265+
message := extractErrorMessage(line)
266+
if message != "" {
267+
metrics.Errors = append(metrics.Errors, workflow.LogError{
268+
Line: lineNum + 1,
269+
Type: "warning",
270+
Message: message,
271+
})
272+
}
273+
}
274+
}
275+
276+
// Add final sequence if any
277+
if len(currentSequence) > 0 {
278+
metrics.ToolSequences = append(metrics.ToolSequences, currentSequence)
279+
}
280+
281+
// Convert tool call map to slice
282+
for _, toolInfo := range toolCallMap {
283+
metrics.ToolCalls = append(metrics.ToolCalls, *toolInfo)
284+
}
285+
286+
metrics.TokenUsage = maxTokenUsage
287+
metrics.Turns = turns
288+
289+
copilotAgentLog.Printf("Parsed metrics: tokens=%d, cost=$%.4f, turns=%d, errors=%d",
290+
metrics.TokenUsage, metrics.EstimatedCost, metrics.Turns, len(metrics.Errors))
291+
292+
return metrics
293+
}
294+
295+
// Prefix patterns stripped by extractErrorMessage, compiled once at package
// scope because the function is invoked for every matching log line.
var (
	// errMsgISOTimestampPrefix matches a leading "YYYY-MM-DD" date followed by
	// "T" or whitespace, an "HH:MM:SS" time, optional fractional seconds and
	// an optional "Z" or "+HH:MM"/"-HH:MM" offset.
	errMsgISOTimestampPrefix = regexp.MustCompile(`^\d{4}-\d{2}-\d{2}[T\s]\d{2}:\d{2}:\d{2}(\.\d+)?([+-]\d{2}:\d{2}|Z)?\s*`)
	// errMsgBracketTimestampPrefix matches a leading "[YYYY-MM-DD HH:MM:SS]".
	errMsgBracketTimestampPrefix = regexp.MustCompile(`^\[\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\]\s*`)
	// errMsgLevelPrefix matches a leading upper-case log level followed by a colon.
	errMsgLevelPrefix = regexp.MustCompile(`^(ERROR|WARN|WARNING|INFO|DEBUG):\s*`)
)

// extractErrorMessage extracts a clean error message from a log line.
// This is a simplified version for the copilot agent parser.
//
// It removes, in order, a leading timestamp (bare or bracketed), then a
// leading upper-case log level prefix such as "ERROR:", and finally trims
// surrounding whitespace. Lower-case level prefixes are left in place. The
// result may be empty.
func extractErrorMessage(line string) string {
	// Remove common timestamp patterns
	line = errMsgISOTimestampPrefix.ReplaceAllString(line, "")
	line = errMsgBracketTimestampPrefix.ReplaceAllString(line, "")

	// Remove common log level prefixes
	line = errMsgLevelPrefix.ReplaceAllString(line, "")

	return strings.TrimSpace(line)
}
310+
311+
// toolNamePatterns are tried in order by extractToolName; each captures the
// identifier (letters, digits, "_" or "-") that follows its keyword. They are
// compiled once at package scope because extractToolName is called for every
// log line that looks like a tool call.
var toolNamePatterns = []*regexp.Regexp{
	regexp.MustCompile(`(?i)tool[:\s]+([a-zA-Z0-9_-]+)`),
	regexp.MustCompile(`(?i)calling[:\s]+([a-zA-Z0-9_-]+)`),
	regexp.MustCompile(`(?i)executing[:\s]+([a-zA-Z0-9_-]+)`),
	regexp.MustCompile(`(?i)using[:\s]+tool[:\s]+([a-zA-Z0-9_-]+)`),
}

// extractToolName extracts a tool name from a log line.
//
// It returns the identifier captured by the first pattern that matches
// (keywords are matched case-insensitively), or "" when none matches.
func extractToolName(line string) string {
	for _, pattern := range toolNamePatterns {
		if matches := pattern.FindStringSubmatch(line); len(matches) > 1 {
			return strings.TrimSpace(matches[1])
		}
	}

	return ""
}

0 commit comments

Comments
 (0)