-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcodebase_executor.go
More file actions
462 lines (408 loc) · 16 KB
/
codebase_executor.go
File metadata and controls
462 lines (408 loc) · 16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
// internal/agent/codebase_executor.go
package agent
import (
"bytes"
"context"
"fmt"
"go/ast"
"go/token"
"go/types"
"os"
"path/filepath"
"sort"
"github.com/xkilldash9x/scalpel-cli/pkg/observability"
"go.uber.org/zap"
"golang.org/x/tools/go/packages"
)
// CodebaseExecutor is the executor dedicated to static analysis of a Go
// codebase. It holds a named logger plus the root directory of the project
// under analysis.
type CodebaseExecutor struct {
	// logger receives the diagnostics emitted while analysing.
	logger *zap.Logger
	// projectRoot is handed to the package loader as its working directory and
	// is the base against which local file paths are made relative in the output.
	projectRoot string
}
// NewCodebaseExecutor returns a CodebaseExecutor rooted at projectRoot. Its
// logger is the shared observability logger under the name "codebase_executor".
func NewCodebaseExecutor(projectRoot string) *CodebaseExecutor {
	executor := new(CodebaseExecutor)
	executor.logger = observability.GetLogger().Named("codebase_executor")
	executor.projectRoot = projectRoot
	return executor
}
// Header templates that delimit the sections of the generated analysis text.
const (
	// moduleSourceCodeHeaderFmt opens the local-source section; %s is the module name.
	moduleSourceCodeHeaderFmt = "## Source Code for Module: %s ##"
	// dependenciesHeader opens the section listing external definitions.
	dependenciesHeader = "## Discovered External Dependencies ##"
	// fileHeaderFmt precedes each local file's content; %s is the file path.
	fileHeaderFmt = "-- File: %s --"
	// definitionHeaderFmt precedes each extracted definition; the first %s is
	// the symbol and the second %s is the file it was taken from.
	definitionHeaderFmt = "-- Definition for: %s (from %s) --"
)
// Definition pinpoints where a symbol is declared: the file holding the
// declaration and the byte range it occupies there. With these three values
// the declaration's source text can be sliced straight out of the file content.
type Definition struct {
	// FilePath is the absolute path to the file containing the definition.
	FilePath string
	// StartOffset is the byte offset of the start of the definition.
	StartOffset int
	// EndOffset is the byte offset of the end of the definition.
	EndOffset int
}
// Execute services actions of type ActionGatherCodebaseContext; any other
// action type is rejected with an error.
//
// The "module_path" metadata entry is read as a package pattern (for example
// "./..."). When it is absent, not a string, or empty, the whole project
// ("./...") is analysed. The analysis output is returned as the Data of a
// "success" result. If the analysis fails, the failure is logged and reported
// through a "failed" result carrying the error message, and the returned
// error is nil.
func (e *CodebaseExecutor) Execute(ctx context.Context, action Action) (*ExecutionResult, error) {
	if action.Type != ActionGatherCodebaseContext {
		return nil, fmt.Errorf("codebase executor cannot handle action type: %s", action.Type)
	}

	// Start from the project-wide default and override it only with a usable value.
	pattern := "./..."
	if requested, isString := action.Metadata["module_path"].(string); isString && requested != "" {
		pattern = requested
	}

	e.logger.Info("Executing deep codebase analysis", zap.String("pattern", pattern))

	report, err := e.analyzeCodebase(ctx, pattern)
	if err != nil {
		e.logger.Error("Failed to perform codebase analysis", zap.Error(err))
		failure := &ExecutionResult{
			Status:          "failed",
			ObservationType: ObservedCodebaseContext,
			ErrorCode:       ErrCodeExecutionFailure,
			ErrorDetails:    map[string]interface{}{"message": err.Error()},
		}
		return failure, nil
	}

	// The payload is the complete analysis text.
	success := &ExecutionResult{
		Status:          "success",
		ObservationType: ObservedCodebaseContext,
		Data:            report,
	}
	return success, nil
}
// analyzeCodebase runs the complete static-analysis pipeline for one package
// pattern and returns the rendered report.
//
// Packages are loaded twice. The first, metadata-only load discovers which
// packages are local and which are external. The second load names every
// external package as an explicit pattern next to the original one, so that
// syntax trees and type information are requested for dependencies as well;
// extracting dependency definitions needs their ASTs.
func (e *CodebaseExecutor) analyzeCodebase(ctx context.Context, pattern string) (string, error) {
	// Pass 1: metadata only, to learn the dependency graph cheaply.
	metaPkgs, err := e.loadPackages(ctx, []string{pattern}, false)
	if err != nil {
		return "", fmt.Errorf("error loading packages initially: %w", err)
	}
	if len(metaPkgs) == 0 {
		return "", fmt.Errorf("no packages were loaded for pattern '%s' in root '%s'", pattern, e.projectRoot)
	}

	// "Local" is decided once, from the initial load, and reused below.
	mainModule, localPkgs, _ := determineLocalPackages(metaPkgs)
	if mainModule != nil {
		e.logger.Info("Analyzing module", zap.String("path", mainModule.Path))
	}

	// Collect every non-local package path (stdlib included) exactly once.
	var externalPatterns []string
	seenExternal := make(map[string]bool)
	for _, pkg := range flattenPackages(metaPkgs) {
		path := pkg.PkgPath
		if localPkgs[path] || seenExternal[path] {
			continue
		}
		seenExternal[path] = true
		externalPatterns = append(externalPatterns, path)
	}

	// Pass 2: full load, with dependencies listed as root patterns so their
	// syntax is parsed too.
	allPatterns := []string{pattern}
	if len(externalPatterns) > 0 {
		allPatterns = append(allPatterns, externalPatterns...)
		e.logger.Info("Starting full load with syntax", zap.Int("dependencies_count", len(externalPatterns)))
	}

	fullPkgs, err := e.loadPackages(ctx, allPatterns, true)
	if err != nil {
		e.logger.Error("Failed during full package load", zap.Error(err))
		return "", fmt.Errorf("error during full package load: %w", err)
	}
	if len(fullPkgs) == 0 {
		// Not expected after a successful initial load; kept as a safety net.
		return "", fmt.Errorf("no packages were loaded during full analysis")
	}

	// Everything below works on the fully loaded graph.
	allPkgs := flattenPackages(fullPkgs)

	// The full result also has dependencies as roots, so the local file list
	// is recomputed from it to keep only files of the main module.
	_, _, localFiles := determineLocalPackages(fullPkgs)

	// Map every declared object to the location of its declaration.
	symbolTable := buildSymbolTable(allPkgs)

	// Identifiers used in local code that resolve to external symbols; the
	// local/external split is the one established by the initial load.
	depsToExtract := findExternalDependencies(allPkgs, localPkgs, symbolTable)

	output, err := e.generateOutput(mainModule, localFiles, depsToExtract, symbolTable)
	if err != nil {
		return "", fmt.Errorf("failed to generate analysis output: %w", err)
	}
	return output, nil
}
// flattenPackages performs a breadth-first traversal of the import graph
// starting at initialPkgs and returns every reachable package exactly once.
// Packages are identified by their ID; nil entries are skipped.
//
// The returned slice is sorted by package ID. Collecting the result by ranging
// over the visited map would yield a different order on every run, which would
// leak into anything derived from it (such as the list of dependency patterns).
func flattenPackages(initialPkgs []*packages.Package) []*packages.Package {
	visited := make(map[string]*packages.Package, len(initialPkgs))

	// Copy the roots so appending to the queue never touches the caller's slice.
	queue := append([]*packages.Package(nil), initialPkgs...)
	for head := 0; head < len(queue); head++ {
		pkg := queue[head]
		if pkg == nil {
			continue
		}
		if _, seen := visited[pkg.ID]; seen {
			continue
		}
		visited[pkg.ID] = pkg
		for _, imp := range pkg.Imports {
			queue = append(queue, imp)
		}
	}

	result := make([]*packages.Package, 0, len(visited))
	for _, pkg := range visited {
		result = append(result, pkg)
	}
	// IDs are unique within the result, so this ordering is total and stable.
	sort.Slice(result, func(i, j int) bool {
		return result[i].ID < result[j].ID
	})
	return result
}
// -- Helper methods integrated from contextor --
// loadPackages configures and runs the Go packages loader for patterns, using
// the executor's project root as the working directory and including test
// files.
//
// With loadSyntax false only metadata is requested (names, files, imports,
// module information), which is enough to understand the dependency graph.
// With loadSyntax true, syntax trees and full type information are requested
// as well, for deep analysis.
//
// NeedDeps is always requested together with NeedImports. Without it the
// loader fills Imports with placeholder packages that carry only an ID, so a
// transitive walk over Imports would see neither PkgPath nor Module for any
// dependency.
//
// Package-level load errors are printed and logged as a warning, but do not
// abort the load. If none of the returned packages has Go files, a nil slice
// and a nil error are returned to signal that nothing was loaded.
func (e *CodebaseExecutor) loadPackages(ctx context.Context, patterns []string, loadSyntax bool) ([]*packages.Package, error) {
	// Base modes needed for metadata and a fully populated import graph.
	mode := packages.NeedName | packages.NeedFiles | packages.NeedCompiledGoFiles |
		packages.NeedImports | packages.NeedDeps | packages.NeedModule

	// Extra modes needed for deep analysis (syntax, types).
	if loadSyntax {
		mode |= packages.NeedTypes | packages.NeedTypesSizes |
			packages.NeedSyntax | packages.NeedTypesInfo
	}

	cfg := &packages.Config{
		Context: ctx, // Lets the caller cancel the load.
		Mode:    mode,
		Tests:   true, // Also include test files.
		Dir:     e.projectRoot,
	}

	pkgs, err := packages.Load(cfg, patterns...)
	if err != nil {
		return nil, err
	}

	if packages.PrintErrors(pkgs) > 0 {
		e.logger.Warn("Errors were found while loading packages, attempting to continue.")
	}

	// The load only counts as useful if at least one root package has Go files.
	for _, p := range pkgs {
		if len(p.GoFiles) > 0 {
			return pkgs, nil
		}
	}
	return nil, nil // Nil slice signals that no packages were loaded.
}
// determineLocalPackages works out which module is the main one and which of
// pkgs belong to it.
//
// The main module is the module of the first package flagged as Main; failing
// that, the module of the first package, if it has one. It returns that module
// (possibly nil), the set of local package paths, and the sorted,
// de-duplicated Go files of those packages. When no module information is
// available at all, every package in pkgs is treated as local.
func determineLocalPackages(pkgs []*packages.Package) (*packages.Module, map[string]bool, []string) {
	var mainModule *packages.Module
	for _, pkg := range pkgs {
		if pkg.Module == nil || !pkg.Module.Main {
			continue
		}
		mainModule = pkg.Module
		break
	}
	// Nothing is flagged as Main: let the first package define the module context.
	if mainModule == nil && len(pkgs) > 0 && pkgs[0].Module != nil {
		mainModule = pkgs[0].Module
	}

	// belongs reports whether pkg counts as local. Without any module
	// information (e.g. GOPATH mode or loose files) every package does.
	belongs := func(pkg *packages.Package) bool {
		if mainModule == nil {
			return true
		}
		return pkg.Module != nil && pkg.Module.Path == mainModule.Path
	}

	localPkgs := make(map[string]bool)
	var localFiles []string
	for _, pkg := range pkgs {
		if !belongs(pkg) {
			continue
		}
		localPkgs[pkg.PkgPath] = true
		localFiles = append(localFiles, pkg.GoFiles...)
	}

	// Sorting puts duplicates next to each other, so comparing with the
	// previous entry is enough to drop them; the output is deterministic.
	sort.Strings(localFiles)
	uniqueFiles := make([]string, 0, len(localFiles))
	for i, file := range localFiles {
		if i > 0 && file == localFiles[i-1] {
			continue
		}
		uniqueFiles = append(uniqueFiles, file)
	}

	return mainModule, localPkgs, uniqueFiles
}
// buildSymbolTable maps each object defined in pkgs to the location of the
// top-level declaration that encloses it.
//
// Packages without type information or syntax trees are skipped, as are
// objects that have no package, no valid position, no enclosing top-level
// declaration, or no known source file. The recorded offsets span the whole
// enclosing declaration, not just the identifier.
func buildSymbolTable(pkgs []*packages.Package) map[types.Object]Definition {
	table := make(map[types.Object]Definition)

	for _, pkg := range pkgs {
		// Both type info and syntax are required to locate declarations.
		analysable := pkg.TypesInfo != nil && len(pkg.Syntax) > 0
		if !analysable {
			continue
		}

		for _, obj := range pkg.TypesInfo.Defs {
			if obj == nil || obj.Pkg() == nil {
				continue
			}
			declPos := obj.Pos()
			if !declPos.IsValid() {
				continue
			}

			// The error is dropped on purpose: "not found" is also reported
			// through a nil node, which is what is checked here.
			_, decl, _ := findEnclosingDeclaration(pkg, declPos)
			if decl == nil {
				continue
			}

			srcFile := pkg.Fset.File(declPos)
			if srcFile == nil {
				continue
			}

			table[obj] = Definition{
				FilePath:    srcFile.Name(),
				StartOffset: srcFile.Offset(decl.Pos()),
				EndOffset:   srcFile.Offset(decl.End()),
			}
		}
	}

	return table
}
// findExternalDependencies walks the syntax trees of the local packages in
// pkgs and collects every used object that is declared in a non-local package
// and has an entry in symbolTable.
//
// localPkgs is the set of package paths considered local. Packages outside
// that set are not inspected. Local packages without type information are
// skipped rather than dereferenced, matching the guard in buildSymbolTable.
// The result is a set: each collected object maps to true.
func findExternalDependencies(pkgs []*packages.Package, localPkgs map[string]bool, symbolTable map[types.Object]Definition) map[types.Object]bool {
	depsToExtract := make(map[types.Object]bool)

	for _, pkg := range pkgs {
		// Only inspect packages identified as local.
		if !localPkgs[pkg.PkgPath] {
			continue
		}
		// Without type info identifiers cannot be resolved to objects.
		if pkg.TypesInfo == nil {
			continue
		}
		uses := pkg.TypesInfo.Uses

		for _, file := range pkg.Syntax {
			ast.Inspect(file, func(n ast.Node) bool {
				ident, ok := n.(*ast.Ident)
				if !ok {
					return true
				}
				// Only identifiers recorded as a use of a package-scoped object matter.
				obj := uses[ident]
				if obj == nil || obj.Pkg() == nil {
					return true
				}
				// Symbols from local packages are not dependencies.
				if localPkgs[obj.Pkg().Path()] {
					return true
				}
				// Keep only symbols whose definition site was located.
				if _, known := symbolTable[obj]; known {
					depsToExtract[obj] = true
				}
				return true
			})
		}
	}

	return depsToExtract
}
// generateOutput renders the final report text in two parts.
//
// Part one is a module header followed by the content of every file in
// localFiles, each under a header showing its path relative to the project
// root (or the original path when no relative form exists). Part two is the
// dependencies header followed by the source of each definition in deps,
// ordered by the object's string form and cut from its file with the offsets
// stored in symbolTable.
//
// Files that cannot be read are logged and skipped, and so are definitions
// whose offsets do not fit the file content. The returned error is always nil.
func (e *CodebaseExecutor) generateOutput(mainModule *packages.Module, localFiles []string, deps map[types.Object]bool, symbolTable map[types.Object]Definition) (string, error) {
	var out bytes.Buffer

	// File contents read once are reused for later definitions in the same file.
	fileCache := make(map[string][]byte)

	// -- Part 1: local source files --
	moduleName := "Project (Unknown Module)"
	if mainModule != nil {
		moduleName = mainModule.Path
	}
	fmt.Fprintf(&out, moduleSourceCodeHeaderFmt+"\n\n", moduleName)

	for _, path := range localFiles {
		content, err := os.ReadFile(path)
		if err != nil {
			e.logger.Warn("Failed to read source file", zap.String("path", path), zap.Error(err))
			continue
		}
		fileCache[path] = content

		// Show the path relative to the project root when one can be computed.
		shownPath := path
		if rel, relErr := filepath.Rel(e.projectRoot, path); relErr == nil {
			shownPath = rel
		}

		fmt.Fprintf(&out, fileHeaderFmt+"\n", shownPath)
		out.Write(content)
		out.WriteString("\n\n")
	}

	// -- Part 2: external dependency definitions --
	out.WriteString(dependenciesHeader + "\n\n")

	// Order by the full string form (e.g. "func fmt.Println...").
	ordered := make([]types.Object, 0, len(deps))
	for obj := range deps {
		ordered = append(ordered, obj)
	}
	sort.Slice(ordered, func(a, b int) bool {
		return ordered[a].String() < ordered[b].String()
	})

	for _, obj := range ordered {
		def, found := symbolTable[obj]
		if !found {
			continue
		}

		content, cached := fileCache[def.FilePath]
		if !cached {
			loaded, readErr := os.ReadFile(def.FilePath)
			if readErr != nil {
				e.logger.Warn("Failed to read dependency file", zap.String("path", def.FilePath), zap.Error(readErr))
				continue
			}
			content = loaded
			fileCache[def.FilePath] = loaded
		}

		// Only slice when the stored range is non-empty and inside the file.
		inBounds := def.StartOffset >= 0 && def.EndOffset <= len(content) && def.StartOffset < def.EndOffset
		if !inBounds {
			e.logger.Error("Invalid definition offsets calculated during analysis",
				zap.String("symbol", obj.String()),
				zap.String("file", def.FilePath),
				zap.Int("start", def.StartOffset),
				zap.Int("end", def.EndOffset))
			continue
		}

		fmt.Fprintf(&out, definitionHeaderFmt+"\n", obj.String(), def.FilePath)
		out.Write(content[def.StartOffset:def.EndOffset])
		out.WriteString("\n\n")
	}

	return out.String(), nil
}
// findEnclosingDeclaration searches the syntax trees of pkg for the top-level
// declaration whose source range contains pos. It returns the file and the
// declaration on success. If no file or declaration covers pos, both are nil
// and an error naming the position and package is returned. The package's
// syntax trees must be loaded for anything to be found.
func findEnclosingDeclaration(pkg *packages.Package, pos token.Pos) (*ast.File, ast.Node, error) {
	for _, file := range pkg.Syntax {
		// Skip files whose range does not cover the position.
		if pos < file.Pos() || pos >= file.End() {
			continue
		}
		// Within the matching file, look for the covering top-level declaration.
		for _, decl := range file.Decls {
			if pos >= decl.Pos() && pos < decl.End() {
				return file, decl, nil
			}
		}
	}
	return nil, nil, fmt.Errorf("declaration not found for position %d in package %s", pos, pkg.PkgPath)
}