|
| 1 | +// Package workflow provides generic package extraction utilities for agentic workflows. |
| 2 | +// |
| 3 | +// # Package Extraction Framework |
| 4 | +// |
| 5 | +// This file provides a generic framework for extracting package names from command strings. |
| 6 | +// The PackageExtractor type can be configured to handle different package managers |
| 7 | +// (npm, pip, uv, go, etc.) with minimal code duplication. |
| 8 | +// |
| 9 | +// # Usage Example |
| 10 | +// |
| 11 | +// extractor := PackageExtractor{ |
| 12 | +// CommandNames: []string{"pip", "pip3"}, |
| 13 | +// RequiredSubcommand: "install", |
| 14 | +// TrimSuffixes: "&|;", |
| 15 | +// } |
| 16 | +// packages := extractor.ExtractPackages("pip install requests") |
| 17 | +// // Returns: []string{"requests"} |
| 18 | +// |
| 19 | +// For package-specific extraction, see npm.go, pip.go, and dependabot.go. |
| 20 | +// For validation, see validation.go. |
| 21 | +package workflow |
| 22 | + |
| 23 | +import ( |
| 24 | + "strings" |
| 25 | +) |
| 26 | + |
// PackageExtractor extracts package names from command-line strings. It is
// configured per package manager (npm, pip, uv, go) through the command names
// to look for, an optional required subcommand, and the set of trailing
// characters to strip from a package name.
type PackageExtractor struct {
	// CommandNames is the list of command names to look for (e.g., ["pip", "pip3"]).
	// A word must equal one of these names exactly to count as a match.
	CommandNames []string

	// RequiredSubcommand is the subcommand that must follow the command name
	// (e.g., "install" for pip). If empty, the package name is expected
	// immediately after the command name (e.g., "npx <package>").
	RequiredSubcommand string

	// TrimSuffixes is a set of characters trimmed from the end of package names
	// (e.g., "&|;" for shell operators). A word ending in one of these
	// characters is also treated as the end of the current command invocation,
	// so scanning never continues into the next chained command.
	TrimSuffixes string
}

// ExtractPackages extracts package names from a (possibly multi-line) command
// string using the configured extraction rules. It returns the packages in the
// order they appear, or nil when none are found.
//
// The extraction process:
//  1. Split commands by newlines
//  2. Split each line into whitespace-separated words
//  3. Find words that exactly match a configured command name
//  4. If RequiredSubcommand is set, look for that subcommand within the same
//     command invocation
//  5. Skip flags (words starting with -)
//  6. Extract the package name and trim the configured suffixes
//  7. Return the first package found per command invocation
//
// Example usage:
//
//	extractor := PackageExtractor{
//		CommandNames:       []string{"pip", "pip3"},
//		RequiredSubcommand: "install",
//		TrimSuffixes:       "&|;",
//	}
//	packages := extractor.ExtractPackages("pip install requests==2.28.0")
//	// Returns: []string{"requests==2.28.0"}
func (pe *PackageExtractor) ExtractPackages(commands string) []string {
	var packages []string

	for _, line := range strings.Split(commands, "\n") {
		words := strings.Fields(line)
		for i, word := range words {
			if !pe.isCommandName(word) {
				continue
			}

			var pkg string
			if pe.RequiredSubcommand != "" {
				pkg = pe.extractWithSubcommand(words, i)
			} else {
				// No subcommand required - package comes directly after command.
				pkg = pe.extractDirectPackage(words, i)
			}
			if pkg != "" {
				packages = append(packages, pkg)
			}
		}
	}

	return packages
}

// isCommandName reports whether word exactly matches one of the configured
// command names.
func (pe *PackageExtractor) isCommandName(word string) bool {
	for _, cmdName := range pe.CommandNames {
		if word == cmdName {
			return true
		}
	}
	return false
}

// endsCommand reports whether word ends with one of the TrimSuffixes
// characters, which marks the end of the current command invocation
// (e.g., "&&", ";" or "--upgrade;"). It is always false when TrimSuffixes is
// empty.
func (pe *PackageExtractor) endsCommand(word string) bool {
	return strings.TrimRight(word, pe.TrimSuffixes) != word
}

// extractWithSubcommand extracts a package name when a required subcommand must
// be present (e.g., "pip install <package>"). It returns "" when the subcommand
// does not appear within the invocation that starts at commandIndex.
func (pe *PackageExtractor) extractWithSubcommand(words []string, commandIndex int) string {
	for j := commandIndex + 1; j < len(words); j++ {
		if words[j] == pe.RequiredSubcommand {
			// Found the subcommand - the package name follows it.
			return pe.findPackageName(words, j+1)
		}
		// Stop at a shell operator: a subcommand found beyond it belongs to a
		// different command (e.g., "pip --version && pip install foo").
		if pe.endsCommand(words[j]) {
			return ""
		}
	}
	return ""
}

// extractDirectPackage extracts a package name that comes directly after the
// command (e.g., "npx <package>"). It returns "" when nothing follows the
// command.
func (pe *PackageExtractor) extractDirectPackage(words []string, commandIndex int) string {
	return pe.findPackageName(words, commandIndex+1)
}

// FindPackageName finds and processes the package name starting at the given
// index. It skips flags (words starting with -) and returns the first non-flag
// word with the configured suffixes trimmed, or "" when there is none.
//
// This method is exported to allow special-case extraction patterns (like uv)
// to reuse the package finding logic.
func (pe *PackageExtractor) FindPackageName(words []string, startIndex int) string {
	return pe.findPackageName(words, startIndex)
}

// findPackageName is the internal implementation of FindPackageName.
func (pe *PackageExtractor) findPackageName(words []string, startIndex int) string {
	// Guard against a negative index from an external caller; indexing would panic.
	if startIndex < 0 {
		startIndex = 0
	}
	for i := startIndex; i < len(words); i++ {
		word := words[i]
		if strings.HasPrefix(word, "-") {
			// A flag that carries a trailing shell operator ("--upgrade;") ends
			// the invocation; whatever follows is a different command.
			if pe.endsCommand(word) {
				return ""
			}
			continue
		}
		// TrimRight is a no-op for an empty cutset. A bare operator such as
		// "&&" trims to "", which callers treat as "no package".
		return strings.TrimRight(word, pe.TrimSuffixes)
	}
	return ""
}
0 commit comments