Skip to content

Commit fe8fc89

Browse files
authored
Create package extraction framework to eliminate duplication (#3762)
1 parent d9ed877 commit fe8fc89

5 files changed

Lines changed: 584 additions & 90 deletions

File tree

pkg/workflow/dependabot.go

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -603,29 +603,22 @@ func extractGoPackages(workflowData *WorkflowData) []string {
603603
// extractGoFromCommands extracts Go package paths from command strings
604604
func extractGoFromCommands(commands string) []string {
605605
var packages []string
606-
lines := strings.Split(commands, "\n")
607-
608-
for _, line := range lines {
609-
// Look for "go install <package>" or "go get <package>" patterns
610-
words := strings.Fields(line)
611-
for i, word := range words {
612-
if word == "go" && i+1 < len(words) {
613-
cmd := words[i+1]
614-
if cmd == "install" || cmd == "get" {
615-
// Find the package path
616-
for j := i + 2; j < len(words); j++ {
617-
pkg := words[j]
618-
pkg = strings.TrimRight(pkg, "&|;")
619-
// Skip flags (start with - or --)
620-
if !strings.HasPrefix(pkg, "-") {
621-
packages = append(packages, pkg)
622-
break
623-
}
624-
}
625-
}
626-
}
627-
}
606+
607+
// Extract "go install <package>" pattern
608+
installExtractor := PackageExtractor{
609+
CommandNames: []string{"go"},
610+
RequiredSubcommand: "install",
611+
TrimSuffixes: "&|;",
612+
}
613+
packages = append(packages, installExtractor.ExtractPackages(commands)...)
614+
615+
// Extract "go get <package>" pattern
616+
getExtractor := PackageExtractor{
617+
CommandNames: []string{"go"},
618+
RequiredSubcommand: "get",
619+
TrimSuffixes: "&|;",
628620
}
621+
packages = append(packages, getExtractor.ExtractPackages(commands)...)
629622

630623
return packages
631624
}

pkg/workflow/npm.go

Lines changed: 5 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -15,38 +15,17 @@
1515
// For detailed documentation, see specs/validation-architecture.md
1616
package workflow
1717

18-
import (
19-
"strings"
20-
)
21-
2218
// extractNpxPackages extracts npx package names from workflow data
2319
func extractNpxPackages(workflowData *WorkflowData) []string {
2420
return collectPackagesFromWorkflow(workflowData, extractNpxFromCommands, "npx")
2521
}
2622

2723
// extractNpxFromCommands extracts npx package names from command strings
2824
func extractNpxFromCommands(commands string) []string {
29-
var packages []string
30-
lines := strings.Split(commands, "\n")
31-
32-
for _, line := range lines {
33-
// Look for "npx <package>" pattern
34-
words := strings.Fields(line)
35-
for i, word := range words {
36-
if word == "npx" && i+1 < len(words) {
37-
// Skip flags and find the first package name
38-
for j := i + 1; j < len(words); j++ {
39-
pkg := words[j]
40-
pkg = strings.TrimRight(pkg, "&|;")
41-
// Skip flags (start with - or --)
42-
if !strings.HasPrefix(pkg, "-") {
43-
packages = append(packages, pkg)
44-
break
45-
}
46-
}
47-
}
48-
}
25+
extractor := PackageExtractor{
26+
CommandNames: []string{"npx"},
27+
RequiredSubcommand: "",
28+
TrimSuffixes: "&|;",
4929
}
50-
51-
return packages
30+
return extractor.ExtractPackages(commands)
5231
}

pkg/workflow/package_extraction.go

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
// Package workflow provides generic package extraction utilities for agentic workflows.
2+
//
3+
// # Package Extraction Framework
4+
//
5+
// This file provides a generic framework for extracting package names from command strings.
6+
// The PackageExtractor type can be configured to handle different package managers
7+
// (npm, pip, uv, go, etc.) with minimal code duplication.
8+
//
9+
// # Usage Example
10+
//
11+
// extractor := PackageExtractor{
12+
// CommandNames: []string{"pip", "pip3"},
13+
// RequiredSubcommand: "install",
14+
// TrimSuffixes: "&|;",
15+
// }
16+
// packages := extractor.ExtractPackages("pip install requests")
17+
// // Returns: []string{"requests"}
18+
//
19+
// For package-specific extraction, see npm.go, pip.go, and dependabot.go.
20+
// For validation, see validation.go.
21+
package workflow
22+
23+
import (
24+
"strings"
25+
)
26+
27+
// PackageExtractor provides a configurable framework for extracting package names
28+
// from command-line strings. It can be configured to handle different package
29+
// managers (npm, pip, uv, go) by setting the appropriate command names and options.
30+
type PackageExtractor struct {
31+
// CommandNames is the list of command names to look for (e.g., ["pip", "pip3"])
32+
CommandNames []string
33+
34+
// RequiredSubcommand is the subcommand that must appear somewhere after the
35+
// command name on the same line (e.g., "install" for pip). If empty, the first
36+
// non-flag word after the command name is taken as the package (e.g., "npx <package>").
37+
RequiredSubcommand string
38+
39+
// TrimSuffixes is a string of characters to trim from the end of package names
40+
// (e.g., "&|;" for shell operators)
41+
TrimSuffixes string
42+
}
43+
44+
// ExtractPackages extracts package names from command strings using the configured
45+
// extraction rules. It processes multi-line command strings and returns all found
46+
// package names.
47+
//
48+
// The extraction process:
49+
// 1. Split commands by newlines
50+
// 2. Split each line into words
51+
// 3. Find command name matches
52+
// 4. If RequiredSubcommand is set, scan the rest of the line for that subcommand
53+
// 5. Skip flags (words starting with -)
54+
// 6. Extract package name and trim configured suffixes
55+
// 7. Collect the first package found for each command name match
56+
//
57+
// Example usage:
58+
//
59+
// extractor := PackageExtractor{
60+
// CommandNames: []string{"pip", "pip3"},
61+
// RequiredSubcommand: "install",
62+
// TrimSuffixes: "&|;",
63+
// }
64+
// packages := extractor.ExtractPackages("pip install requests==2.28.0")
65+
// // Returns: []string{"requests==2.28.0"}
66+
func (pe *PackageExtractor) ExtractPackages(commands string) []string {
67+
var packages []string
68+
lines := strings.Split(commands, "\n")
69+
70+
for _, line := range lines {
71+
words := strings.Fields(line)
72+
for i, word := range words {
73+
// Check if this word matches one of our command names
74+
if !pe.isCommandName(word) {
75+
continue
76+
}
77+
78+
// If we have a required subcommand, find it first
79+
if pe.RequiredSubcommand != "" {
80+
pkg := pe.extractWithSubcommand(words, i)
81+
if pkg != "" {
82+
packages = append(packages, pkg)
83+
}
84+
} else {
85+
// No subcommand required - package comes directly after command
86+
pkg := pe.extractDirectPackage(words, i)
87+
if pkg != "" {
88+
packages = append(packages, pkg)
89+
}
90+
}
91+
}
92+
}
93+
94+
return packages
95+
}
96+
97+
// isCommandName checks if the given word matches any of the configured command names
98+
func (pe *PackageExtractor) isCommandName(word string) bool {
99+
for _, cmdName := range pe.CommandNames {
100+
if word == cmdName {
101+
return true
102+
}
103+
}
104+
return false
105+
}
106+
107+
// extractWithSubcommand extracts a package name when a required subcommand must be present
108+
// (e.g., "pip install <package>"). It returns "" when the subcommand or a package name is missing.
109+
func (pe *PackageExtractor) extractWithSubcommand(words []string, commandIndex int) string {
110+
// Look for the required subcommand after the command name
111+
for j := commandIndex + 1; j < len(words); j++ {
112+
if words[j] == pe.RequiredSubcommand {
113+
// Found the subcommand - now find the package name
114+
return pe.findPackageName(words, j+1)
115+
}
116+
}
117+
return ""
118+
}
119+
120+
// extractDirectPackage extracts a package name that comes directly after the command
121+
// (e.g., "npx <package>")
122+
func (pe *PackageExtractor) extractDirectPackage(words []string, commandIndex int) string {
123+
if commandIndex+1 >= len(words) {
124+
return ""
125+
}
126+
return pe.findPackageName(words, commandIndex+1)
127+
}
128+
129+
// FindPackageName finds and processes the package name starting at the given index.
130+
// It skips flags (words starting with -) and returns the first non-flag word,
131+
// trimming configured suffixes.
132+
//
133+
// This method is exported to allow special-case extraction patterns (like uv)
134+
// to reuse the package finding logic.
135+
func (pe *PackageExtractor) FindPackageName(words []string, startIndex int) string {
136+
return pe.findPackageName(words, startIndex)
137+
}
138+
139+
// findPackageName is the internal implementation of FindPackageName; it returns "" when no non-flag word remains.
140+
func (pe *PackageExtractor) findPackageName(words []string, startIndex int) string {
141+
for i := startIndex; i < len(words); i++ {
142+
pkg := words[i]
143+
// Skip flags (start with - or --)
144+
if strings.HasPrefix(pkg, "-") {
145+
continue
146+
}
147+
// Trim configured suffixes (e.g., shell operators)
148+
if pe.TrimSuffixes != "" {
149+
pkg = strings.TrimRight(pkg, pe.TrimSuffixes)
150+
}
151+
return pkg
152+
}
153+
return ""
154+
}

0 commit comments

Comments
 (0)