Skip to content

Commit b5ad3e1

Browse files
authored
Refactor duplicated workflow YAML parsing and promote shared slice/sanitization utilities (#30926)
1 parent 4f64876 commit b5ad3e1

16 files changed

Lines changed: 368 additions & 217 deletions

pkg/sliceutil/README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ All functions in this package are pure: they never modify their input. They are
1818
| `FilterMapKeys` | `func[K comparable, V any](m map[K]V, predicate func(K, V) bool) []K` | Returns map keys for which `predicate(key, value)` is `true`; order is not guaranteed |
1919
| `Any` | `func[T any](slice []T, predicate func(T) bool) bool` | Returns `true` if at least one element satisfies `predicate`; returns `false` for nil or empty slices |
2020
| `Deduplicate` | `func[T comparable](slice []T) []T` | Returns a new slice with duplicate elements removed, preserving order of first occurrence |
21+
| `MergeUnique` | `func[T comparable](base []T, extra ...T) []T` | Returns a deduplicated slice starting with `base` and appending unseen values from `extra` |
22+
| `Exclude` | `func[T comparable](base []T, exclude ...T) []T` | Returns a new slice with all `exclude` values removed while preserving order |
2123

2224
## Usage Examples
2325

@@ -38,12 +40,20 @@ upper := sliceutil.Map(names, strings.ToUpper)
3840
items := []string{"a", "b", "a", "c"}
3941
unique := sliceutil.Deduplicate(items)
4042
// unique = ["a", "b", "c"]
43+
44+
// Merge unique values
45+
merged := sliceutil.MergeUnique([]string{"a", "b"}, "b", "c")
46+
// merged = ["a", "b", "c"]
47+
48+
// Exclude values
49+
filtered := sliceutil.Exclude([]string{"a", "b", "c"}, "b")
50+
// filtered = ["a", "c"]
4151
```
4252

4353
## Design Notes
4454

4555
- `Any` is implemented via `slices.ContainsFunc` from the standard library.
46-
- `Deduplicate` uses a `map[T]bool` for O(n) time complexity.
56+
- `Deduplicate`, `MergeUnique`, and `Exclude` use hash sets (`map[T]struct{}`) for O(n) behavior.
4757
- None of these functions sort their output; callers that require sorted results should call `slices.Sort` on the returned slice.
4858

4959
---

pkg/sliceutil/sliceutil.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,45 @@ func Deduplicate[T comparable](slice []T) []T {
7272
}
7373
return result
7474
}
75+
76+
// MergeUnique returns a deduplicated slice that starts with base and appends any
77+
// items from extra that are not already present in base. Order is preserved.
78+
func MergeUnique[T comparable](base []T, extra ...T) []T {
79+
seen := make(map[T]struct{}, len(base)+len(extra))
80+
result := make([]T, 0, len(base)+len(extra))
81+
for _, item := range base {
82+
if _, exists := seen[item]; !exists {
83+
seen[item] = struct{}{}
84+
result = append(result, item)
85+
}
86+
}
87+
for _, item := range extra {
88+
if _, exists := seen[item]; !exists {
89+
seen[item] = struct{}{}
90+
result = append(result, item)
91+
}
92+
}
93+
return result
94+
}
95+
96+
// Exclude returns a new slice containing the items from base that do not appear
97+
// in the exclude set. Order of remaining items is preserved.
98+
// Always returns a fresh slice (never aliases base) even when no items are removed.
99+
func Exclude[T comparable](base []T, exclude ...T) []T {
100+
if len(exclude) == 0 {
101+
return append([]T(nil), base...)
102+
}
103+
104+
excluded := make(map[T]struct{}, len(exclude))
105+
for _, item := range exclude {
106+
excluded[item] = struct{}{}
107+
}
108+
109+
result := make([]T, 0, len(base))
110+
for _, item := range base {
111+
if _, isExcluded := excluded[item]; !isExcluded {
112+
result = append(result, item)
113+
}
114+
}
115+
return result
116+
}

pkg/sliceutil/sliceutil_test.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,3 +297,64 @@ func TestAny_StopsEarly(t *testing.T) {
297297
})
298298
assert.Equal(t, 2, callCount, "Any should stop evaluating after first match")
299299
}
300+
301+
func TestMergeUnique(t *testing.T) {
302+
tests := []struct {
303+
name string
304+
base []string
305+
extra []string
306+
expected []string
307+
}{
308+
{
309+
name: "deduplicates base and extra preserving first seen order",
310+
base: []string{"a", "b", "a"},
311+
extra: []string{"b", "c", "a", "d"},
312+
expected: []string{"a", "b", "c", "d"},
313+
},
314+
{
315+
name: "nil base with extra values",
316+
base: nil,
317+
extra: []string{"x", "x", "y"},
318+
expected: []string{"x", "y"},
319+
},
320+
}
321+
322+
for _, tt := range tests {
323+
t.Run(tt.name, func(t *testing.T) {
324+
result := MergeUnique(tt.base, tt.extra...)
325+
assert.Equal(t, tt.expected, result, "MergeUnique should return deduplicated merged slice")
326+
})
327+
}
328+
}
329+
330+
func TestExclude(t *testing.T) {
331+
tests := []struct {
332+
name string
333+
base []string
334+
exclude []string
335+
expected []string
336+
}{
337+
{
338+
name: "excludes matching values while preserving order",
339+
base: []string{"a", "b", "c", "b"},
340+
exclude: []string{"b"},
341+
expected: []string{"a", "c"},
342+
},
343+
{
344+
name: "no excludes returns cloned slice",
345+
base: []string{"a", "b"},
346+
exclude: nil,
347+
expected: []string{"a", "b"},
348+
},
349+
}
350+
351+
for _, tt := range tests {
352+
t.Run(tt.name, func(t *testing.T) {
353+
result := Exclude(tt.base, tt.exclude...)
354+
assert.Equal(t, tt.expected, result, "Exclude should remove excluded elements")
355+
if len(tt.exclude) == 0 && len(tt.base) > 0 {
356+
assert.NotSame(t, &tt.base[0], &result[0], "Exclude should always return a fresh slice copy")
357+
}
358+
})
359+
}
360+
}

pkg/stringutil/sanitize.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,16 @@ package stringutil
22

33
import (
44
"regexp"
5+
"slices"
56
"strings"
67

78
"github.com/github/gh-aw/pkg/logger"
89
)
910

1011
var sanitizeLog = logger.New("stringutil:sanitize")
1112

13+
var multipleHyphens = regexp.MustCompile(`-+`)
14+
1215
// Regex patterns for detecting potential secret key names
1316
var (
1417
// Match uppercase snake_case identifiers that look like secret names (e.g., MY_SECRET_KEY, GITHUB_TOKEN, API_KEY)
@@ -46,6 +49,100 @@ var (
4649
}
4750
)
4851

52+
// SanitizeOptions configures the behavior of the SanitizeName function.
53+
type SanitizeOptions struct {
54+
// PreserveSpecialChars is a list of special characters to preserve during sanitization.
55+
// Common characters include '.', '_'. If nil or empty, only alphanumeric and hyphens are preserved.
56+
PreserveSpecialChars []rune
57+
58+
// TrimHyphens controls whether leading and trailing hyphens are removed from the result.
59+
// When true, hyphens at the start and end of the sanitized name are trimmed.
60+
TrimHyphens bool
61+
62+
// DefaultValue is returned when the sanitized name is empty after all transformations.
63+
// If empty string, no default is applied.
64+
DefaultValue string
65+
}
66+
67+
// SanitizeName sanitizes a string for use as an identifier, file name, or similar context.
68+
// It provides configurable behavior through the SanitizeOptions parameter.
69+
func SanitizeName(name string, opts *SanitizeOptions) string {
70+
if sanitizeLog.Enabled() {
71+
preserveCount := 0
72+
trimHyphens := false
73+
if opts != nil {
74+
preserveCount = len(opts.PreserveSpecialChars)
75+
trimHyphens = opts.TrimHyphens
76+
}
77+
sanitizeLog.Printf("Sanitizing name: input=%q, preserve_chars=%d, trim_hyphens=%t",
78+
name, preserveCount, trimHyphens)
79+
}
80+
81+
// Handle nil options
82+
if opts == nil {
83+
opts = &SanitizeOptions{}
84+
}
85+
86+
// Convert to lowercase
87+
result := strings.ToLower(name)
88+
89+
// Replace common separators with hyphens
90+
result = strings.ReplaceAll(result, ":", "-")
91+
result = strings.ReplaceAll(result, "\\", "-")
92+
result = strings.ReplaceAll(result, "/", "-")
93+
result = strings.ReplaceAll(result, " ", "-")
94+
95+
// Check if underscores should be preserved
96+
preserveUnderscore := slices.Contains(opts.PreserveSpecialChars, '_')
97+
98+
// Replace underscores with hyphens if not preserved
99+
if !preserveUnderscore {
100+
result = strings.ReplaceAll(result, "_", "-")
101+
}
102+
103+
// Build character preservation pattern based on options
104+
var preserveChars strings.Builder
105+
preserveChars.WriteString("a-z0-9-") // Always preserve alphanumeric and hyphens
106+
if len(opts.PreserveSpecialChars) > 0 {
107+
for _, char := range opts.PreserveSpecialChars {
108+
// Escape special regex characters
109+
switch char {
110+
case '.', '_':
111+
preserveChars.WriteRune(char)
112+
}
113+
}
114+
}
115+
116+
// Create pattern for characters to remove/replace
117+
pattern := regexp.MustCompile(`[^` + preserveChars.String() + `]+`)
118+
119+
// Replace unwanted characters with hyphens or empty based on context
120+
if len(opts.PreserveSpecialChars) > 0 {
121+
// Replace with hyphens (SanitizeWorkflowName behavior)
122+
result = pattern.ReplaceAllString(result, "-")
123+
} else {
124+
// Remove completely (SanitizeIdentifier behavior)
125+
result = pattern.ReplaceAllString(result, "")
126+
}
127+
128+
// Consolidate multiple consecutive hyphens into a single hyphen
129+
result = multipleHyphens.ReplaceAllString(result, "-")
130+
131+
// Optionally trim leading/trailing hyphens
132+
if opts.TrimHyphens {
133+
result = strings.Trim(result, "-")
134+
}
135+
136+
// Return default value if result is empty
137+
if result == "" && opts.DefaultValue != "" {
138+
sanitizeLog.Printf("Sanitized name is empty, using default: %q", opts.DefaultValue)
139+
return opts.DefaultValue
140+
}
141+
142+
sanitizeLog.Printf("Sanitized name result: %q", result)
143+
return result
144+
}
145+
49146
// SanitizeErrorMessage removes potential secret key names from error messages to prevent
50147
// information disclosure via logs. This prevents exposing details about an organization's
51148
// security infrastructure by redacting secret key names that might appear in error messages.

pkg/stringutil/sanitize_test.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -668,3 +668,45 @@ func BenchmarkSanitizeForFilename(b *testing.B) {
668668
SanitizeForFilename(slug)
669669
}
670670
}
671+
672+
func TestSanitizeName(t *testing.T) {
673+
tests := []struct {
674+
name string
675+
input string
676+
opts *SanitizeOptions
677+
expected string
678+
}{
679+
{
680+
name: "nil options remove special chars",
681+
input: "My Workflow@123",
682+
opts: nil,
683+
expected: "my-workflow123",
684+
},
685+
{
686+
name: "preserve dot and underscore",
687+
input: "My.Workflow_Name",
688+
opts: &SanitizeOptions{
689+
PreserveSpecialChars: []rune{'.', '_'},
690+
},
691+
expected: "my.workflow_name",
692+
},
693+
{
694+
name: "trim and default when empty",
695+
input: "@@@",
696+
opts: &SanitizeOptions{
697+
TrimHyphens: true,
698+
DefaultValue: "default-name",
699+
},
700+
expected: "default-name",
701+
},
702+
}
703+
704+
for _, tt := range tests {
705+
t.Run(tt.name, func(t *testing.T) {
706+
result := SanitizeName(tt.input, tt.opts)
707+
if result != tt.expected {
708+
t.Errorf("SanitizeName(%q) = %q; want %q", tt.input, result, tt.expected)
709+
}
710+
})
711+
}
712+
}

pkg/workflow/call_workflow_permissions.go

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,9 @@ package workflow
33
import (
44
"fmt"
55
"os"
6-
"path/filepath"
76

87
"github.com/github/gh-aw/pkg/logger"
98
"github.com/github/gh-aw/pkg/parser"
10-
"github.com/goccy/go-yaml"
119
)
1210

1311
var callWorkflowPermissionsLog = logger.New("workflow:call_workflow_permissions")
@@ -85,17 +83,9 @@ func extractCallWorkflowPermissions(workflowName, markdownPath string) (*Permiss
8583
// extractPermissionsFromYAMLFile reads a .lock.yml or .yml workflow file, parses it,
8684
// and returns the merged permissions from all its jobs.
8785
func extractPermissionsFromYAMLFile(filePath string) (*Permissions, error) {
88-
cleanPath := filepath.Clean(filePath)
89-
// filePath originates from findWorkflowFile(), which validates all paths via
90-
// isPathWithinDir() to prevent directory traversal before returning them.
91-
content, err := os.ReadFile(cleanPath) // #nosec G304 -- path pre-validated by findWorkflowFile() via isPathWithinDir()
86+
workflow, err := readWorkflowYAML(filePath)
9287
if err != nil {
93-
return nil, fmt.Errorf("failed to read workflow file %s: %w", filePath, err)
94-
}
95-
96-
var workflow map[string]any
97-
if err := yaml.Unmarshal(content, &workflow); err != nil {
98-
return nil, fmt.Errorf("failed to parse workflow file %s: %w", filePath, err)
88+
return nil, err
9989
}
10090

10191
perms := extractJobPermissionsFromParsedWorkflow(workflow)

pkg/workflow/call_workflow_secrets.go

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,9 @@ package workflow
22

33
import (
44
"fmt"
5-
"os"
6-
"path/filepath"
75
"sort"
86

97
"github.com/github/gh-aw/pkg/logger"
10-
"github.com/goccy/go-yaml"
118
)
129

1310
var callWorkflowSecretsLog = logger.New("workflow:call_workflow_secrets")
@@ -45,17 +42,9 @@ func extractCallWorkflowSecrets(workflowName, markdownPath string) ([]string, er
4542
// extractSecretsFromWorkflowFile parses a .lock.yml or .yml workflow file and returns
4643
// the secret names declared in its on.workflow_call.secrets section.
4744
func extractSecretsFromWorkflowFile(filePath string) ([]string, error) {
48-
cleanPath := filepath.Clean(filePath)
49-
// filePath originates from findWorkflowFile(), which validates all paths via
50-
// isPathWithinDir() to prevent directory traversal before returning them.
51-
content, err := os.ReadFile(cleanPath) // #nosec G304 -- path pre-validated by findWorkflowFile() via isPathWithinDir()
45+
workflow, err := readWorkflowYAML(filePath)
5246
if err != nil {
53-
return nil, fmt.Errorf("failed to read workflow file %s: %w", filePath, err)
54-
}
55-
56-
var workflow map[string]any
57-
if err := yaml.Unmarshal(content, &workflow); err != nil {
58-
return nil, fmt.Errorf("failed to parse workflow file %s: %w", filePath, err)
47+
return nil, err
5948
}
6049

6150
secrets := extractWorkflowCallSecretsFromParsed(workflow)

pkg/workflow/call_workflow_validation.go

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -181,19 +181,9 @@ func (c *Compiler) validateCallWorkflow(data *WorkflowData, workflowPath string)
181181
// extractWorkflowCallInputs parses a workflow file and extracts the workflow_call inputs schema.
182182
// Returns a map of input definitions that can be used to generate MCP tool schemas.
183183
func extractWorkflowCallInputs(workflowPath string) (map[string]any, error) {
184-
cleanPath := filepath.Clean(workflowPath)
185-
if !filepath.IsAbs(cleanPath) {
186-
return nil, fmt.Errorf("workflow path must be absolute: %s", workflowPath)
187-
}
188-
189-
workflowContent, err := os.ReadFile(cleanPath) // #nosec G304 -- Path is sanitized above
184+
workflow, err := readWorkflowYAML(workflowPath)
190185
if err != nil {
191-
return nil, fmt.Errorf("failed to read workflow file %s: %w", workflowPath, err)
192-
}
193-
194-
var workflow map[string]any
195-
if err := yaml.Unmarshal(workflowContent, &workflow); err != nil {
196-
return nil, fmt.Errorf("failed to parse workflow file %s: %w", workflowPath, err)
186+
return nil, err
197187
}
198188

199189
return extractWorkflowCallInputsFromParsed(workflow), nil

0 commit comments

Comments
 (0)