Skip to content

Commit 1295e24

Browse files
committed
perf(parser): optimize memory with streaming diff input
1 parent a1fb571 commit 1295e24

3 files changed

Lines changed: 151 additions & 118 deletions

File tree

internal/analyzer/analyzer.go

Lines changed: 62 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -111,24 +111,67 @@ func (a *Analyzer) AnalyzeChanges(totalAdded, totalRemoved int, branchName strin
111111
return msg
112112
}
113113

114-
// Default analysis based on the first change if no specific fallback applies
115-
firstChange := a.changes[0]
114+
// Initialize a score tracker for the action (type)
115+
scoreMap := make(map[string]int)
116116

117-
// Apply diff stat analysis to infer intent based on added vs deleted lines
118-
action := a.analyzeDiffStat(totalAdded, totalRemoved)
119-
if action != "" {
120-
commitMessage.Action = action
121-
} else {
122-
// Use keyword scoring algorithm to determine the best action
123-
action = a.determineActionByKeywordScoring()
124-
if action != "" {
125-
commitMessage.Action = action
126-
} else {
127-
// Fallback to default action determination
128-
commitMessage.Action = a.determineAction(firstChange)
117+
// Step 1: Score the action parsed from the branch name and adopt its scope, if any
118+
if branchName != "" {
119+
branchAction, branchScope := a.parseBranchName(branchName)
120+
if branchAction != "" {
121+
scoreMap[branchAction] += 3
122+
}
123+
if branchScope != "" {
124+
commitMessage.Scope = branchScope
125+
}
126+
}
127+
128+
// Step 2: Add weights from diff stat ratio
129+
statAction := a.analyzeDiffStat(totalAdded, totalRemoved)
130+
if statAction != "" {
131+
scoreMap[statAction] += 2
132+
}
133+
134+
// Step 3: Aggregate keyword scores
135+
keywordScores := a.calculateKeywordScores()
136+
for action, score := range keywordScores {
137+
scoreMap[action] += score
138+
}
139+
140+
// Step 4: Add weights from multi-file patterns
141+
multiPatterns := a.detectMultiFilePatterns()
142+
for _, p := range multiPatterns {
143+
switch p {
144+
case "feature-addition":
145+
scoreMap["feat"] += 4
146+
case "bug-fix-cascade":
147+
scoreMap["fix"] += 4
148+
case "refactor-sweep":
149+
scoreMap["refactor"] += 3
150+
case "test-suite-update":
151+
scoreMap["test"] += 4
152+
}
153+
}
154+
155+
// Step 5: Select the recommended type with the highest accumulated score
156+
bestAction := ""
157+
maxScore := -1
158+
for action, score := range scoreMap {
159+
if score > maxScore {
160+
maxScore = score
161+
bestAction = action
129162
}
130163
}
131164

165+
if bestAction != "" {
166+
commitMessage.Action = bestAction
167+
} else {
168+
// Fall back to the default action determination when no signal produced a candidate action
169+
commitMessage.Action = a.determineAction(a.changes[0])
170+
}
171+
172+
// Use the first change as the basis for the remaining message components
173+
firstChange := a.changes[0]
174+
132175
// Determine other components
133176
commitMessage.Topic = a.determineTopic(firstChange.File)
134177
commitMessage.Item = a.determineItem(firstChange.File)
@@ -142,17 +185,6 @@ func (a *Analyzer) AnalyzeChanges(totalAdded, totalRemoved int, branchName strin
142185
}
143186
}
144187

145-
// NEW: Extract intent from branch name
146-
if branchName != "" {
147-
branchAction, branchScope := a.parseBranchName(branchName)
148-
if branchAction != "" {
149-
commitMessage.Action = branchAction
150-
}
151-
if branchScope != "" {
152-
commitMessage.Scope = branchScope
153-
}
154-
}
155-
156188
// NEW: Monitoring Dependency Changes (Dependency Watcher)
157189
newDeps := a.detectNewDependencies()
158190
if len(newDeps) > 0 {
@@ -179,33 +211,14 @@ func (a *Analyzer) AnalyzeChanges(totalAdded, totalRemoved int, branchName strin
179211
}
180212
}
181213

182-
// Detect multi-file patterns
183-
multiPatterns := a.detectMultiFilePatterns()
184-
if len(multiPatterns) > 0 {
185-
// Adjust action and purpose based on multi-file patterns
186-
if contains(multiPatterns, "feature-addition") {
187-
commitMessage.Action = "feat"
188-
commitMessage.Purpose = "add new feature across multiple modules"
189-
} else if contains(multiPatterns, "bug-fix-cascade") {
190-
commitMessage.Action = "fix"
191-
commitMessage.Purpose = "resolve issue across multiple components"
192-
} else if contains(multiPatterns, "refactor-sweep") {
193-
commitMessage.Action = "refactor"
194-
commitMessage.Purpose = "restructure and improve code organization"
195-
} else if contains(multiPatterns, "test-suite-update") {
196-
commitMessage.Action = "test"
197-
commitMessage.Purpose = "update test suite"
198-
}
199-
}
200-
201214
return commitMessage
202215
}
203216

204-
// determineActionByKeywordScoring analyzes git diff content and scores keywords to determine the best action
205-
// This implements the keyword scoring algorithm requirement
206-
func (a *Analyzer) determineActionByKeywordScoring() string {
217+
// calculateKeywordScores analyzes git diff content and returns a map of scores for each action
218+
func (a *Analyzer) calculateKeywordScores() map[string]int {
219+
actionScores := make(map[string]int)
207220
if len(a.config.Keywords) == 0 {
208-
return "" // No keywords configured, fall back to default logic
221+
return actionScores
209222
}
210223

211224
// Concatenate all diffs
@@ -216,9 +229,6 @@ func (a *Analyzer) determineActionByKeywordScoring() string {
216229
}
217230
diffContent := strings.ToLower(allDiffs.String())
218231

219-
// Score each action based on keyword matches
220-
actionScores := make(map[string]int)
221-
222232
for action, keywords := range a.config.Keywords {
223233
score := 0
224234
for keyword, weight := range keywords {
@@ -230,22 +240,7 @@ func (a *Analyzer) determineActionByKeywordScoring() string {
230240
actionScores[action] = score
231241
}
232242

233-
// Find the action with the highest score
234-
maxScore := 0
235-
bestAction := ""
236-
for action, score := range actionScores {
237-
if score > maxScore {
238-
maxScore = score
239-
bestAction = action
240-
}
241-
}
242-
243-
// Only return the action if the score is significant (> 0)
244-
if maxScore > 0 {
245-
return bestAction
246-
}
247-
248-
return ""
243+
return actionScores
249244
}
250245

251246
// detectIntelligentScope determines the best scope based on file paths and patterns

internal/analyzer/analyzer_test.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,3 +227,43 @@ func TestStructureDetectionRegex(t *testing.T) {
227227
}
228228
})
229229
}
230+
231+
func TestCrossScoringMatrix(t *testing.T) {
232+
cfg := &config.Config{
233+
Keywords: map[string]map[string]int{
234+
"fix": {"error": 4},
235+
},
236+
}
237+
238+
t.Run("Branch overrides keyword if score is higher", func(t *testing.T) {
239+
a := &Analyzer{
240+
config: cfg,
241+
changes: []*parser.Change{
242+
{File: "main.go", Diff: "+ fmt.Println(\"error\")"},
243+
},
244+
}
245+
// branch "feat/new-ui" adds 3 to feat
246+
// "error" keyword adds 4 to fix
247+
// fix (4) > feat (3) -> fix
248+
msg := a.AnalyzeChanges(1, 0, "feat/new-ui")
249+
if msg.Action != "fix" {
250+
t.Errorf("Expected action fix, got %s", msg.Action)
251+
}
252+
})
253+
254+
t.Run("Combined signals", func(t *testing.T) {
255+
a := &Analyzer{
256+
config: cfg,
257+
changes: []*parser.Change{
258+
{File: "main.go", Diff: "+ func NewFeature() {", Added: 40, Removed: 0},
259+
},
260+
}
261+
// branch "feature/cool" adds 3 to feat
262+
// ratio 1.0 adds 2 to feat (added > 30)
263+
// total feat = 5
264+
msg := a.AnalyzeChanges(40, 0, "feature/cool")
265+
if msg.Action != "feat" {
266+
t.Errorf("Expected action feat, got %s", msg.Action)
267+
}
268+
})
269+
}

internal/parser/git.go

Lines changed: 49 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package parser
22

33
import (
44
"bufio"
5-
"bytes"
65
"fmt"
76
"os/exec"
87
"path/filepath"
@@ -39,100 +38,86 @@ func NewGitParser() *GitParser {
3938
func (p *GitParser) ParseStagedChanges() ([]*Change, error) {
4039
// Use git status --porcelain for more accurate file state detection
4140
cmd := exec.Command("git", "status", "--porcelain")
42-
var out bytes.Buffer
43-
cmd.Stdout = &out
44-
err := cmd.Run()
41+
stdout, err := cmd.StdoutPipe()
4542
if err != nil {
46-
return nil, fmt.Errorf("error running git status --porcelain: %w", err)
43+
return nil, fmt.Errorf("error creating stdout pipe for git status: %w", err)
44+
}
45+
46+
if err := cmd.Start(); err != nil {
47+
return nil, fmt.Errorf("error starting git status: %w", err)
4748
}
4849

4950
var changes []*Change
50-
scanner := bufio.NewScanner(&out)
51+
scanner := bufio.NewScanner(stdout)
5152
for scanner.Scan() {
5253
line := scanner.Text()
5354
if len(line) < 3 {
5455
continue
5556
}
5657

5758
// Porcelain format: XY filename
58-
// X = staged status, Y = unstaged status
5959
stagedStatus := line[0:1]
60-
// unstagedStatus := line[1:2]
6160
filename := strings.TrimSpace(line[3:])
6261

63-
// Skip if not staged (staged status is space)
62+
// Skip if not staged
6463
if stagedStatus == " " || stagedStatus == "?" {
6564
continue
6665
}
6766

68-
// Map porcelain status to action
6967
action := stagedStatus
70-
switch stagedStatus {
71-
case "M":
72-
action = "M" // Modified
73-
case "A":
74-
action = "A" // Added
75-
case "D":
76-
action = "D" // Deleted
77-
case "R":
78-
action = "R" // Renamed
79-
case "C":
80-
action = "C" // Copied
81-
}
82-
8368
change := &Change{
8469
File: filename,
8570
Action: action,
8671
FileExtension: getFileExtension(filename),
8772
}
8873

89-
// Handle renames and copies (format: "R oldname -> newname")
74+
// Handle renames and copies
9075
if action == "R" || action == "C" {
9176
parts := strings.Split(filename, " -> ")
9277
if len(parts) == 2 {
9378
change.IsRename = action == "R"
9479
change.IsCopy = action == "C"
9580
change.Source = strings.TrimSpace(parts[0])
9681
change.Target = strings.TrimSpace(parts[1])
97-
change.File = change.Target // Use the new name as the file
82+
change.File = change.Target
9883
change.FileExtension = getFileExtension(change.Target)
9984
}
10085
}
10186

102-
// Get the diff for the file
87+
// Stream the staged diff for this file, counting added and removed lines as it is read
10388
diffCmd := exec.Command("git", "diff", "--cached", "-U0", "--", change.File)
104-
var diffOut bytes.Buffer
105-
diffCmd.Stdout = &diffOut
106-
err := diffCmd.Run()
107-
if err != nil && action != "D" {
108-
// For deleted files, diff may fail, which is expected
109-
return nil, fmt.Errorf("error running git diff for %s: %w", change.File, err)
110-
}
111-
change.Diff = diffOut.String()
112-
113-
// Count added and removed lines
114-
diffScanner := bufio.NewScanner(strings.NewReader(change.Diff))
115-
for diffScanner.Scan() {
116-
diffLine := diffScanner.Text()
117-
if strings.HasPrefix(diffLine, "+") && !strings.HasPrefix(diffLine, "+++") {
118-
change.Added++
119-
} else if strings.HasPrefix(diffLine, "-") && !strings.HasPrefix(diffLine, "---") {
120-
change.Removed++
89+
diffStdout, err := diffCmd.StdoutPipe()
90+
if err == nil {
91+
if err := diffCmd.Start(); err == nil {
92+
diffScanner := bufio.NewScanner(diffStdout)
93+
var diffBuilder strings.Builder
94+
for diffScanner.Scan() {
95+
diffLine := diffScanner.Text()
96+
if strings.HasPrefix(diffLine, "+") && !strings.HasPrefix(diffLine, "+++") {
97+
change.Added++
98+
} else if strings.HasPrefix(diffLine, "-") && !strings.HasPrefix(diffLine, "---") {
99+
change.Removed++
100+
}
101+
diffBuilder.WriteString(diffLine)
102+
diffBuilder.WriteString("\n")
103+
}
104+
change.Diff = diffBuilder.String()
105+
diffCmd.Wait()
121106
}
122107
}
108+
123109
p.TotalAdded += change.Added
124110
p.TotalRemoved += change.Removed
125111

126-
// Detect large changes
127112
if (change.Added + change.Removed) >= 500 {
128113
change.IsMajor = true
129114
}
130115

131116
changes = append(changes, change)
132117
}
133118

134-
if err := scanner.Err(); err != nil {
135-
return nil, fmt.Errorf("error scanning git status output: %w", err)
119+
if err := cmd.Wait(); err != nil {
120+
return nil, fmt.Errorf("error waiting for git status: %w", err)
136121
}
137122

138123
return changes, nil
@@ -141,13 +126,26 @@ func (p *GitParser) ParseStagedChanges() ([]*Change, error) {
141126
// GetCurrentBranch returns the name of the current git branch
142127
func (p *GitParser) GetCurrentBranch() (string, error) {
143128
cmd := exec.Command("git", "rev-parse", "--abbrev-ref", "HEAD")
144-
var out bytes.Buffer
145-
cmd.Stdout = &out
146-
err := cmd.Run()
129+
stdout, err := cmd.StdoutPipe()
147130
if err != nil {
148-
return "", fmt.Errorf("error getting current branch: %w", err)
131+
return "", fmt.Errorf("error creating stdout pipe for rev-parse: %w", err)
149132
}
150-
return strings.TrimSpace(out.String()), nil
133+
134+
if err := cmd.Start(); err != nil {
135+
return "", fmt.Errorf("error starting rev-parse: %w", err)
136+
}
137+
138+
var branch string
139+
scanner := bufio.NewScanner(stdout)
140+
if scanner.Scan() {
141+
branch = strings.TrimSpace(scanner.Text())
142+
}
143+
144+
if err := cmd.Wait(); err != nil {
145+
return "", fmt.Errorf("error waiting for rev-parse: %w", err)
146+
}
147+
148+
return branch, nil
151149
}
152150

153151
// getFileExtension returns the file extension of a given file path

0 commit comments

Comments
 (0)