Skip to content

Commit d669491

Browse files
committed
fix: resolve critical issues in reviewer, CWE matching, diff handling, and performance
- Fix go.mod version (1.25 → 1.23) - Accumulate LLM errors instead of swallowing in reviewer - Add regex field length validation (2000 char cap) - Add git path traversal prevention with filepath.Rel() - CWE matching with word boundaries instead of naive substring - O(1) line lookup in comment mapping (pre-built map) - Move regex compilation to package level (out of hot loops) - Include concern in dedup key, skip empty paths, normalize with filepath.Clean() - Add reflection output size validation - Fix parseInt with strconv.Atoi and range validation - Add binary file detection and line number bounds in diff parser
1 parent bf6bfb2 commit d669491

10 files changed

Lines changed: 1246 additions & 44 deletions

File tree

config.go

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package sight
33
import (
44
"os"
55
"path/filepath"
6+
"strconv"
67
"strings"
78
)
89

@@ -180,14 +181,28 @@ func parseTOMLArray(s string) []string {
180181
return result
181182
}
182183

184+
// parseInt parses a string as an integer with range validation.
185+
// It reads leading digits (ignoring trailing non-digit chars for TOML compat)
186+
// and caps the result at 1,000,000 to prevent unreasonable values.
183187
func parseInt(s string) int {
184-
n := 0
185-
for _, c := range s {
186-
if c >= '0' && c <= '9' {
187-
n = n*10 + int(c-'0')
188-
} else {
189-
break
190-
}
188+
s = strings.TrimSpace(s)
189+
if s == "" {
190+
return 0
191+
}
192+
// Extract leading digits
193+
end := 0
194+
for end < len(s) && s[end] >= '0' && s[end] <= '9' {
195+
end++
196+
}
197+
if end == 0 {
198+
return 0
199+
}
200+
n, err := strconv.Atoi(s[:end])
201+
if err != nil {
202+
return 0
203+
}
204+
if n < 0 || n > 1_000_000 {
205+
return 0
191206
}
192207
return n
193208
}

coverage.out

Lines changed: 1008 additions & 0 deletions
Large diffs are not rendered by default.

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
module github.com/GrayCodeAI/sight
22

3-
go 1.25.0
3+
go 1.23.0

internal/comment/comment.go

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,16 @@ func MapToInlineFiltered(findings []FindingInput, files []diff.File, mode Filter
6565
var comments []Inline
6666
fileMap := buildFileMap(files)
6767

68+
// Pre-build line sets for O(1) lookup when using context or added modes
69+
lineSetMap := make(map[string]diffLineSet)
70+
if mode == FilterDiffContext || mode == FilterAdded {
71+
for _, f := range files {
72+
lineSetMap[f.Path] = buildDiffLineSet(f)
73+
}
74+
}
75+
6876
for _, f := range findings {
69-
diffFile, exists := fileMap[f.File]
77+
_, exists := fileMap[f.File]
7078

7179
switch mode {
7280
case FilterNone:
@@ -81,15 +89,19 @@ func MapToInlineFiltered(findings []FindingInput, files []diff.File, mode Filter
8189

8290
case FilterDiffContext:
8391
// Include if the line falls within any hunk's full range
84-
// (added, removed, or context lines)
85-
if exists && isInDiffContext(diffFile, f.Line) {
86-
comments = append(comments, buildComment(f))
92+
// (added, removed, or context lines) — O(1) lookup
93+
if exists {
94+
if ls, ok := lineSetMap[f.File]; ok && ls.contextLines[f.Line] {
95+
comments = append(comments, buildComment(f))
96+
}
8797
}
8898

8999
default: // FilterAdded
90-
// Include only if the line is within the diff's new-side range
91-
if exists && isInDiff(diffFile, f.Line) {
92-
comments = append(comments, buildComment(f))
100+
// Include only if the line is within the diff's new-side range — O(1) lookup
101+
if exists {
102+
if ls, ok := lineSetMap[f.File]; ok && ls.newRangeLines[f.Line] {
103+
comments = append(comments, buildComment(f))
104+
}
93105
}
94106
}
95107
}
@@ -105,6 +117,36 @@ func buildFileMap(files []diff.File) map[string]diff.File {
105117
return m
106118
}
107119

120+
// diffLineSet holds precomputed line number sets for O(1) lookup.
121+
type diffLineSet struct {
122+
newRangeLines map[int]bool // lines within new-side hunk ranges
123+
contextLines map[int]bool // lines within context ranges or individual line refs
124+
}
125+
126+
func buildDiffLineSet(file diff.File) diffLineSet {
127+
s := diffLineSet{
128+
newRangeLines: make(map[int]bool),
129+
contextLines: make(map[int]bool),
130+
}
131+
for _, hunk := range file.Hunks {
132+
start := hunk.NewStart
133+
end := hunk.NewStart + hunk.NewCount
134+
for i := start; i <= end; i++ {
135+
s.newRangeLines[i] = true
136+
s.contextLines[i] = true
137+
}
138+
for _, l := range hunk.Lines {
139+
if l.NewNum > 0 {
140+
s.contextLines[l.NewNum] = true
141+
}
142+
if l.OldNum > 0 {
143+
s.contextLines[l.OldNum] = true
144+
}
145+
}
146+
}
147+
return s
148+
}
149+
108150
func isInDiff(file diff.File, line int) bool {
109151
for _, hunk := range file.Hunks {
110152
start := hunk.NewStart

internal/context/git.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ package context
33

44
import (
55
"fmt"
6+
"os"
67
"os/exec"
8+
"path/filepath"
79
"strconv"
810
"strings"
911
)
@@ -83,6 +85,10 @@ func ChangedFiles(base string) ([]string, error) {
8385

8486
// Blame runs git blame on a specific line range and returns a summary.
8587
func Blame(file string, startLine, endLine int) (string, error) {
88+
if err := validateFilePath(file); err != nil {
89+
return "", fmt.Errorf("git blame: invalid path: %w", err)
90+
}
91+
8692
args := []string{"blame", "--line-porcelain",
8793
"-L", strconv.Itoa(startLine) + "," + strconv.Itoa(endLine),
8894
"--", file}
@@ -95,7 +101,33 @@ func Blame(file string, startLine, endLine int) (string, error) {
95101
return parseBlameAuthors(string(out)), nil
96102
}
97103

104+
// validateFilePath ensures a file path does not traverse outside the working directory.
105+
func validateFilePath(file string) error {
106+
if file == "" {
107+
return fmt.Errorf("empty path")
108+
}
109+
wd, err := os.Getwd()
110+
if err != nil {
111+
return fmt.Errorf("cannot determine working directory: %w", err)
112+
}
113+
absPath := file
114+
if !filepath.IsAbs(file) {
115+
absPath = filepath.Join(wd, file)
116+
}
117+
rel, err := filepath.Rel(wd, absPath)
118+
if err != nil {
119+
return fmt.Errorf("cannot resolve relative path: %w", err)
120+
}
121+
if strings.HasPrefix(rel, "..") {
122+
return fmt.Errorf("path %q traverses outside working directory", file)
123+
}
124+
return nil
125+
}
126+
98127
func recentCommits(file string, n int) ([]string, error) {
128+
if err := validateFilePath(file); err != nil {
129+
return nil, fmt.Errorf("recentCommits: invalid path: %w", err)
130+
}
99131
out, err := exec.Command("git", "log", "--oneline", "-n", strconv.Itoa(n), "--", file).Output()
100132
if err != nil {
101133
return nil, err

internal/diff/diff.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,24 @@ func parseFileChunk(chunk string) File {
107107
lines := strings.Split(chunk, "\n")
108108
file := File{}
109109

110+
// Detect binary files — git outputs "Binary files ... differ" for them
111+
for _, line := range lines {
112+
if strings.HasPrefix(line, "Binary files") && strings.Contains(line, "differ") {
113+
// Binary file: extract path but skip hunk parsing
114+
for _, l := range lines {
115+
if strings.HasPrefix(l, "+++ b/") {
116+
file.Path = strings.TrimPrefix(l, "+++ b/")
117+
} else if strings.HasPrefix(l, "--- a/") {
118+
file.OldPath = strings.TrimPrefix(l, "--- a/")
119+
}
120+
}
121+
if file.Path == "" && file.OldPath != "" {
122+
file.Path = file.OldPath
123+
}
124+
return file
125+
}
126+
}
127+
110128
for i, line := range lines {
111129
switch {
112130
case strings.HasPrefix(line, "--- a/"):
@@ -124,6 +142,10 @@ func parseFileChunk(chunk string) File {
124142
file.Path = strings.TrimPrefix(line, "rename to ")
125143
case strings.HasPrefix(line, "@@"):
126144
hunk := parseHunkHeader(line)
145+
// Validate line number bounds before parsing
146+
if hunk.NewStart < 0 || hunk.OldStart < 0 || hunk.NewCount < 0 || hunk.OldCount < 0 {
147+
continue
148+
}
127149
hunk.Lines = parseHunkLines(lines[i+1:])
128150
file.Hunks = append(file.Hunks, hunk)
129151
}

internal/review/cwe.go

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,18 +90,45 @@ var cweDatabase = []CWEMapping{
9090

9191
// MatchCWE checks a finding's message (and fix) against the CWE database and
9292
// returns the CWE ID if a match is found. Returns empty string if no match.
93+
// Uses word boundary checks to avoid false positives from substring matching.
9394
func MatchCWE(message, fix string) string {
9495
lower := strings.ToLower(message + " " + fix)
9596
for _, cwe := range cweDatabase {
9697
for _, keyword := range cwe.Keywords {
97-
if strings.Contains(lower, keyword) {
98+
if containsWordBoundary(lower, keyword) {
9899
return cwe.ID
99100
}
100101
}
101102
}
102103
return ""
103104
}
104105

106+
// containsWordBoundary checks if text contains keyword at a word boundary.
107+
// A word boundary is the start/end of string or a non-alphanumeric character.
108+
func containsWordBoundary(text, keyword string) bool {
109+
idx := 0
110+
for {
111+
pos := strings.Index(text[idx:], keyword)
112+
if pos < 0 {
113+
return false
114+
}
115+
pos += idx
116+
// Check left boundary
117+
leftOK := pos == 0 || !isWordChar(text[pos-1])
118+
// Check right boundary
119+
end := pos + len(keyword)
120+
rightOK := end >= len(text) || !isWordChar(text[end])
121+
if leftOK && rightOK {
122+
return true
123+
}
124+
idx = pos + 1
125+
}
126+
}
127+
128+
func isWordChar(c byte) bool {
129+
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
130+
}
131+
105132
// LookupCWEName returns the human-readable name for a CWE ID.
106133
func LookupCWEName(id string) string {
107134
for _, cwe := range cweDatabase {

internal/review/reflect.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,13 @@ Adjust severity when:
4343
- A "critical" finding is actually just a code smell → downgrade
4444
- A "low" finding is actually exploitable → upgrade`
4545

46+
// maxReflectPromptSize is the maximum allowed size for a reflection prompt.
47+
// This prevents oversized output from consuming excessive tokens.
48+
const maxReflectPromptSize = 100_000
49+
4650
// BuildReflectPrompt constructs the prompt for self-reflection.
51+
// Returns an empty string if the resulting prompt exceeds maxReflectPromptSize,
52+
// indicating reflection should be skipped.
4753
func BuildReflectPrompt(findings []Finding, diffContext string) string {
4854
var b strings.Builder
4955
b.WriteString("Review these findings for accuracy. The original diff context is provided below.\n\n")
@@ -66,7 +72,13 @@ func BuildReflectPrompt(findings []Finding, diffContext string) string {
6672
}
6773
b.WriteString("```\n")
6874

69-
return b.String()
75+
result := b.String()
76+
// Reject if the prompt exceeds the maximum allowed size
77+
if len(result) > maxReflectPromptSize {
78+
return ""
79+
}
80+
81+
return result
7082
}
7183

7284
// ReflectResult holds the LLM's validation of a finding.

0 commit comments

Comments
 (0)