|
| 1 | +package sight |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "encoding/json" |
| 6 | + "fmt" |
| 7 | + "os" |
| 8 | + "strings" |
| 9 | +) |
| 10 | + |
| 11 | +// EvalCase defines a single test case for evaluating review quality. |
| 12 | +type EvalCase struct { |
| 13 | + Name string `json:"name"` |
| 14 | + Diff string `json:"diff"` |
| 15 | + ExpectFindings []EvalExpectation `json:"expect_findings"` |
| 16 | + DenyFindings []EvalDenial `json:"deny_findings"` |
| 17 | +} |
| 18 | + |
// EvalExpectation describes a finding the reviewer is expected to report.
// Empty fields are ignored when matching; every non-empty field must be
// satisfied by the same finding.
type EvalExpectation struct {
	// Concern is compared case-insensitively with the finding's concern.
	Concern string `json:"concern,omitempty"`

	// MinSeverity is the lowest acceptable severity, given as text that is
	// parsed with ParseSeverity.
	MinSeverity string `json:"min_severity,omitempty"`

	// MessageContains is a case-insensitive substring of the finding's message.
	MessageContains string `json:"message_contains,omitempty"`

	// File is compared case-insensitively with the finding's file.
	File string `json:"file,omitempty"`
}
| 26 | + |
// EvalDenial describes a finding the reviewer must NOT report; it is used to
// detect false positives. Empty fields are ignored when matching.
type EvalDenial struct {
	// MessageContains is a case-insensitive substring of the finding's message.
	MessageContains string `json:"message_contains,omitempty"`

	// Concern is compared case-insensitively with the finding's concern.
	Concern string `json:"concern,omitempty"`
}
| 32 | + |
| 33 | +// EvalResult is the outcome of running one eval case. |
| 34 | +type EvalResult struct { |
| 35 | + Case string `json:"case"` |
| 36 | + Passed bool `json:"passed"` |
| 37 | + Failures []string `json:"failures,omitempty"` |
| 38 | + Findings []Finding `json:"findings"` |
| 39 | +} |
| 40 | + |
| 41 | +// EvalSuite is a collection of eval cases. |
| 42 | +type EvalSuite struct { |
| 43 | + Cases []EvalCase `json:"cases"` |
| 44 | +} |
| 45 | + |
| 46 | +// RunEval executes an evaluation suite against the reviewer with the given options. |
| 47 | +// For each case it runs Review() on the diff, then checks expectations and denials. |
| 48 | +func RunEval(ctx context.Context, suite *EvalSuite, opts ...Option) ([]EvalResult, error) { |
| 49 | + if suite == nil || len(suite.Cases) == 0 { |
| 50 | + return nil, nil |
| 51 | + } |
| 52 | + |
| 53 | + results := make([]EvalResult, 0, len(suite.Cases)) |
| 54 | + |
| 55 | + for _, ec := range suite.Cases { |
| 56 | + if ctx.Err() != nil { |
| 57 | + return results, ctx.Err() |
| 58 | + } |
| 59 | + |
| 60 | + er := EvalResult{Case: ec.Name} |
| 61 | + |
| 62 | + reviewResult, err := Review(ctx, ec.Diff, opts...) |
| 63 | + if err != nil { |
| 64 | + er.Failures = append(er.Failures, fmt.Sprintf("review error: %v", err)) |
| 65 | + results = append(results, er) |
| 66 | + continue |
| 67 | + } |
| 68 | + |
| 69 | + er.Findings = reviewResult.Findings |
| 70 | + |
| 71 | + // Check expectations: each must be matched by at least one finding. |
| 72 | + for i, exp := range ec.ExpectFindings { |
| 73 | + if !matchExpectation(exp, reviewResult.Findings) { |
| 74 | + er.Failures = append(er.Failures, |
| 75 | + fmt.Sprintf("expect_findings[%d]: no finding matched %s", i, describeExpectation(exp))) |
| 76 | + } |
| 77 | + } |
| 78 | + |
| 79 | + // Check denials: none should be matched by any finding. |
| 80 | + for i, deny := range ec.DenyFindings { |
| 81 | + if matchDenial(deny, reviewResult.Findings) { |
| 82 | + er.Failures = append(er.Failures, |
| 83 | + fmt.Sprintf("deny_findings[%d]: found unexpected match for %s", i, describeDenial(deny))) |
| 84 | + } |
| 85 | + } |
| 86 | + |
| 87 | + er.Passed = len(er.Failures) == 0 |
| 88 | + results = append(results, er) |
| 89 | + } |
| 90 | + |
| 91 | + return results, nil |
| 92 | +} |
| 93 | + |
| 94 | +// LoadEvalSuite loads eval cases from a JSON file. |
| 95 | +func LoadEvalSuite(path string) (*EvalSuite, error) { |
| 96 | + data, err := os.ReadFile(path) |
| 97 | + if err != nil { |
| 98 | + return nil, fmt.Errorf("loading eval suite: %w", err) |
| 99 | + } |
| 100 | + return ParseEvalSuite(data) |
| 101 | +} |
| 102 | + |
| 103 | +// ParseEvalSuite parses eval cases from JSON bytes. |
| 104 | +func ParseEvalSuite(data []byte) (*EvalSuite, error) { |
| 105 | + var suite EvalSuite |
| 106 | + if err := json.Unmarshal(data, &suite); err != nil { |
| 107 | + return nil, fmt.Errorf("parsing eval suite: %w", err) |
| 108 | + } |
| 109 | + return &suite, nil |
| 110 | +} |
| 111 | + |
| 112 | +// EvalSummary returns pass/fail counts and overall success rate. |
| 113 | +func EvalSummary(results []EvalResult) (passed, failed int, rate float64) { |
| 114 | + for _, r := range results { |
| 115 | + if r.Passed { |
| 116 | + passed++ |
| 117 | + } else { |
| 118 | + failed++ |
| 119 | + } |
| 120 | + } |
| 121 | + total := passed + failed |
| 122 | + if total > 0 { |
| 123 | + rate = float64(passed) / float64(total) |
| 124 | + } |
| 125 | + return |
| 126 | +} |
| 127 | + |
| 128 | +// matchExpectation returns true if at least one finding matches all non-empty |
| 129 | +// fields in the expectation. |
| 130 | +func matchExpectation(exp EvalExpectation, findings []Finding) bool { |
| 131 | + for _, f := range findings { |
| 132 | + if matchesSingleExpectation(exp, f) { |
| 133 | + return true |
| 134 | + } |
| 135 | + } |
| 136 | + return false |
| 137 | +} |
| 138 | + |
| 139 | +func matchesSingleExpectation(exp EvalExpectation, f Finding) bool { |
| 140 | + if exp.Concern != "" && !strings.EqualFold(f.Concern, exp.Concern) { |
| 141 | + return false |
| 142 | + } |
| 143 | + if exp.MinSeverity != "" { |
| 144 | + minSev := ParseSeverity(exp.MinSeverity) |
| 145 | + if !f.Severity.AtLeast(minSev) { |
| 146 | + return false |
| 147 | + } |
| 148 | + } |
| 149 | + if exp.MessageContains != "" && !strings.Contains( |
| 150 | + strings.ToLower(f.Message), strings.ToLower(exp.MessageContains)) { |
| 151 | + return false |
| 152 | + } |
| 153 | + if exp.File != "" && !strings.EqualFold(f.File, exp.File) { |
| 154 | + return false |
| 155 | + } |
| 156 | + return true |
| 157 | +} |
| 158 | + |
| 159 | +// matchDenial returns true if any finding matches all non-empty fields in the denial. |
| 160 | +func matchDenial(deny EvalDenial, findings []Finding) bool { |
| 161 | + for _, f := range findings { |
| 162 | + if matchesSingleDenial(deny, f) { |
| 163 | + return true |
| 164 | + } |
| 165 | + } |
| 166 | + return false |
| 167 | +} |
| 168 | + |
| 169 | +func matchesSingleDenial(deny EvalDenial, f Finding) bool { |
| 170 | + if deny.Concern != "" && !strings.EqualFold(f.Concern, deny.Concern) { |
| 171 | + return false |
| 172 | + } |
| 173 | + if deny.MessageContains != "" && !strings.Contains( |
| 174 | + strings.ToLower(f.Message), strings.ToLower(deny.MessageContains)) { |
| 175 | + return false |
| 176 | + } |
| 177 | + return true |
| 178 | +} |
| 179 | + |
| 180 | +func describeExpectation(exp EvalExpectation) string { |
| 181 | + var parts []string |
| 182 | + if exp.Concern != "" { |
| 183 | + parts = append(parts, fmt.Sprintf("concern=%q", exp.Concern)) |
| 184 | + } |
| 185 | + if exp.MinSeverity != "" { |
| 186 | + parts = append(parts, fmt.Sprintf("min_severity=%q", exp.MinSeverity)) |
| 187 | + } |
| 188 | + if exp.MessageContains != "" { |
| 189 | + parts = append(parts, fmt.Sprintf("message_contains=%q", exp.MessageContains)) |
| 190 | + } |
| 191 | + if exp.File != "" { |
| 192 | + parts = append(parts, fmt.Sprintf("file=%q", exp.File)) |
| 193 | + } |
| 194 | + if len(parts) == 0 { |
| 195 | + return "{}" |
| 196 | + } |
| 197 | + return "{" + strings.Join(parts, ", ") + "}" |
| 198 | +} |
| 199 | + |
| 200 | +func describeDenial(deny EvalDenial) string { |
| 201 | + var parts []string |
| 202 | + if deny.Concern != "" { |
| 203 | + parts = append(parts, fmt.Sprintf("concern=%q", deny.Concern)) |
| 204 | + } |
| 205 | + if deny.MessageContains != "" { |
| 206 | + parts = append(parts, fmt.Sprintf("message_contains=%q", deny.MessageContains)) |
| 207 | + } |
| 208 | + if len(parts) == 0 { |
| 209 | + return "{}" |
| 210 | + } |
| 211 | + return "{" + strings.Join(parts, ", ") + "}" |
| 212 | +} |
0 commit comments