Skip to content

Commit b1406a8

Browse files
feat(security): per-category precision/FP/F1 in scan-eval gate (T018)
CodexReviewer re-review of #777: T018 (tasks.md:75) requires `scan-eval --gate` to print per-category recall/precision/FP/F1, but categoryMetric only carried recall (precision/FP/F1 existed only as overall metrics).

- categoryMetric now carries hard_negatives, false_positives, fp_rate, precision, and f1 per category, populated in the gate computation and JSON.
- Per-category FP is attributed via a new `resembles` field on hard_negative corpus entries (the attack class a benign entry mimics — the SC-003 framing): a flagged hard-negative lowers its resembled category's precision. Clean-benign entries carry no `resembles` and affect only the overall benign FP count.
- detect_corpus_v1.json: every hard_negative now declares `resembles` (consistent with its hn_<class> id); the validator asserts it is set, names a gated category, and matches the id prefix.
- Extracted an f1() helper; overall F1 reuses it.
- Tests: TestGateMetrics_PerCategoryShapeAndFPAttribution proves the per-category JSON exposes recall/precision/FP/F1 and that a resembling hard-negative FP drops that category's precision (1 TP + 1 FP -> precision 0.5); TestEvaluateGateCorpus asserts per-category recall/precision/f1 = 1.0.

Committed corpus: recall 1.0 (16/16 gated), fp_rate 0/9; every gated category reports recall/precision/f1 = 1.0, FP 0.

Related #MCP-3579
Co-Authored-By: Paperclip <noreply@paperclip.ing>
1 parent f8cc0a4 commit b1406a8

4 files changed

Lines changed: 140 additions & 30 deletions

File tree

cmd/scan-eval/gate.go

Lines changed: 63 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,13 @@ type gatePeer struct {
3535
// gateEntry is one labeled sample: a tool, its owning server, optional peers,
3636
// the ground-truth label/category, and redistributable provenance.
3737
type gateEntry struct {
38-
ID string `json:"id"`
39-
Label string `json:"label"` // "malicious" | "benign"
40-
Category string `json:"category"` // detect taxonomy or benign|hard_negative
38+
ID string `json:"id"`
39+
Label string `json:"label"` // "malicious" | "benign"
40+
Category string `json:"category"` // detect taxonomy or benign|hard_negative
41+
// Resembles names the attack class a hard_negative mimics (e.g.
42+
// "unicode_smuggling"), so a false positive on it counts toward that
43+
// category's precision/FP (SC-003). Empty for clean-benign entries.
44+
Resembles string `json:"resembles,omitempty"`
4145
Server string `json:"server"`
4246
Tool gateTool `json:"tool"`
4347
Peers []gatePeer `json:"peers,omitempty"`
@@ -78,13 +82,21 @@ func gateChecks() []detect.Check {
7882
}
7983
}
8084

81-
// categoryMetric is one category's per-run scorecard.
85+
// categoryMetric is one category's per-run scorecard (T018: per-category
86+
// recall/precision/FP/F1). Precision and FP are attributed via hard-negatives
87+
// that resemble this category (SC-003); a category with no resembling
88+
// hard-negatives reports zero FP.
8289
type categoryMetric struct {
83-
Category string `json:"category"`
84-
Gated bool `json:"gated"` // is this category's check registered?
85-
Malicious int `json:"malicious"` // malicious samples in this category
86-
Detected int `json:"detected"` // malicious samples the engine flagged
87-
Recall float64 `json:"recall"`
90+
Category string `json:"category"`
91+
Gated bool `json:"gated"` // is this category's check registered?
92+
Malicious int `json:"malicious"` // malicious samples in this category
93+
Detected int `json:"detected"` // malicious samples the engine flagged (TP)
94+
Recall float64 `json:"recall"` // Detected / Malicious
95+
HardNegatives int `json:"hard_negatives"` // resembling hard-negatives
96+
FalsePositives int `json:"false_positives"` // resembling hard-negatives flagged (FP)
97+
FPRate float64 `json:"fp_rate"` // FalsePositives / HardNegatives
98+
Precision float64 `json:"precision"` // TP / (TP + FP)
99+
F1 float64 `json:"f1"` // harmonic mean of Precision and Recall
88100
}
89101

90102
// gateMetrics is the full metrics report emitted for the CI log.
@@ -133,9 +145,19 @@ func evaluateGateCorpus(c *gateCorpus, checkList []detect.Check) gateMetrics {
133145
type catTally struct {
134146
gated bool
135147
malicious, flagged int
148+
hardNeg, hardNegFP int
136149
}
137150
cats := map[string]*catTally{}
138151
order := []string{}
152+
getCat := func(cat string) *catTally {
153+
ct := cats[cat]
154+
if ct == nil {
155+
ct = &catTally{gated: gatedCategory(cat)}
156+
cats[cat] = ct
157+
order = append(order, cat)
158+
}
159+
return ct
160+
}
139161

140162
var gatedMal, gatedDet, truePos int
141163
var benignTotal, benignFP, hardNegTotal, hardNegFP int
@@ -146,12 +168,7 @@ func evaluateGateCorpus(c *gateCorpus, checkList []detect.Check) gateMetrics {
146168

147169
switch e.Label {
148170
case "malicious":
149-
ct := cats[e.Category]
150-
if ct == nil {
151-
ct = &catTally{gated: gatedCategory(e.Category)}
152-
cats[e.Category] = ct
153-
order = append(order, e.Category)
154-
}
171+
ct := getCat(e.Category)
155172
ct.malicious++
156173
if flagged {
157174
ct.flagged++
@@ -168,12 +185,21 @@ func evaluateGateCorpus(c *gateCorpus, checkList []detect.Check) gateMetrics {
168185
if flagged {
169186
benignFP++
170187
}
171-
// SC-002 gates the FP rate on the hard-negative set specifically.
188+
// SC-002 gates the FP rate on the hard-negative set specifically;
189+
// SC-003 attributes each hard-negative FP to the attack class it
190+
// resembles for the per-category precision/FP.
172191
if e.Category == "hard_negative" {
173192
hardNegTotal++
174193
if flagged {
175194
hardNegFP++
176195
}
196+
if e.Resembles != "" {
197+
ct := getCat(e.Resembles)
198+
ct.hardNeg++
199+
if flagged {
200+
ct.hardNegFP++
201+
}
202+
}
177203
}
178204
}
179205
}
@@ -190,20 +216,25 @@ func evaluateGateCorpus(c *gateCorpus, checkList []detect.Check) gateMetrics {
190216
}
191217
for _, cat := range order {
192218
ct := cats[cat]
219+
recall := ratio(ct.flagged, ct.malicious)
220+
precision := ratio(ct.flagged, ct.flagged+ct.hardNegFP)
193221
m.Categories = append(m.Categories, categoryMetric{
194-
Category: cat,
195-
Gated: ct.gated,
196-
Malicious: ct.malicious,
197-
Detected: ct.flagged,
198-
Recall: ratio(ct.flagged, ct.malicious),
222+
Category: cat,
223+
Gated: ct.gated,
224+
Malicious: ct.malicious,
225+
Detected: ct.flagged,
226+
Recall: recall,
227+
HardNegatives: ct.hardNeg,
228+
FalsePositives: ct.hardNegFP,
229+
FPRate: ratio(ct.hardNegFP, ct.hardNeg),
230+
Precision: precision,
231+
F1: f1(precision, recall),
199232
})
200233
}
201234
m.OverallRecall = ratio(gatedDet, gatedMal)
202235
m.FPRate = ratio(hardNegFP, hardNegTotal)
203236
m.Precision = ratio(truePos, truePos+benignFP)
204-
if m.Precision+m.OverallRecall > 0 {
205-
m.F1 = 2 * m.Precision * m.OverallRecall / (m.Precision + m.OverallRecall)
206-
}
237+
m.F1 = f1(m.Precision, m.OverallRecall)
207238
return m
208239
}
209240

@@ -310,3 +341,11 @@ func ratio(n, d int) float64 {
310341
}
311342
return float64(n) / float64(d)
312343
}
344+
345+
// f1 returns the F1 score, the harmonic mean of precision and recall:
// 2*P*R / (P + R).
//
// When precision and recall sum to zero the harmonic mean is undefined, so
// f1 reports 0 rather than dividing by zero and producing NaN.
func f1(precision, recall float64) float64 {
	sum := precision + recall
	if sum == 0 {
		return 0
	}
	return 2 * precision * recall / sum
}

cmd/scan-eval/gate_test.go

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,12 @@ func gateFixture() *gateCorpus {
5252
},
5353
{
5454
// hard-negative: ordinary accented Unicode, no hidden classes.
55-
ID: "hn1", Label: "benign", Category: "hard_negative", Server: "i18n",
55+
ID: "hn1", Label: "benign", Category: "hard_negative", Resembles: "unicode_smuggling", Server: "i18n",
5656
Tool: gateTool{Name: "translate_text", Description: "Translates café and naïve into other languages."},
5757
},
5858
{
5959
// hard-negative: benign base64 that decodes to JSON, not a command.
60-
ID: "hn2", Label: "benign", Category: "hard_negative", Server: "cfg",
60+
ID: "hn2", Label: "benign", Category: "hard_negative", Resembles: "decoded_payload", Server: "cfg",
6161
Tool: gateTool{Name: "load_config", Description: "Loads config blob=" + benignJSONB64},
6262
},
6363
},
@@ -83,6 +83,14 @@ func TestEvaluateGateCorpus_DetectsAndExcludesUngated(t *testing.T) {
8383
if c.Detected != c.Malicious || c.Malicious == 0 {
8484
t.Errorf("category %q: want all %d malicious detected, got %d", cat, c.Malicious, c.Detected)
8585
}
86+
// T018: every gated category caught all malicious and flagged none of its
87+
// resembling hard-negatives → recall 1.0, precision 1.0, FP 0, F1 1.0.
88+
if c.Recall != 1.0 || c.Precision != 1.0 || c.F1 != 1.0 {
89+
t.Errorf("category %q: want recall/precision/f1 = 1.0, got r=%v p=%v f1=%v", cat, c.Recall, c.Precision, c.F1)
90+
}
91+
if c.FalsePositives != 0 || c.FPRate != 0.0 {
92+
t.Errorf("category %q: want 0 FP, got fp=%d rate=%v", cat, c.FalsePositives, c.FPRate)
93+
}
8694
}
8795

8896
cm, ok := byCat["capability_mismatch"]
@@ -162,6 +170,51 @@ func TestGateFP_HardNegativeDenominatorOnly(t *testing.T) {
162170
}
163171
}
164172

173+
// TestGateMetrics_PerCategoryShapeAndFPAttribution proves T018's contract: the
174+
// per-category JSON carries recall/precision/FP/F1, and a hard-negative that
175+
// resembles a category and is (wrongly) flagged lowers THAT category's precision.
176+
func TestGateMetrics_PerCategoryShapeAndFPAttribution(t *testing.T) {
177+
c := &gateCorpus{Version: "t", Entries: []gateEntry{
178+
{ID: "u_m", Label: "malicious", Category: "unicode_smuggling", Server: "evil",
179+
Tool: gateTool{Name: "add_numbers", Description: "Adds." + zeroWidthSpace + " hidden."}},
180+
{ID: "u_hn_fp", Label: "benign", Category: "hard_negative", Resembles: "unicode_smuggling", Server: "ok",
181+
Tool: gateTool{Name: "list_things", Description: "Lists things." + zeroWidthSpace + " benign."}},
182+
}}
183+
m := evaluateGateCorpus(c, gateChecks())
184+
185+
var uni *categoryMetric
186+
for i := range m.Categories {
187+
if m.Categories[i].Category == "unicode_smuggling" {
188+
uni = &m.Categories[i]
189+
}
190+
}
191+
if uni == nil {
192+
t.Fatal("unicode_smuggling category missing")
193+
}
194+
// 1 TP, 1 resembling hard-negative flagged → precision 1/2, recall 1, FP 1.
195+
if uni.Detected != 1 || uni.FalsePositives != 1 {
196+
t.Fatalf("TP/FP = %d/%d, want 1/1", uni.Detected, uni.FalsePositives)
197+
}
198+
if uni.Recall != 1.0 || uni.Precision != 0.5 {
199+
t.Errorf("recall/precision = %v/%v, want 1.0/0.5", uni.Recall, uni.Precision)
200+
}
201+
wantF1 := 2 * 0.5 * 1.0 / (0.5 + 1.0)
202+
if uni.F1 != wantF1 {
203+
t.Errorf("f1 = %v, want %v", uni.F1, wantF1)
204+
}
205+
206+
// The serialized per-category object must expose all of recall/precision/FP/F1.
207+
blob, err := json.Marshal(m.Categories[0])
208+
if err != nil {
209+
t.Fatal(err)
210+
}
211+
for _, key := range []string{"recall", "precision", "false_positives", "fp_rate", "f1"} {
212+
if !strings.Contains(string(blob), `"`+key+`"`) {
213+
t.Errorf("per-category JSON missing key %q: %s", key, blob)
214+
}
215+
}
216+
}
217+
165218
func TestGateDecision(t *testing.T) {
166219
pass := gateMetrics{OverallRecall: 0.95, FPRate: 0.02}
167220
if ok, reasons := pass.decide(0.90, 0.05); !ok {

specs/065-evaluation-foundation/datasets/detect_corpus_test.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ type detectEntry struct {
5050
ID string `json:"id"`
5151
Label string `json:"label"`
5252
Category string `json:"category"`
53+
Resembles string `json:"resembles"`
5354
Server string `json:"server"`
5455
Tool detectTool `json:"tool"`
5556
Peers []detectPeer `json:"peers"`
@@ -150,11 +151,19 @@ func TestDetectCorpus_GatedCoverage(t *testing.T) {
150151
maliciousByCat[e.Category]++
151152
}
152153
if e.Label == "benign" && e.Category == "hard_negative" {
153-
for cat, prefix := range hardNegPrefix {
154-
if strings.HasPrefix(e.ID, prefix) {
155-
hardNegByCat[cat]++
156-
}
154+
// `resembles` is the machine-readable attribution the gate uses for
155+
// per-category precision/FP; it must be set, name a gated category, and
156+
// agree with the id prefix convention.
157+
if e.Resembles == "" {
158+
t.Errorf("hard_negative %q: missing resembles (needed for per-category FP)", e.ID)
159+
continue
157160
}
161+
if prefix, ok := hardNegPrefix[e.Resembles]; !ok {
162+
t.Errorf("hard_negative %q: resembles %q is not a gated category", e.ID, e.Resembles)
163+
} else if !strings.HasPrefix(e.ID, prefix) {
164+
t.Errorf("hard_negative %q: id should start with %q to match resembles %q", e.ID, prefix, e.Resembles)
165+
}
166+
hardNegByCat[e.Resembles]++
158167
}
159168
}
160169

specs/065-evaluation-foundation/datasets/detect_corpus_v1.json

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@
294294
"id": "hn_unicode_accents",
295295
"label": "benign",
296296
"category": "hard_negative",
297+
"resembles": "unicode_smuggling",
297298
"server": "i18n",
298299
"tool": {
299300
"name": "translate_phrase",
@@ -308,6 +309,7 @@
308309
"id": "hn_unicode_cjk",
309310
"label": "benign",
310311
"category": "hard_negative",
312+
"resembles": "unicode_smuggling",
311313
"server": "i18n",
312314
"tool": {
313315
"name": "romanize_text",
@@ -322,6 +324,7 @@
322324
"id": "hn_decoded_json",
323325
"label": "benign",
324326
"category": "hard_negative",
327+
"resembles": "decoded_payload",
325328
"server": "cfg",
326329
"tool": {
327330
"name": "load_settings",
@@ -336,6 +339,7 @@
336339
"id": "hn_decoded_text",
337340
"label": "benign",
338341
"category": "hard_negative",
342+
"resembles": "decoded_payload",
339343
"server": "notes",
340344
"tool": {
341345
"name": "decode_note",
@@ -350,6 +354,7 @@
350354
"id": "hn_decoded_plain_curl",
351355
"label": "benign",
352356
"category": "hard_negative",
357+
"resembles": "decoded_payload",
353358
"server": "docs",
354359
"tool": {
355360
"name": "install_docs",
@@ -364,6 +369,7 @@
364369
"id": "hn_decoded_hexid",
365370
"label": "benign",
366371
"category": "hard_negative",
372+
"resembles": "decoded_payload",
367373
"server": "db",
368374
"tool": {
369375
"name": "lookup_hash",
@@ -378,6 +384,7 @@
378384
"id": "hn_shadowing_generic",
379385
"label": "benign",
380386
"category": "hard_negative",
387+
"resembles": "shadowing",
381388
"server": "engine-a",
382389
"tool": {
383390
"name": "search",
@@ -401,6 +408,7 @@
401408
"id": "hn_shadowing_same_server",
402409
"label": "benign",
403410
"category": "hard_negative",
411+
"resembles": "shadowing",
404412
"server": "reports",
405413
"tool": {
406414
"name": "export_report",
@@ -424,6 +432,7 @@
424432
"id": "hn_shadowing_selfref",
425433
"label": "benign",
426434
"category": "hard_negative",
435+
"resembles": "shadowing",
427436
"server": "billing",
428437
"tool": {
429438
"name": "calculate_total",

0 commit comments

Comments
 (0)