feat(recipes): high-crap-score with graph-estimated coverage (plan 2)

SutuSebastian · SutuSebastian · commit 3fc823a8cc0d · 2026-06-10T12:32:32.000+03:00
Spike locks 85/40/0% reachability tiers on fixtures/minimal; ships
high-crap-score recipe (measured override when coverage ingested),
golden + script tests, and high-complexity-untested cross-link.
diff --git a/.changeset/high-crap-score.md b/.changeset/high-crap-score.md
@@ -0,0 +1,5 @@
+---
+"@stainless-code/codemap": patch
+---
+
+Add `high-crap-score` recipe: CRAP ranking with measured coverage when ingested, or graph-estimated 85/40/0% tiers from test reachability otherwise.
diff --git a/docs/plans/agent-enrichment-wave.md b/docs/plans/agent-enrichment-wave.md
@@ -91,4 +91,4 @@ Each PR: `harden-pr full` → merge. Do not batch plans 1–4 into one PR.
 
 ## Current slice
 
-**Active:** Plan 1 shipped in [**PR #174**](https://github.com/stainless-code/codemap/pull/174) (awaiting merge) — next: Plan 2 spike **2.0** (`graph-estimated-crap.md`).
+**Active:** Plan 2 **in flight** on `feat/high-crap-score` — slices **2.0–2.3** (`graph-estimated-crap.md`); PR **#C** when complete.
diff --git a/docs/plans/graph-estimated-crap.md b/docs/plans/graph-estimated-crap.md
@@ -32,9 +32,21 @@ recipe high-crap-score (SQL only)
   → CRAP formula → rows + coverage_source column
 ```
 
-### Tracer bullet (slice 1)
+### Spike results (slice 2.0, `fixtures/minimal`)
 
-Recipe SQL + `.md` on fixture index without coverage ingest (tiers only). Golden row asserting `coverage_source: estimated`. Second golden with `ingest-coverage` → `measured` overrides.
+`scripts/spike-crap-reachability.sql` + `scripts/spike-crap-reachability.test.mjs` lock tier counts on function-shaped symbols:
+
+| Tier | Count | Example                                                                  |
+| ---- | ----- | ------------------------------------------------------------------------ |
+| 85%  | 1     | `labyrinth` — direct `bindings` ref from `smoke.test.ts`                 |
+| 40%  | 4     | `deeplyNested`, `relay`, … — `complexity-fixture.ts` reachable from test |
+| 0%   | 39    | `createClient`, `get`, … — not dependency-reachable from tests           |
+
+Reachability walk: `test_suites` rows + `*.test.{ts,tsx,js,jsx}` / `*.spec.{ts,tsx,js,jsx}` name globs → recursive `dependencies` fan-out (value edges only), cycle-guarded and capped at depth 50.
+
+### Tracer bullet (slice 2.1)
+
+Recipe SQL + `.md` on fixture index without coverage ingest (tiers only). Golden row asserting `coverage_source: estimated`. `scripts/high-crap-score-measured.test.mjs` asserts `ingest-coverage` → `measured` overrides.
 
 ### Out of scope (v1)
 
@@ -105,10 +117,10 @@ bun test scripts/query-golden-coverage-matrix.test.mjs   # after golden scenario
 
 ## Acceptance
 
-- [ ] Without coverage ingest: symbols in files imported by tests get tier 40/85; isolated files get 0%
-- [ ] With coverage ingest: `coverage_source = measured` and CRAP uses real `coverage_pct`
-- [ ] `codemap query --recipe high-crap-score --json` works; SARIF compatible via `--format sarif`
-- [ ] No new pass/fail primitive
+- [x] Without coverage ingest: symbols in files imported by tests get tier 40/85; isolated files get 0%
+- [x] With coverage ingest: `coverage_source = measured` and CRAP uses real `coverage_pct`
+- [x] `codemap query --recipe high-crap-score --json` works; SARIF compatible via `--format sarif`
+- [x] No new pass/fail primitive
 
 ---
 
diff --git a/fixtures/golden/minimal/high-crap-score.json b/fixtures/golden/minimal/high-crap-score.json
@@ -0,0 +1,13 @@
+[
+  {
+    "name": "labyrinth",
+    "kind": "function",
+    "file_path": "src/lib/complexity-fixture.ts",
+    "line_start": 22,
+    "line_end": 83,
+    "complexity": 19,
+    "effective_coverage_pct": 85,
+    "coverage_source": "estimated",
+    "crap_score": 20.22
+  }
+]
diff --git a/fixtures/golden/scenarios.json b/fixtures/golden/scenarios.json
@@ -593,6 +593,12 @@
       "prompt": "High cyclomatic complexity + low coverage",
       "recipe": "high-complexity-untested"
     },
+    {
+      "id": "high-crap-score",
+      "prompt": "High CRAP score with graph-estimated coverage tiers (min_crap=15)",
+      "recipe": "high-crap-score",
+      "params": { "min_crap": 15 }
+    },
     {
       "id": "text-in-deprecated-functions",
       "prompt": "FTS TODO/FIXME/HACK in @deprecated functions with low coverage (requires fts5)",
diff --git a/scripts/high-crap-score-measured.test.mjs b/scripts/high-crap-score-measured.test.mjs
@@ -0,0 +1,30 @@
+import { describe, expect, it } from "bun:test";
+import { join } from "node:path";
+
+/**
+ * Plan 2 slice 2.2 — measured coverage overrides graph tiers when ingest ran.
+ * Run via `bun run test:scripts` (golden runner already ingests coverage in setup).
+ */
+import { $ } from "bun";
+
+const REPO_ROOT = join(import.meta.dir, ".."); // one level above this script's directory; used as cwd for every command below
+
+describe("high-crap-score measured override", () => {
+  it("uses coverage_source measured when coverage row exists (now @ 100%)", async () => {
+    await $`bun src/index.ts ingest-coverage coverage/coverage-final.json --root fixtures/minimal`
+      .cwd(REPO_ROOT)
+      .quiet(); // ingest runs first so the query below can find coverage rows
+    const result =
+      await $`bun src/index.ts query --recipe high-crap-score --json --params min_crap=1 --root fixtures/minimal`
+        .cwd(REPO_ROOT)
+        .quiet(); // min_crap=1 lowers the threshold so a low-scoring, fully covered symbol is still returned
+    expect(result.exitCode).toBe(0);
+    const rows = JSON.parse(result.stdout.toString()); // --json output is parsed as an array of result rows
+    const nowRow = rows.find( // NOTE(review): the recipe ends with LIMIT 50 ordered by crap_score DESC, so this low-scoring row relies on the fixture staying under that cap — confirm
+      (r) => r.name === "now" && r.file_path === "src/utils/date.ts",
+    );
+    expect(nowRow).toBeDefined();
+    expect(nowRow.coverage_source).toBe("measured"); // the ingested coverage row must win over the graph-estimated tier
+    expect(nowRow.effective_coverage_pct).toBe(100);
+  });
+});
diff --git a/scripts/spike-crap-reachability.sql b/scripts/spike-crap-reachability.sql
@@ -0,0 +1,71 @@
+-- Plan 2 slice 2.0 spike: graph-estimated coverage tiers on fixtures/minimal.
+-- Run: codemap query --json "$(cat scripts/spike-crap-reachability.sql)" --root fixtures/minimal
+-- Expected function/method tier counts: 85% → labyrinth (direct test ref); 40% → complexity-fixture peers (reachable); 0% → rest.
+WITH RECURSIVE
+test_files(path) AS ( -- a file counts as a test file if it owns a test_suites row or matches a name glob below
+  SELECT DISTINCT f.path
+  FROM files f
+  WHERE EXISTS (
+      SELECT 1
+      FROM test_suites ts
+      WHERE ts.file_path = f.path
+    )
+    OR f.path GLOB '*.test.ts' -- name globs cover ts, tsx, js and jsx only
+    OR f.path GLOB '*.test.tsx'
+    OR f.path GLOB '*.spec.ts'
+    OR f.path GLOB '*.spec.tsx'
+    OR f.path GLOB '*.test.js'
+    OR f.path GLOB '*.spec.js'
+    OR f.path GLOB '*.test.jsx'
+    OR f.path GLOB '*.spec.jsx'
+),
+reachable_files(file_path, depth, visited) AS ( -- walks dependencies outward from every test file
+  SELECT path, 0, char(30) || path || char(30) -- seed: each test file at depth 0; visited is a char(30)-delimited list of paths on this walk
+  FROM test_files
+  UNION ALL
+  SELECT
+    d.to_path,
+    rf.depth + 1,
+    rf.visited || d.to_path || char(30)
+  FROM dependencies d
+  JOIN reachable_files rf ON d.from_path = rf.file_path
+  WHERE rf.depth < 50 -- stop extending a walk once it is 50 edges deep
+    AND instr(rf.visited, char(30) || d.to_path || char(30)) = 0 -- cycle guard: skip files already on this walk
+),
+symbol_tiers AS ( -- one row per symbol with its estimated coverage tier; first matching WHEN wins
+  SELECT
+    s.name,
+    s.file_path,
+    s.complexity,
+    CASE
+      WHEN EXISTS ( -- a reference located in a test file has a binding that resolves to this symbol
+        SELECT 1
+        FROM "references" r
+        JOIN bindings b ON b.reference_id = r.id
+        JOIN test_files tf ON tf.path = r.file_path
+        WHERE b.resolved_symbol_id = s.id
+      )
+      OR EXISTS ( -- or a call located in a test file targets this symbol
+        SELECT 1
+        FROM calls c2
+        JOIN test_files tf ON tf.path = c2.file_path
+        WHERE c2.callee_symbol_id = s.id
+          AND (c2.provenance IS NULL OR c2.provenance = 'ast') -- only calls with no provenance or 'ast' provenance count
+      )
+      THEN 85
+      WHEN EXISTS ( -- the symbol's file was reached by the walk (test files themselves are seeded at depth 0)
+        SELECT 1
+        FROM reachable_files rf
+        WHERE rf.file_path = s.file_path
+      )
+      THEN 40
+      ELSE 0 -- no link to any test file found
+    END AS estimated_pct
+  FROM symbols s
+  WHERE s.complexity IS NOT NULL
+    AND s.kind IN ('function', 'method') -- function-shaped symbols only
+)
+SELECT estimated_pct, COUNT(*) AS symbol_count -- one output row per tier that has at least one symbol
+FROM symbol_tiers
+GROUP BY estimated_pct
+ORDER BY estimated_pct DESC
diff --git a/scripts/spike-crap-reachability.test.mjs b/scripts/spike-crap-reachability.test.mjs
@@ -0,0 +1,32 @@
+import { describe, expect, it } from "bun:test";
+/**
+ * Plan 2 slice 2.0 — locks reachability tier counts on fixtures/minimal.
+ * Run via `bun run test:scripts`.
+ */
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
+
+import { $ } from "bun";
+
+const REPO_ROOT = join(import.meta.dir, ".."); // one level above this script's directory; used as cwd for the query command
+const SPIKE_SQL = readFileSync( // query text is read once at module load, synchronously
+  join(REPO_ROOT, "scripts/spike-crap-reachability.sql"),
+  "utf-8",
+);
+
+describe("spike-crap-reachability (fixtures/minimal)", () => {
+  it("assigns 85/40/0% tiers to 1/4/39 function-shaped symbols", async () => {
+    const result =
+      await $`bun src/index.ts query --json ${SPIKE_SQL} --root fixtures/minimal`
+        .cwd(REPO_ROOT)
+        .quiet(); // the whole SQL file is interpolated as the query argument
+    expect(result.exitCode).toBe(0);
+    const rows = JSON.parse(result.stdout.toString()); // rows carry estimated_pct and symbol_count, as selected by the SQL
+    const byTier = Object.fromEntries( // map tier → symbol count; a tier with no rows is simply absent
+      rows.map((r) => [r.estimated_pct, r.symbol_count]),
+    );
+    expect(byTier[85]).toBe(1); // counts below are locked to the current contents of fixtures/minimal
+    expect(byTier[40]).toBe(4);
+    expect(byTier[0]).toBe(39);
+  });
+});
diff --git a/templates/recipes/high-complexity-untested.md b/templates/recipes/high-complexity-untested.md
@@ -22,6 +22,10 @@ McCabe formula: `1 + (decision points)`. Branching nodes counted by Codemap's pa
 
 Each row also includes **SonarSource cognitive complexity** for the same symbol (nesting-heavy control flow scores higher than flat branch chains). The recipe **filter** still uses cyclomatic `>= 10`; use `high-cognitive-complexity` when cognitive score alone is the gate.
 
+## Without `ingest-coverage`
+
+`COALESCE(coverage_pct, 0)` treats missing coverage as **0%**, so every high-complexity symbol appears undertested. Prefer **`high-crap-score`** when coverage is not ingested — it uses graph-estimated tiers (85/40/0%) from test reachability instead of assuming zero coverage.
+
 ## Why the joint signal
 
 - High complexity alone surfaces too many false positives — a heavily-branched config-loader or visitor pattern is fine if it's well-tested.
diff --git a/templates/recipes/high-crap-score.md b/templates/recipes/high-crap-score.md
@@ -0,0 +1,31 @@
+---
+params:
+  - name: min_crap
+    type: number
+    required: false
+    default: 30
+    description: Minimum CRAP score threshold (industry default 30)
+actions:
+  - type: review-crap-score
+    auto_fixable: false
+    description: "High CRAP (complex + undertested) — add tests or simplify before refactor. Check coverage_source: measured rows used ingested coverage; estimated rows use graph tiers only."
+---
+
+# high-crap-score
+
+Ranks symbols by **CRAP score** — `CC² × (1 - effective_coverage/100)³ + CC` where `CC = symbols.complexity`.
+
+**Coverage precedence:** ingested `coverage` rows win (`coverage_source: measured`). Otherwise graph-estimated tiers (`coverage_source: estimated`):
+
+| Tier    | When                                                                                          |
+| ------- | --------------------------------------------------------------------------------------------- |
+| **85%** | Symbol directly referenced from a test file (`bindings`-resolved `references` or AST `calls`) |
+| **40%** | Symbol's `file_path` is dependency-reachable from any test file                               |
+| **0%**  | Otherwise                                                                                     |
+
+Estimates are **heuristics**, not execution coverage — run `codemap ingest-coverage` before relying on this recipe in CI gates. Composes with `high-complexity-untested`, which filters on cyclomatic complexity and currently uses measured coverage only.
+
+```bash
+codemap query --recipe high-crap-score --json
+codemap query --recipe high-crap-score --params min_crap=15 --json
+```
diff --git a/templates/recipes/high-crap-score.sql b/templates/recipes/high-crap-score.sql
@@ -0,0 +1,112 @@
+-- CRAP score (complexity × undertest risk) with measured or graph-estimated coverage.
+-- Formula: CC² × (1 - effective_coverage/100)³ + CC  (CC = symbols.complexity).
+-- Without ingest-coverage, effective coverage uses static tiers from test reachability:
+-- 85% direct reference from a test file; 40% file dependency-reachable from tests; 0% otherwise.
+WITH RECURSIVE
+params(min_crap) AS ( -- single-row CTE holding the score threshold
+  SELECT COALESCE(?, 30) -- falls back to 30 when the bound value is NULL; presumably bound from the min_crap recipe param — confirm
+),
+test_files(path) AS ( -- a file counts as a test file if it owns a test_suites row or matches a name glob below
+  SELECT DISTINCT f.path
+  FROM files f
+  WHERE EXISTS (
+      SELECT 1
+      FROM test_suites ts
+      WHERE ts.file_path = f.path
+    )
+    OR f.path GLOB '*.test.ts' -- name globs cover ts, tsx, js and jsx only
+    OR f.path GLOB '*.test.tsx'
+    OR f.path GLOB '*.spec.ts'
+    OR f.path GLOB '*.spec.tsx'
+    OR f.path GLOB '*.test.js'
+    OR f.path GLOB '*.spec.js'
+    OR f.path GLOB '*.test.jsx'
+    OR f.path GLOB '*.spec.jsx'
+),
+reachable_files(file_path, depth, visited) AS ( -- walks dependencies outward from every test file
+  SELECT path, 0, char(30) || path || char(30) -- seed: each test file at depth 0; visited is a char(30)-delimited list of paths on this walk
+  FROM test_files
+  UNION ALL
+  SELECT
+    d.to_path,
+    rf.depth + 1,
+    rf.visited || d.to_path || char(30)
+  FROM dependencies d
+  JOIN reachable_files rf ON d.from_path = rf.file_path
+  WHERE rf.depth < 50 -- stop extending a walk once it is 50 edges deep
+    AND instr(rf.visited, char(30) || d.to_path || char(30)) = 0 -- cycle guard: skip files already on this walk
+),
+effective AS ( -- one row per symbol with a complexity value, plus its effective coverage and where that number came from
+  SELECT
+    s.name,
+    s.kind,
+    s.file_path,
+    s.line_start,
+    s.line_end,
+    s.complexity,
+    ROUND( -- effective coverage is rounded to one decimal place
+      COALESCE(
+        c.coverage_pct, -- a non-NULL ingested coverage value always wins over the estimate
+        CASE
+          WHEN EXISTS ( -- a reference located in a test file has a binding that resolves to this symbol
+            SELECT 1
+            FROM "references" r
+            JOIN bindings b ON b.reference_id = r.id
+            JOIN test_files tf ON tf.path = r.file_path
+            WHERE b.resolved_symbol_id = s.id
+          )
+          OR EXISTS ( -- or a call located in a test file targets this symbol
+            SELECT 1
+            FROM calls c2
+            JOIN test_files tf ON tf.path = c2.file_path
+            WHERE c2.callee_symbol_id = s.id
+              AND (c2.provenance IS NULL OR c2.provenance = 'ast') -- only calls with no provenance or 'ast' provenance count
+          )
+          THEN 85.0
+          WHEN EXISTS ( -- the symbol's file was reached by the walk (test files themselves are seeded at depth 0)
+            SELECT 1
+            FROM reachable_files rf
+            WHERE rf.file_path = s.file_path
+          )
+          THEN 40.0
+          ELSE 0.0 -- no link to any test file found
+        END
+      ),
+      1
+    ) AS effective_coverage_pct,
+    CASE
+      WHEN c.coverage_pct IS NOT NULL THEN 'measured' -- same condition that makes COALESCE pick the ingested value
+      ELSE 'estimated'
+    END AS coverage_source
+  FROM symbols s
+  LEFT JOIN coverage c -- coverage rows are matched to symbols by file, name and starting line
+    ON c.file_path = s.file_path
+   AND c.name = s.name
+   AND c.line_start = s.line_start
+  WHERE s.complexity IS NOT NULL -- no kind filter here: every symbol with a complexity value is scored
+),
+scored AS ( -- applies the CRAP formula from the header to each row
+  SELECT
+    e.*,
+    ROUND(
+      e.complexity * e.complexity * POWER(1 - e.effective_coverage_pct / 100.0, 3)
+      + e.complexity,
+      2
+    ) AS crap_score -- rounded to two decimal places
+  FROM effective e
+)
+SELECT
+  s.name,
+  s.kind,
+  s.file_path,
+  s.line_start,
+  s.line_end,
+  s.complexity,
+  s.effective_coverage_pct,
+  s.coverage_source,
+  s.crap_score
+FROM scored s
+CROSS JOIN params p -- params has exactly one row, so this only exposes the threshold
+WHERE s.crap_score >= p.min_crap
+ORDER BY s.crap_score DESC, s.complexity DESC, s.file_path, s.name -- worst first; later keys break ties
+LIMIT 50 -- output is capped at the 50 highest-ranked rows

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +---
 +"@stainless-code/codemap": patch
 +---
++
 +Add `high-crap-score` recipe: CRAP ranking with measured coverage when ingested, or graph-estimated 85/40/0% tiers from test reachability otherwise.
Original file line number	Diff line number	Diff line change
@@ -91,4 +91,4 @@ Each PR: `harden-pr full` → merge. Do not batch plans 1–4 into one PR.
`91`	`91`
`92`	`92`	`## Current slice`
`93`	`93`
`94`		-Active: Plan 1 shipped in [PR #174](https://github.com/stainless-code/codemap/pull/174) (awaiting merge) — next: Plan 2 spike 2.0 (`graph-estimated-crap.md`).
	`94`	+Active: Plan 2 in flight on `feat/high-crap-score` — slices 2.0–2.3 (`graph-estimated-crap.md`); PR #C when complete.