andreinknv
diff --git a/‎__tests__/biomarkers.test.ts‎
Lines changed: 43 additions & 0 deletions b/‎__tests__/biomarkers.test.ts‎
Lines changed: 43 additions & 0 deletions
diff --git a/‎__tests__/churn.test.ts‎
Lines changed: 99 additions & 1 deletion b/‎__tests__/churn.test.ts‎
Lines changed: 99 additions & 1 deletion
diff --git a/‎__tests__/cli-mcp-alignment.test.ts‎
Lines changed: 100 additions & 0 deletions b/‎__tests__/cli-mcp-alignment.test.ts‎
Lines changed: 100 additions & 0 deletions
diff --git a/‎__tests__/dead-code-parser.test.ts‎
Lines changed: 42 additions & 2 deletions b/‎__tests__/dead-code-parser.test.ts‎
Lines changed: 42 additions & 2 deletions
diff --git a/‎__tests__/mcp-changed-since.test.ts‎
Lines changed: 21 additions & 5 deletions b/‎__tests__/mcp-changed-since.test.ts‎
Lines changed: 21 additions & 5 deletions
@@ -1186,4 +1186,47 @@ describe('codegraph_biomarkers: F-D — minCentrality empty hint', () => {
     // we're checking is that the "no symbol matched" path does NOT fire.
     expect(text).not.toMatch(/No symbol matched/);
   });
+
+  // audit Group 2 #6 — in mode=symbol a name that does not exist must
+  // never be shown as a clean "Code Health 10/10". `gnarlyTwo` is not a
+  // real symbol; the FTS fallback resolves it to `gnarlyOne`, so the
+  // response has to carry a visible "Fuzzy fallback" banner.
+  it('mode=symbol surfaces a fuzzy-fallback banner for a non-existent name', async () => {
+    const response = await handler.execute('codegraph_biomarkers', {
+      mode: 'symbol',
+      symbol: 'gnarlyTwo',
+    });
+    const body = response.content[0]?.text ?? '';
+    // Two outcomes are acceptable: the lookup reports not-found, or it
+    // resolved fuzzily and says so with the banner. A bare clean-health
+    // line carrying neither signal is the failure being guarded.
+    const bannerShown = /Fuzzy fallback/.test(body);
+    const reportedMissing = /not found/i.test(body);
+    expect(reportedMissing || bannerShown).toBe(true);
+    // A banner, when present, must name the symbol that was asked for.
+    if (bannerShown) expect(body).toMatch(/gnarlyTwo/);
+  });
+
+  it('mode=symbol does NOT add a fuzzy banner for an exact symbol', async () => {
+    // `gnarlyOne` is requested by its exact name, so no fallback banner
+    // may appear in the response text.
+    const { content } = await handler.execute('codegraph_biomarkers', {
+      mode: 'symbol',
+      symbol: 'gnarlyOne',
+    });
+    const body = content[0]?.text ?? '';
+    expect(/Fuzzy fallback/.test(body)).toBe(false);
+  });
+
+  // Batched mode-symbol — a non-existent name in the batch must carry
+  // the banner on its own section, not a silent 10/10.
+  it('mode=symbol batched flags a fuzzy match without affecting real symbols', async () => {
+    const result = await handler.execute('codegraph_biomarkers', {
+      mode: 'symbol',
+      symbols: ['gnarlyOne', 'gnarlyTwo'],
+    });
+    const text = result.content[0]?.text ?? '';
+    // The real symbol's section carries no banner; the fuzzy one does
+    // (or it reported no match — both acceptable, never a silent 10/10).
+    const bannerCount = (text.match(/Fuzzy fallback/g) ?? []).length;
+    // Matched case-insensitively: elsewhere in this suite the message is
+    // spelled "No symbol matched" (capital N), which a lowercase-only
+    // pattern would never count.
+    const noMatchCount = (text.match(/no symbol matched/gi) ?? []).length;
+    expect(bannerCount + noMatchCount).toBeGreaterThanOrEqual(1);
+  });
 });
@@ -13,8 +13,14 @@ import {
 } from '../src/churn/index.js';
 import { getCurrentHeadSha as getGitHead } from '../src/git-utils.js';
 import { DatabaseConnection } from '../src/db/index.js';
-import { QueryBuilder } from '../src/db/queries.js';
+import { QueryBuilder, qbTransaction } from '../src/db/queries.js';
 import { applyChurnDeltas, clearChurn } from '../src/db/queries-history.js';
+import {
+  upsertFile,
+  getFileByPath,
+  removeFileFromIndex,
+  removeFileFromIndexInTx,
+} from '../src/db/queries-files.js';
 import { getMetadata, setMetadata } from '../src/db/queries-metadata.js';
 import { HOOK as ChurnHook } from '../src/index-hooks/churn.js';
 
@@ -369,3 +375,95 @@ describe.skipIf(!HAS_GIT)('churn hook self-heal on algo-version mismatch', () =>
     }
   });
 });
+
+/**
+ * Regression: re-extracting a file during `sync` must NOT wipe its
+ * mined churn columns.
+ *
+ * The sync re-extract path (`extraction-phases.ts`) evicts a changed
+ * file's prior extraction then re-`upsertFile`s the same path. Pre-fix
+ * the evict (`removeFileFromIndexInTx`) deleted the whole `files` row,
+ * so the follow-up upsert took its INSERT branch — where the
+ * churn-managed columns (`commit_count` / `first_seen_ts` /
+ * `last_touched_ts`) fall back to their schema defaults. Every sync
+ * that touched a file therefore reset that file's churn to 0; over a
+ * long-lived index every file decayed to 0 and `hotspots` went
+ * permanently empty. The fix: the re-extract evict deletes only the
+ * file's nodes, leaving the row for the upsert's ON CONFLICT UPDATE
+ * branch (which deliberately preserves churn). A genuine removal
+ * (`removeFileFromIndex`) still drops the row.
+ */
+describe('re-extract evict preserves mined churn columns', () => {
+  /**
+   * Open a fresh database inside its own temp directory. The directory
+   * path is returned alongside the handles so the caller can delete it
+   * once the test is finished.
+   */
+  function setupDb(): { db: DatabaseConnection; q: QueryBuilder; dbDir: string } {
+    const dbDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-churn-reextract-'));
+    const db = DatabaseConnection.initialize(path.join(dbDir, 'test.db'));
+    const q = new QueryBuilder(db.getDb());
+    return { db, q, dbDir };
+  }
+
+  /**
+   * Close the connection and delete the temp directory made by
+   * `setupDb`, so repeated runs do not pile up directories under the OS
+   * temp dir. The directory is removed even when `close()` throws.
+   */
+  function teardownDb(db: DatabaseConnection, dbDir: string): void {
+    try {
+      db.close();
+    } finally {
+      // `maxRetries` lets Node retry on transient EBUSY/EPERM errors.
+      fs.rmSync(dbDir, { recursive: true, force: true, maxRetries: 3 });
+    }
+  }
+
+  /** Upsert a baseline record for path `p` (content hash `hash-v1`, zero nodes). */
+  function seedFile(q: QueryBuilder, p: string): void {
+    upsertFile(q, {
+      path: p,
+      contentHash: 'hash-v1',
+      language: 'typescript',
+      size: 10,
+      modifiedAt: 1,
+      indexedAt: 1,
+      nodeCount: 0,
+    });
+  }
+
+  it('keeps commit_count / first_seen_ts / last_touched_ts across removeFileFromIndexInTx + upsertFile', () => {
+    const { db, q, dbDir } = setupDb();
+    try {
+      seedFile(q, 'a.ts');
+      // Mining persisted real churn for the file.
+      applyChurnDeltas(q, [
+        { path: 'a.ts', commitCountDelta: 7, lastTouchedTs: 1764547200, firstSeenTs: 1735689600 },
+      ]);
+
+      // Simulate the sync re-extract path: evict the prior extraction,
+      // then re-upsert the same path with a fresh (post-edit) record.
+      qbTransaction(q, () => {
+        removeFileFromIndexInTx(q, 'a.ts');
+        upsertFile(q, {
+          path: 'a.ts',
+          contentHash: 'hash-v2',
+          language: 'typescript',
+          size: 20,
+          modifiedAt: 2,
+          indexedAt: 2,
+          nodeCount: 3,
+        });
+      });
+
+      const after = getFileByPath(q, 'a.ts');
+      expect(after).not.toBeNull();
+      // Non-churn columns reflect the re-extraction.
+      expect(after!.contentHash).toBe('hash-v2');
+      expect(after!.nodeCount).toBe(3);
+      // Churn columns survive untouched.
+      expect(after!.commitCount).toBe(7);
+      expect(after!.firstSeenTs).toBe(1735689600);
+      expect(after!.lastTouchedTs).toBe(1764547200);
+    } finally {
+      teardownDb(db, dbDir);
+    }
+  });
+
+  it('removeFileFromIndex still fully drops the files row (genuine removal)', () => {
+    const { db, q, dbDir } = setupDb();
+    try {
+      seedFile(q, 'gone.ts');
+      applyChurnDeltas(q, [
+        { path: 'gone.ts', commitCountDelta: 3, lastTouchedTs: 1764547200, firstSeenTs: 1735689600 },
+      ]);
+
+      removeFileFromIndex(q, 'gone.ts');
+
+      expect(getFileByPath(q, 'gone.ts')).toBeNull();
+    } finally {
+      teardownDb(db, dbDir);
+    }
+  });
+});
@@ -22,10 +22,12 @@
  */
 
 import { describe, it, expect } from 'vitest';
+import { execFileSync } from 'child_process';
 import * as fs from 'fs';
 import * as path from 'path';
 import type { Command } from 'commander';
 import { getToolModules } from '../src/mcp/tools/registry.js';
+import { isReadOnlySql } from '../src/mcp/tools/sql.js';
 
 /**
  * Identifiers known to be intentionally one-sided — each entry
@@ -275,3 +277,101 @@ describe('CLI ↔ MCP surface alignment', () => {
     ).toEqual([]);
   });
 });
+
+/**
+ * `codegraph_sql` read-only gate. A PRAGMA that assigns a value
+ * (`PRAGMA user_version = 5`) is a write and has to be refused, while
+ * plain introspection PRAGMAs remain permitted. The original gate
+ * compared only the pragma name with the allowlist and overlooked the
+ * trailing `= value`, which left allowlisted value-form pragmas
+ * (`user_version` / `schema_version` / `page_size`) writable.
+ */
+describe('codegraph_sql read-only gate — value-PRAGMA rejection', () => {
+  /**
+   * Assert that `isReadOnlySql` yields `expected` for each statement,
+   * in order. The statement doubles as the failure message so a
+   * failing entry is identifiable.
+   */
+  function expectGate(statements: readonly string[], expected: boolean): void {
+    for (const sql of statements) {
+      expect(isReadOnlySql(sql), sql).toBe(expected);
+    }
+  }
+
+  it('rejects value-setting PRAGMAs even when the pragma name is allowlisted', () => {
+    expectGate(
+      [
+        'PRAGMA user_version = 5',
+        'PRAGMA user_version=5',
+        '  pragma   schema_version  =  9 ',
+        'PRAGMA page_size = 4096',
+        'PRAGMA user_version = 5;',
+      ],
+      false,
+    );
+  });
+
+  it('still allows bare introspection PRAGMAs and single quoted/identifier args', () => {
+    expectGate(
+      [
+        'PRAGMA user_version',
+        'PRAGMA user_version;',
+        'PRAGMA table_info(nodes)',
+        "PRAGMA table_info('nodes')",
+        'PRAGMA integrity_check(20)',
+      ],
+      true,
+    );
+  });
+
+  it('still rejects non-allowlisted pragma names and plain writes', () => {
+    expectGate(
+      ['PRAGMA cache_size = 99999', 'PRAGMA journal_mode = WAL', 'DELETE FROM nodes'],
+      false,
+    );
+    // A plain SELECT stays allowed.
+    expectGate(['SELECT * FROM nodes'], true);
+  });
+});
+
+/**
+ * CLI behaviour parity — spawns the real CLI (`tsx src/bin/codegraph.ts`)
+ * against this repo's own index. These guard four audited
+ * CLI-vs-MCP divergences:
+ *   - `sql` exits non-zero on a rejected / invalid query (scripts can
+ *     detect failure).
+ *   - `find --by name` exact mode returns the container + members set
+ *     the MCP tool returns, not a fuzzy relevance rank.
+ *   - `coverage <symbol>` positional selects symbol mode.
+ *   - `role` with no args produces the project-wide distribution table.
+ */
+describe('CLI behaviour parity (spawned)', () => {
+  const repoRoot = path.join(__dirname, '..');
+  const cliEntry = path.join(repoRoot, 'src', 'bin', 'codegraph.ts');
+  // Every spawned test below is skipped unless `.codegraph` exists at the repo root.
+  const indexed = fs.existsSync(path.join(repoRoot, '.codegraph'));
+
+  /**
+   * Run the CLI and report its output and exit code.
+   *
+   * On a zero exit only stdout is returned (that is all `execFileSync`
+   * hands back); on a non-zero exit stdout and stderr are concatenated.
+   *
+   * @param cliArgs arguments appended after the CLI entry point.
+   * @returns the captured output and the child's exit code.
+   * @throws the original error when the child never produced a numeric
+   *   exit status (e.g. `npx` could not be spawned, or the child was
+   *   killed by a signal). Mapping that to exit code 1 would let the
+   *   "exits non-zero" assertions pass without the CLI ever running.
+   */
+  function runCli(cliArgs: string[]): { out: string; code: number } {
+    try {
+      const out = execFileSync('npx', ['tsx', cliEntry, ...cliArgs], {
+        cwd: repoRoot,
+        encoding: 'utf-8',
+        stdio: ['ignore', 'pipe', 'pipe'],
+      });
+      return { out, code: 0 };
+    } catch (err) {
+      const e = err as { status?: number | null; stdout?: string; stderr?: string };
+      // No numeric status means the CLI did not exit on its own terms;
+      // surface that as a hard failure instead of a fake exit code.
+      if (typeof e.status !== 'number') throw err;
+      return { out: (e.stdout ?? '') + (e.stderr ?? ''), code: e.status };
+    }
+  }
+
+  it.skipIf(!indexed)('sql exits non-zero on a rejected write query', () => {
+    const { code } = runCli(['sql', 'DELETE FROM nodes']);
+    expect(code).not.toBe(0);
+  }, 60_000);
+
+  it.skipIf(!indexed)('sql exits non-zero on an invalid (no such table) query', () => {
+    const { code } = runCli(['sql', 'SELECT * FROM no_such_table_xyz']);
+    expect(code).not.toBe(0);
+  }, 60_000);
+
+  it.skipIf(!indexed)('find --by name exact returns the container + its members', () => {
+    const { out, code } = runCli(['find', '--by', 'name', 'GraphTraverser']);
+    expect(code).toBe(0);
+    // MCP exact mode lists the class then its member methods, not a
+    // fuzzy relevance rank with `(NN%)` scores and unrelated imports.
+    expect(out).toContain('GraphTraverser (class)');
+    expect(out).toContain('traverseBFS (method)');
+    expect(out).not.toMatch(/\(\d+%\)/);
+  }, 60_000);
+
+  it.skipIf(!indexed)('coverage with a bare positional symbol selects symbol mode', () => {
+    const { out, code } = runCli(['coverage', 'computeMetrics']);
+    expect(code).toBe(0);
+    expect(out).toContain('Coverage for `computeMetrics`');
+    expect(out).not.toContain('lowest first');
+  }, 60_000);
+
+  it.skipIf(!indexed)('role with no args produces the project-wide distribution table', () => {
+    const { out, code } = runCli(['role']);
+    expect(code).toBe(0);
+    expect(out).toContain('Role distribution (project-wide)');
+  }, 60_000);
+});
@@ -13,7 +13,7 @@
  */
 
 import { describe, it, expect } from 'vitest';
-import { parseBatchJudges } from '../src/llm/dead-code.js';
+import { parseBatchJudges, truncateReason } from '../src/llm/dead-code.js';
 
 describe('parseBatchJudges', () => {
   it('parses a clean results document into a position→verdict map', () => {
@@ -46,7 +46,9 @@ describe('parseBatchJudges', () => {
   it('truncates a very long reason string', () => {
     const longReason = 'x'.repeat(500);
     const m = parseBatchJudges(`{"results":[{"i":0,"verdict":"dead","confidence":0.5,"reason":"${longReason}"}]}`, 1);
-    expect(m.get(0)!.reason.length).toBeLessThanOrEqual(200);
+    // Cap is 280 + a 1-char ellipsis; a single unbroken token (here 500 'x' characters with no whitespace) is hard-sliced.
+    expect(m.get(0)!.reason.length).toBeLessThanOrEqual(281);
+    expect(m.get(0)!.reason.endsWith('…')).toBe(true);
   });
 
   it('drops an entry whose index is out of range', () => {
@@ -60,3 +62,41 @@ describe('parseBatchJudges', () => {
     expect(parseBatchJudges('[{"i":0,"verdict":"dead","confidence":0.5,"reason":"x"}]', 1).size).toBe(0); // bare array, not object-rooted
   });
 });
+
+describe('truncateReason', () => {
+  /**
+   * Shared word-boundary check: `truncated` must end with the ellipsis,
+   * everything before the ellipsis must be a prefix of `original`, and
+   * the first character that was dropped must be a space — i.e. the cut
+   * did not go through a word.
+   */
+  function expectCutOnWhitespace(original: string, truncated: string): void {
+    expect(truncated.endsWith('…')).toBe(true);
+    const kept = truncated.slice(0, -1);
+    expect(original.startsWith(kept)).toBe(true);
+    expect(original[kept.length]).toBe(' '); // cut landed on whitespace
+  }
+
+  it('leaves a reason within the cap untouched', () => {
+    const shortReason = 'no callers, not a framework hook';
+    expect(truncateReason(shortReason)).toBe(shortReason);
+  });
+
+  it('trims on a word boundary, never mid-word', () => {
+    // An over-cap rationale assembled from whole words only.
+    const sentence = 'not reachable through any dynamic dispatch or framework hook '
+      + 'and not part of the public API surface ';
+    const reason = sentence.repeat(4).trim();
+    const out = truncateReason(reason);
+    expect(out.length).toBeLessThanOrEqual(281);
+    expectCutOnWhitespace(reason, out);
+  });
+
+  it('does not produce the mid-word truncation from the audit repro', () => {
+    // Pre-fix, "...used internally by other parts of the codebase" was
+    // sliced to "...the codebas". A word-boundary trim must never leave
+    // a partial trailing word. The reason below exceeds 280 characters,
+    // so the cap is actually exercised.
+    const reason = [
+      'The function lineHasBuildContextHint is used within the project, ',
+      'as indicated by its presence in the static reference graph. It is not marked ',
+      'as exported, but it is likely used internally by other parts of the codebase, ',
+      'and removing it would probably break something somewhere down the line too.',
+    ].join('');
+    const out = truncateReason(reason);
+    expect(out).not.toMatch(/codebas…$/); // not sliced mid-word
+    expectCutOnWhitespace(reason, out);
+  });
+});
@@ -105,11 +105,27 @@ describe('codegraph_changed_since (#11a)', () => {
     const result = await handler.execute('codegraph_changed_since', { since: old });
     const text = result.content[0]?.text ?? '';
     expect(text).toMatch(/Comparing on-disk file mtime against `since` threshold/);
-    // Bucket header renamed "Modified" → "Content-changed" across both
-    // the index-comparison path AND the explicit-`since` path so the
-    // section semantic is consistent regardless of which threshold
-    // mode the caller used.
-    expect(text).toMatch(/### Content-changed \(\d+\)/);
+    // audit Group 2 #3: the `since` path compares wall-clock mtime, NOT
+    // content-hash — an mtime bump after a checkout is not a content
+    // change. The bucket must be labelled by what it actually measures,
+    // so the explicit-`since` section is "Modified after threshold (by
+    // mtime)", distinct from the content-hash "Content-changed" bucket
+    // of the no-arg path. The two paths must NOT share a label.
+    expect(text).toMatch(/### Modified after threshold \(by mtime\) \(\d+\)/);
+    expect(text).not.toMatch(/### Content-changed/);
+  });
+
+  it("no-arg path keeps the content-hash 'Content-changed' bucket label", async () => {
+    // Other half of the audit Group 2 #3 fix: with no arguments the tool
+    // really does compare SHA256 against `files.content_hash`, so the
+    // "Content-changed" label is the correct one for this bucket.
+    const target = path.join(dir, 'src', 'a.ts');
+    fs.writeFileSync(target, 'export function a() { return 999; }\n');
+    // Push atime/mtime 60 seconds into the future (unix seconds).
+    const bumpedSecs = Math.floor(Date.now() / 1000) + 60;
+    fs.utimesSync(target, bumpedSecs, bumpedSecs);
+    const response = await handler.execute('codegraph_changed_since', {});
+    const body = response.content[0]?.text ?? '';
+    expect(body).toMatch(/### Content-changed \(1\)/);
+    expect(body).not.toMatch(/Modified after threshold/);
   });
 
   it("explicit `since` (numeric-string unix ms) works the same", async () => {