Skip to content

Commit 38887ee

Browse files
andreinknv and claude committed
feat: PR colbymchenry#112 (centrality + churn + hotspots) on top of refactors
Lands centrality (PageRank) and churn (git history) as registered IndexHooks (`afterIndexAll` + `afterSync`) instead of CodeGraph private methods.

Adds:
- Migration 004: nodes.centrality + files.{commit_count, loc, first_seen_ts, last_touched_ts} + indexes
- src/centrality/ + src/churn/ (pure modules)
- src/index-hooks/centrality.ts + churn.ts (registered hooks)
- CodeGraph public methods: getCentrality, getTopCentralNodes, getCentralityRank, getFileChurn, getHotspots
- codegraph_hotspots MCP tool wired through ToolModule registry + handleHotspots on ToolHandler
- Updated regression-guard tests (index-hooks, mcp-tool-registry) to reflect newly registered hooks/tools

Tests: 440/440 pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent d807e23 commit 38887ee

23 files changed

Lines changed: 1294 additions & 15 deletions

__tests__/centrality.test.ts

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
import { describe, it, expect } from 'vitest';
2+
import { computePageRank, PR_DAMPING, PR_ITERATIONS } from '../src/centrality';
3+
4+
/**
 * Wraps bare node ids in the minimal `{ id }` objects that the tests
 * pass to `computePageRank` as its node list.
 *
 * @param ids - Node identifiers, in the order they should appear.
 * @returns One `{ id }` object per input id, in the same order.
 */
function asNodes(ids: string[]) {
  const nodes: { id: string }[] = [];
  for (const nodeId of ids) {
    nodes.push({ id: nodeId });
  }
  return nodes;
}
7+
8+
/**
 * Behavioural tests for `computePageRank` on small hand-built graphs:
 * empty and edgeless inputs, chains, a star, a cycle, disconnected
 * components, edges that point at unknown nodes, and the exported
 * tuning constants.
 */
describe('computePageRank', () => {
  /** Builds the directed edge literal `source -> target`. */
  const edge = (source: string, target: string) => ({ source, target });

  /** Adds up a sequence of rank values for the mass-conservation checks. */
  const totalOf = (ranks: Iterable<number>): number => {
    let total = 0;
    for (const rank of ranks) total += rank;
    return total;
  };

  it('returns empty result for an empty graph', () => {
    const result = computePageRank([], []);
    expect(result.scores.size).toBe(0);
    expect(result.iterations).toBe(0);
  });

  it('assigns uniform rank to N isolated nodes', () => {
    const result = computePageRank(asNodes(['a', 'b', 'c', 'd']), []);
    expect(result.scores.size).toBe(4);
    // With no edges every node is dangling, so each should hold an even 1/4.
    [...result.scores.values()].forEach((rank) => {
      expect(rank).toBeCloseTo(0.25, 6);
    });
  });

  it('rewards being reached (sinks accumulate rank)', () => {
    // Chain a -> b -> c: c has no way out, so rank should pile up there.
    const { scores } = computePageRank(asNodes(['a', 'b', 'c']), [
      edge('a', 'b'),
      edge('b', 'c'),
    ]);
    const rankA = scores.get('a')!;
    const rankB = scores.get('b')!;
    const rankC = scores.get('c')!;
    expect(rankC).toBeGreaterThan(rankB);
    expect(rankB).toBeGreaterThan(rankA);
  });

  it('star: hub ranks above all leaves; leaves are equal', () => {
    const leafIds = ['l1', 'l2', 'l3', 'l4', 'l5', 'l6', 'l7', 'l8', 'l9'];
    const spokes = leafIds.map((leaf) => edge(leaf, 'hub'));
    const { scores } = computePageRank(asNodes([...leafIds, 'hub']), spokes);

    const hubRank = scores.get('hub')!;
    for (const leaf of leafIds) {
      const leafRank = scores.get(leaf)!;
      expect(hubRank).toBeGreaterThan(leafRank);
    }

    // Every leaf plays the same role, so their ranks must agree to 1e-9.
    const [firstLeaf, ...otherLeaves] = leafIds;
    const reference = scores.get(firstLeaf)!;
    for (const leaf of otherLeaves) {
      expect(scores.get(leaf)!).toBeCloseTo(reference, 9);
    }
  });

  it('cycle: all nodes have approximately equal rank', () => {
    const { scores } = computePageRank(asNodes(['a', 'b', 'c']), [
      edge('a', 'b'),
      edge('b', 'c'),
      edge('c', 'a'),
    ]);
    const rankA = scores.get('a')!;
    const rankB = scores.get('b')!;
    const rankC = scores.get('c')!;
    // The ring is symmetric, so no node may end up ahead of another.
    expect(rankA).toBeCloseTo(rankB, 6);
    expect(rankB).toBeCloseTo(rankC, 6);
  });

  it('total rank sums to ~1 (mass is conserved)', () => {
    const { scores } = computePageRank(asNodes(['a', 'b', 'c', 'd', 'e']), [
      edge('a', 'b'),
      edge('b', 'c'),
      edge('d', 'c'),
      edge('e', 'd'),
      edge('a', 'e'),
    ]);
    expect(totalOf(scores.values())).toBeCloseTo(1, 6);
  });

  it('preserves mass across two disconnected components', () => {
    const { scores } = computePageRank(asNodes(['a', 'b', 'c', 'd']), [
      edge('a', 'b'),
      edge('c', 'd'),
    ]);
    expect(totalOf(scores.values())).toBeCloseTo(1, 6);
    // In each two-node component the sink must outrank its source.
    expect(scores.get('b')!).toBeGreaterThan(scores.get('a')!);
    expect(scores.get('d')!).toBeGreaterThan(scores.get('c')!);
  });

  it('drops edges referencing unknown nodes', () => {
    // 'ghost' is absent from the node list: edges touching it must be
    // ignored without throwing and without adding a score entry.
    const { scores } = computePageRank(asNodes(['a', 'b']), [
      edge('a', 'b'),
      edge('a', 'ghost'),
      edge('ghost', 'b'),
    ]);
    expect(scores.size).toBe(2);
    expect(scores.get('b')!).toBeGreaterThan(scores.get('a')!);
    expect(totalOf(scores.values())).toBeCloseTo(1, 6);
  });

  it('reports iteration count and duration', () => {
    const result = computePageRank(asNodes(['a', 'b']), [edge('a', 'b')]);
    expect(result.iterations).toBe(PR_ITERATIONS);
    expect(result.durationMs).toBeGreaterThanOrEqual(0);
  });

  it('damping constant is the textbook 0.85', () => {
    // Sentinel: guards against accidental tuning that would invalidate
    // the spike findings the PR was justified on.
    expect(PR_DAMPING).toBe(0.85);
  });
});

__tests__/churn.test.ts

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
2+
import * as fs from 'fs';
3+
import * as os from 'os';
4+
import * as path from 'path';
5+
import { execFileSync } from 'child_process';
6+
import {
7+
mineChurn,
8+
getGitHead,
9+
readFileLoc,
10+
MAX_FILES_PER_COMMIT,
11+
LAST_MINED_CHURN_HEAD_KEY,
12+
} from '../src/churn';
13+
14+
/**
 * Whether a `git` executable can be launched from this process.
 * Probed once at module load by running `git --version`; any failure
 * to run it is treated as "git unavailable".
 */
const HAS_GIT = (() => {
  try {
    execFileSync('git', ['--version'], { stdio: 'ignore' });
    return true;
  } catch {
    return false;
  }
})();

/** Scratch directory used by the test currently running. */
let tempDir: string;
22+
23+
/**
 * Runs `git <args>` synchronously with `tempDir` as the working
 * directory and returns its stdout with surrounding whitespace trimmed.
 *
 * The author and committer identity are pinned to fixed test values.
 * Everything else, including `GIT_AUTHOR_DATE` / `GIT_COMMITTER_DATE`
 * when a caller has set them on `process.env`, is inherited through
 * the spread of `process.env`.
 *
 * @param args - Arguments passed to the `git` executable verbatim.
 * @returns Trimmed stdout of the command.
 * @throws Whatever `execFileSync` throws, e.g. when git cannot be
 *   spawned or exits with a non-zero status.
 */
function git(...args: string[]): string {
  return execFileSync('git', args, {
    cwd: tempDir,
    encoding: 'utf-8',
    env: {
      // The spread already carries any pinned GIT_*_DATE values, so they
      // are not re-listed: doing so only added explicit `undefined`
      // entries whenever the dates were not set.
      ...process.env,
      GIT_AUTHOR_NAME: 'Test',
      GIT_AUTHOR_EMAIL: 'test@example.com',
      GIT_COMMITTER_NAME: 'Test',
      GIT_COMMITTER_EMAIL: 'test@example.com',
    },
    stdio: ['pipe', 'pipe', 'pipe'],
  }).trim();
}
39+
40+
/**
 * Writes the given files under `tempDir`, stages them, and commits them
 * with both the author and committer date pinned to `date`.
 *
 * @param date - Date string handed to git through `GIT_AUTHOR_DATE` and
 *   `GIT_COMMITTER_DATE`; also embedded in the commit message and in the
 *   default file content.
 * @param paths - Paths relative to `tempDir`; parent directories are
 *   created as needed.
 * @param content - Content written to every file. When omitted, each
 *   file gets `data for <path> at <date>\n`.
 * @throws Whatever the filesystem calls or `git` throw.
 */
function commitAt(date: string, paths: string[], content?: string) {
  for (const p of paths) {
    const abs = path.join(tempDir, p);
    fs.mkdirSync(path.dirname(abs), { recursive: true });
    fs.writeFileSync(abs, content ?? `data for ${p} at ${date}\n`);
  }
  git('add', ...paths);
  // Pin both author and committer dates so timestamps are deterministic.
  process.env.GIT_AUTHOR_DATE = date;
  process.env.GIT_COMMITTER_DATE = date;
  try {
    git('commit', '-m', `commit at ${date}`);
  } finally {
    // Always unpin, even when the commit fails, so a thrown error cannot
    // leave the dates set on process.env for whatever runs next.
    delete process.env.GIT_AUTHOR_DATE;
    delete process.env.GIT_COMMITTER_DATE;
  }
}
54+
55+
// Before every test: create a fresh scratch directory and, when git is
// available, turn it into a repository on branch `main` with commit
// signing switched off.
beforeEach(() => {
  tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-churn-'));
  if (!HAS_GIT) return;
  git('init', '-q', '-b', 'main');
  git('config', 'commit.gpgsign', 'false');
});

// After every test: clear any pinned commit dates from process.env and
// delete the scratch directory.
afterEach(() => {
  for (const pinned of ['GIT_AUTHOR_DATE', 'GIT_COMMITTER_DATE']) {
    delete process.env[pinned];
  }
  fs.rmSync(tempDir, { recursive: true, force: true });
});
68+
69+
/**
 * Tests for `mineChurn` against real git repositories built in
 * `tempDir`. The whole suite is skipped when no `git` executable is
 * available (`HAS_GIT` is false).
 */
describe.skipIf(!HAS_GIT)('mineChurn', () => {
  it('returns empty + null head when not in a git repo', () => {
    const nonGit = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-nogit-'));
    try {
      const r = mineChurn(nonGit, new Set(['foo.ts']), null);
      expect(r.currentHead).toBeNull();
      expect(r.deltas.size).toBe(0);
      expect(r.needsFullRescan).toBe(false);
    } finally {
      fs.rmSync(nonGit, { recursive: true, force: true });
    }
  });

  it('counts commits per indexed file, ignores files not in index', () => {
    commitAt('2025-01-01T00:00:00', ['a.ts', 'b.ts']);
    commitAt('2025-01-02T00:00:00', ['a.ts']);
    commitAt('2025-01-03T00:00:00', ['a.ts', 'b.ts', 'c.ts']);

    const r = mineChurn(tempDir, new Set(['a.ts', 'b.ts']), null);
    expect(r.deltas.get('a.ts')?.commitCountDelta).toBe(3);
    expect(r.deltas.get('b.ts')?.commitCountDelta).toBe(2);
    expect(r.deltas.has('c.ts')).toBe(false);
  });

  it('records first-seen / last-touched as min/max of commit timestamps', () => {
    commitAt('2025-01-01T00:00:00Z', ['a.ts']);
    commitAt('2025-06-01T00:00:00Z', ['a.ts']);
    commitAt('2025-12-01T00:00:00Z', ['a.ts']);

    const r = mineChurn(tempDir, new Set(['a.ts']), null);
    const d = r.deltas.get('a.ts')!;
    // 2025-01-01 UTC = 1735689600
    expect(d.firstSeenTs).toBe(1735689600);
    // 2025-12-01 UTC = 1764547200
    expect(d.lastTouchedTs).toBe(1764547200);
  });

  it('skips commits touching more than MAX_FILES_PER_COMMIT files', () => {
    // One file more than the cap, named f0.ts .. f<MAX_FILES_PER_COMMIT>.ts.
    const bigBatch: string[] = [];
    for (let i = 0; i < MAX_FILES_PER_COMMIT + 1; i++) bigBatch.push(`f${i}.ts`);
    commitAt('2025-01-01T00:00:00Z', bigBatch);
    // Then a normal commit on one of the same files.
    commitAt('2025-02-01T00:00:00Z', ['f0.ts']);

    const r = mineChurn(tempDir, new Set(bigBatch), null);
    // First commit was skipped; only the second one should count.
    expect(r.deltas.get('f0.ts')?.commitCountDelta).toBe(1);
    // Files only seen in the skipped commit produce no delta at all.
    // The name is derived from the cap so the file is guaranteed to be
    // part of the big commit whatever value the constant has; a fixed
    // name such as 'f50.ts' is not in the batch when the cap is below 50.
    const onlyInSkippedCommit = `f${MAX_FILES_PER_COMMIT}.ts`;
    expect(r.deltas.has(onlyInSkippedCommit)).toBe(false);
  });

  it('incremental mining returns only commits since the given sha', () => {
    commitAt('2025-01-01T00:00:00Z', ['a.ts']);
    const sha1 = getGitHead(tempDir)!;
    commitAt('2025-01-02T00:00:00Z', ['a.ts']);
    commitAt('2025-01-03T00:00:00Z', ['a.ts']);

    const incr = mineChurn(tempDir, new Set(['a.ts']), sha1);
    // Only the two commits *after* sha1 should be counted.
    expect(incr.deltas.get('a.ts')?.commitCountDelta).toBe(2);
    expect(incr.needsFullRescan).toBe(false);
  });

  it('returns needsFullRescan=true when sinceSha is unreachable', () => {
    commitAt('2025-01-01T00:00:00Z', ['a.ts']);
    // Forty zeros: shaped like a SHA but not a commit in this repository.
    const fakeSha = '0'.repeat(40);
    const r = mineChurn(tempDir, new Set(['a.ts']), fakeSha);
    expect(r.needsFullRescan).toBe(true);
    expect(r.deltas.size).toBe(0);
    expect(r.currentHead).not.toBeNull();
  });

  it('returns empty deltas when sinceSha equals current head (no-op)', () => {
    commitAt('2025-01-01T00:00:00Z', ['a.ts']);
    const head = getGitHead(tempDir)!;
    const r = mineChurn(tempDir, new Set(['a.ts']), head);
    expect(r.currentHead).toBe(head);
    expect(r.deltas.size).toBe(0);
    expect(r.needsFullRescan).toBe(false);
  });

  it('handles paths with spaces and unicode safely (NUL-delimited)', () => {
    commitAt('2025-01-01T00:00:00Z', ['name with space.ts']);
    commitAt('2025-01-02T00:00:00Z', ['ünïcødë.ts']);

    const r = mineChurn(
      tempDir,
      new Set(['name with space.ts', 'ünïcødë.ts']),
      null
    );
    expect(r.deltas.get('name with space.ts')?.commitCountDelta).toBe(1);
    expect(r.deltas.get('ünïcødë.ts')?.commitCountDelta).toBe(1);
  });

  it('LAST_MINED_CHURN_HEAD_KEY is stable (used as project_metadata key)', () => {
    expect(LAST_MINED_CHURN_HEAD_KEY).toBe('last_mined_churn_head');
  });
});
167+
168+
/**
 * Tests for `readFileLoc`: line counting for empty, newline-terminated,
 * unterminated, and missing files. Each case runs in its own temporary
 * directory, which is removed afterwards.
 */
describe('readFileLoc', () => {
  /**
   * Creates a throwaway directory, runs `body` with its path, and
   * deletes the directory afterwards whether or not `body` throws.
   */
  const inScratchDir = (body: (dir: string) => void): void => {
    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-loc-'));
    try {
      body(dir);
    } finally {
      fs.rmSync(dir, { recursive: true, force: true });
    }
  };

  it('returns 0 for an empty file', () => {
    inScratchDir((dir) => {
      const emptyFile = path.join(dir, 'empty.txt');
      fs.writeFileSync(emptyFile, '');
      expect(readFileLoc(dir, 'empty.txt')).toBe(0);
    });
  });

  it('counts newline-terminated lines', () => {
    inScratchDir((dir) => {
      fs.writeFileSync(path.join(dir, 'x.txt'), 'a\nb\nc\n');
      expect(readFileLoc(dir, 'x.txt')).toBe(3);
    });
  });

  it('counts a final no-newline chunk as one extra line', () => {
    inScratchDir((dir) => {
      fs.writeFileSync(path.join(dir, 'x.txt'), 'a\nb\nc');
      expect(readFileLoc(dir, 'x.txt')).toBe(3);
    });
  });

  it('returns 0 for a missing file (does not throw)', () => {
    inScratchDir((dir) => {
      expect(readFileLoc(dir, 'no-such-file.txt')).toBe(0);
    });
  });
});

__tests__/foundation.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ describe('Database Connection', () => {
305305

306306
const version = db.getSchemaVersion();
307307
expect(version).not.toBeNull();
308-
expect(version?.version).toBe(3);
308+
expect(version?.version).toBe(4);
309309

310310
db.close();
311311
});

__tests__/index-hooks.test.ts

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,18 +42,39 @@ const fakeSyncResult: SyncResult = {
4242
};
4343

4444
describe('index-hooks registry — runner', () => {
45-
it('main ships with no registered hooks', () => {
46-
expect(getRegisteredHooks().length).toBe(0);
45+
// Checks that the hook registry is populated and that each entry has a
// string `name` plus optional function-valued `afterIndexAll` / `afterSync`.
it('registered hooks expose stable {name, afterIndexAll|afterSync} shape', () => {
  const hooks = getRegisteredHooks();
  // This commit registers the centrality and churn hooks, so an empty
  // registry means the wiring regressed. The earlier `>= 0` check could
  // never fail, and an empty list would have skipped the loop below.
  expect(hooks.length).toBeGreaterThan(0);
  for (const h of hooks) {
    expect(typeof h.name).toBe('string');
    expect(h.afterIndexAll === undefined || typeof h.afterIndexAll === 'function').toBe(true);
    expect(h.afterSync === undefined || typeof h.afterSync === 'function').toBe(true);
  }
});
4854

49-
it('runAfterIndexAll on an empty registry returns an empty outcome list', async () => {
55+
it('runAfterIndexAll returns one outcome per registered hook, swallowing per-hook errors', async () => {
56+
// Registered hooks will throw on the fake `{} as any` ctx; the
57+
// runner contract is to catch + report each error so one bad
58+
// hook never fails the whole pass.
5059
const outcomes = await runAfterIndexAll(makeFakeContext());
51-
expect(outcomes).toEqual([]);
60+
const expectedCount = getRegisteredHooks().filter((h) => h.afterIndexAll).length;
61+
expect(outcomes.length).toBe(expectedCount);
62+
for (const o of outcomes) {
63+
expect(typeof o.name).toBe('string');
64+
expect(o.phase).toBe('indexAll');
65+
expect(typeof o.durationMs).toBe('number');
66+
}
5267
});
5368

54-
it('runAfterSync on an empty registry returns an empty outcome list', async () => {
69+
it('runAfterSync returns one outcome per registered hook, swallowing per-hook errors', async () => {
5570
const outcomes = await runAfterSync(makeFakeContext(), fakeSyncResult);
56-
expect(outcomes).toEqual([]);
71+
const expectedCount = getRegisteredHooks().filter((h) => h.afterSync).length;
72+
expect(outcomes.length).toBe(expectedCount);
73+
for (const o of outcomes) {
74+
expect(typeof o.name).toBe('string');
75+
expect(o.phase).toBe('sync');
76+
expect(typeof o.durationMs).toBe('number');
77+
}
5778
});
5879
});
5980

0 commit comments

Comments
 (0)