|
| 1 | +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; |
| 2 | +import * as fs from 'fs'; |
| 3 | +import * as os from 'os'; |
| 4 | +import * as path from 'path'; |
| 5 | +import { execFileSync } from 'child_process'; |
| 6 | +import { |
| 7 | + mineChurn, |
| 8 | + getGitHead, |
| 9 | + readFileLoc, |
| 10 | + MAX_FILES_PER_COMMIT, |
| 11 | + LAST_MINED_CHURN_HEAD_KEY, |
| 12 | +} from '../src/churn'; |
| 13 | + |
/**
 * Whether a `git` executable can be launched from this process.
 *
 * Probed once at module load by running `git --version` with all stdio
 * discarded; any error while spawning or running it counts as "no git".
 */
const HAS_GIT: boolean = (() => {
  try {
    execFileSync('git', ['--version'], { stdio: 'ignore' });
    return true;
  } catch {
    return false;
  }
})();
| 20 | + |
/** Scratch directory used as the working directory for every git invocation below. */
let tempDir: string;

/**
 * Runs `git` inside `tempDir` with a fixed author/committer identity and
 * returns its trimmed stdout.
 *
 * @param extraEnv - Variables applied on top of `process.env` and the fixed
 *   identity, for this single invocation only. Nothing is written back to
 *   `process.env`.
 * @param args - Arguments handed to `git` verbatim (no shell is involved).
 * @returns Standard output of the command with surrounding whitespace removed.
 * @throws Whatever `execFileSync` throws when git cannot be spawned or exits
 *   with a non-zero status.
 */
function runGit(extraEnv: Record<string, string>, args: string[]): string {
  return execFileSync('git', args, {
    cwd: tempDir,
    encoding: 'utf-8',
    env: {
      ...process.env,
      GIT_AUTHOR_NAME: 'Test',
      GIT_AUTHOR_EMAIL: 'test@example.com',
      GIT_COMMITTER_NAME: 'Test',
      GIT_COMMITTER_EMAIL: 'test@example.com',
      // Per-call overrides go last so they win over anything inherited.
      ...extraEnv,
    },
    stdio: ['pipe', 'pipe', 'pipe'],
  }).trim();
}

/**
 * Runs `git <args>` in `tempDir` using the fixed test identity.
 *
 * @param args - Arguments handed to `git` verbatim.
 * @returns Trimmed stdout of the command.
 */
function git(...args: string[]): string {
  return runGit({}, args);
}

/**
 * Writes each path under `tempDir`, stages it, and commits with both the
 * author and committer dates pinned to `date`.
 *
 * @param date - Date string passed to git as GIT_AUTHOR_DATE / GIT_COMMITTER_DATE.
 * @param paths - Paths relative to `tempDir`; parent directories are created as needed.
 * @param content - File content to write; when omitted, a line derived from
 *   the path and the date is written so every commit changes the file.
 */
function commitAt(date: string, paths: string[], content?: string): void {
  for (const p of paths) {
    const abs = path.join(tempDir, p);
    fs.mkdirSync(path.dirname(abs), { recursive: true });
    fs.writeFileSync(abs, content ?? `data for ${p} at ${date}\n`);
  }
  // `--` keeps a path that starts with `-` from being parsed as an option.
  git('add', '--', ...paths);
  // Pin both dates through the child's environment only, so timestamps are
  // deterministic and the global process.env is never mutated (nothing to
  // clean up if the commit throws, and pre-existing values are not clobbered).
  runGit(
    { GIT_AUTHOR_DATE: date, GIT_COMMITTER_DATE: date },
    ['commit', '-m', `commit at ${date}`]
  );
}
| 54 | + |
// Every test starts from a brand-new scratch directory. When git is available
// the directory is initialised as a repository on branch `main` with commit
// signing disabled.
beforeEach(() => {
  tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-churn-'));
  if (!HAS_GIT) return;
  git('init', '-q', '-b', 'main');
  git('config', 'commit.gpgsign', 'false');
});
| 62 | + |
// Clear any pinned commit dates from the process environment and remove the
// scratch directory created for the test.
afterEach(() => {
  for (const key of ['GIT_AUTHOR_DATE', 'GIT_COMMITTER_DATE']) {
    delete process.env[key];
  }
  fs.rmSync(tempDir, { recursive: true, force: true });
});
| 68 | + |
// Behavioural tests for mineChurn, run against a throwaway git repository in
// `tempDir`. The whole suite is skipped when no git executable is available.
describe.skipIf(!HAS_GIT)('mineChurn', () => {
  it('returns empty + null head when not in a git repo', () => {
    const nonGit = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-nogit-'));
    try {
      const r = mineChurn(nonGit, new Set(['foo.ts']), null);
      expect(r.currentHead).toBeNull();
      expect(r.deltas.size).toBe(0);
      expect(r.needsFullRescan).toBe(false);
    } finally {
      fs.rmSync(nonGit, { recursive: true, force: true });
    }
  });

  it('counts commits per indexed file, ignores files not in index', () => {
    commitAt('2025-01-01T00:00:00Z', ['a.ts', 'b.ts']);
    commitAt('2025-01-02T00:00:00Z', ['a.ts']);
    commitAt('2025-01-03T00:00:00Z', ['a.ts', 'b.ts', 'c.ts']);

    // c.ts is committed but deliberately left out of the indexed set.
    const r = mineChurn(tempDir, new Set(['a.ts', 'b.ts']), null);
    expect(r.deltas.get('a.ts')?.commitCountDelta).toBe(3);
    expect(r.deltas.get('b.ts')?.commitCountDelta).toBe(2);
    expect(r.deltas.has('c.ts')).toBe(false);
  });

  it('records first-seen / last-touched as min/max of commit timestamps', () => {
    commitAt('2025-01-01T00:00:00Z', ['a.ts']);
    commitAt('2025-06-01T00:00:00Z', ['a.ts']);
    commitAt('2025-12-01T00:00:00Z', ['a.ts']);

    const r = mineChurn(tempDir, new Set(['a.ts']), null);
    const d = r.deltas.get('a.ts')!;
    // 2025-01-01 UTC = 1735689600
    expect(d.firstSeenTs).toBe(1735689600);
    // 2025-12-01 UTC = 1764547200
    expect(d.lastTouchedTs).toBe(1764547200);
  });

  it('skips commits touching more than MAX_FILES_PER_COMMIT files', () => {
    // f0.ts .. f<MAX>.ts: exactly one file over the limit.
    const bigBatch: string[] = [];
    for (let i = 0; i < MAX_FILES_PER_COMMIT + 1; i++) bigBatch.push(`f${i}.ts`);
    commitAt('2025-01-01T00:00:00Z', bigBatch);
    // Then a normal commit on one of the same files.
    commitAt('2025-02-01T00:00:00Z', ['f0.ts']);

    const r = mineChurn(tempDir, new Set(bigBatch), null);
    // First commit was skipped; only the second one should count.
    expect(r.deltas.get('f0.ts')?.commitCountDelta).toBe(1);
    // Files only seen in the skipped commit produce no delta at all. The name
    // is derived from the constant so the file is guaranteed to exist in the
    // batch regardless of the limit's value (a fixed name such as f50.ts would
    // make this check vacuous for limits below 50).
    const onlyInSkippedCommit = `f${MAX_FILES_PER_COMMIT}.ts`;
    expect(bigBatch).toContain(onlyInSkippedCommit);
    expect(r.deltas.has(onlyInSkippedCommit)).toBe(false);
  });

  it('incremental mining returns only commits since the given sha', () => {
    commitAt('2025-01-01T00:00:00Z', ['a.ts']);
    const sha1 = getGitHead(tempDir)!;
    commitAt('2025-01-02T00:00:00Z', ['a.ts']);
    commitAt('2025-01-03T00:00:00Z', ['a.ts']);

    const incr = mineChurn(tempDir, new Set(['a.ts']), sha1);
    // Only the two commits *after* sha1 should be counted.
    expect(incr.deltas.get('a.ts')?.commitCountDelta).toBe(2);
    expect(incr.needsFullRescan).toBe(false);
  });

  it('returns needsFullRescan=true when sinceSha is unreachable', () => {
    commitAt('2025-01-01T00:00:00Z', ['a.ts']);
    // A well-formed sha that does not name any object in the repository.
    const fakeSha = '0'.repeat(40);
    const r = mineChurn(tempDir, new Set(['a.ts']), fakeSha);
    expect(r.needsFullRescan).toBe(true);
    expect(r.deltas.size).toBe(0);
    expect(r.currentHead).not.toBeNull();
  });

  it('returns empty deltas when sinceSha equals current head (no-op)', () => {
    commitAt('2025-01-01T00:00:00Z', ['a.ts']);
    const head = getGitHead(tempDir)!;
    const r = mineChurn(tempDir, new Set(['a.ts']), head);
    expect(r.currentHead).toBe(head);
    expect(r.deltas.size).toBe(0);
    expect(r.needsFullRescan).toBe(false);
  });

  it('handles paths with spaces and unicode safely (NUL-delimited)', () => {
    commitAt('2025-01-01T00:00:00Z', ['name with space.ts']);
    commitAt('2025-01-02T00:00:00Z', ['ünïcødë.ts']);

    const r = mineChurn(
      tempDir,
      new Set(['name with space.ts', 'ünïcødë.ts']),
      null
    );
    expect(r.deltas.get('name with space.ts')?.commitCountDelta).toBe(1);
    expect(r.deltas.get('ünïcødë.ts')?.commitCountDelta).toBe(1);
  });

  it('LAST_MINED_CHURN_HEAD_KEY is stable (used as project_metadata key)', () => {
    expect(LAST_MINED_CHURN_HEAD_KEY).toBe('last_mined_churn_head');
  });
});
| 167 | + |
// Line-counting checks for readFileLoc; each case works in its own temporary
// directory, which is removed even when an expectation fails.
describe('readFileLoc', () => {
  /** Runs `body` against a fresh temp directory and always deletes it afterwards. */
  const inScratchDir = (body: (dir: string) => void): void => {
    const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-loc-'));
    try {
      body(dir);
    } finally {
      fs.rmSync(dir, { recursive: true, force: true });
    }
  };

  it('returns 0 for an empty file', () => {
    inScratchDir((dir) => {
      fs.writeFileSync(path.join(dir, 'empty.txt'), '');
      expect(readFileLoc(dir, 'empty.txt')).toBe(0);
    });
  });

  it('counts newline-terminated lines', () => {
    inScratchDir((dir) => {
      fs.writeFileSync(path.join(dir, 'x.txt'), 'a\nb\nc\n');
      expect(readFileLoc(dir, 'x.txt')).toBe(3);
    });
  });

  it('counts a final no-newline chunk as one extra line', () => {
    inScratchDir((dir) => {
      fs.writeFileSync(path.join(dir, 'x.txt'), 'a\nb\nc');
      expect(readFileLoc(dir, 'x.txt')).toBe(3);
    });
  });

  it('returns 0 for a missing file (does not throw)', () => {
    inScratchDir((dir) => {
      expect(readFileLoc(dir, 'no-such-file.txt')).toBe(0);
    });
  });
});
0 commit comments