Skip to content

Commit 21b0b6f

Browse files
authored
revert: "fix(native): read mtime via BigInt nanoseconds (#1079)" (#1080)
* revert: "fix(native): read mtime via BigInt nanoseconds to match Rust truncation (#1079)" This reverts commit 3575880. #1079 paid a real perf cost (BigInt-mode statSync on every file in change detection) for what is in practice a no-op: at large epoch values, JS Math.floor(stat.mtimeMs) can read N+1 where Rust wrote N, busting the fast-skip optimization. But fast-skip only gates whether to re-hash the file — when it misses, the hash is recomputed from contents and matches what's stored, so the graph is byte-identical either way. The fix changes optimization hit rate, not correctness. The 0.026% miss-rate window the fix targeted does not justify the constant-cost BigInt overhead applied to 100% of stat calls on the change-detection hot path. Post-merge benchmark gate confirmed the regression: no-op rebuild went from +70% (warning) to +108% (fail). Reopens #1075. The right path is documenting that fast-skip is best-effort (the on-disk hash is the source of truth for graph correctness) rather than chasing exact integer parity with Rust. * refactor(builder): floor mtime inside fileStat helper (#1080) Encapsulate `Math.floor(stat.mtimeMs)` inside the `fileStat` helper so every consumer of the integer DB column gets a pre-floored value by default. Eliminates the risk that a future call site reads `stat.mtimeMs` and stores it un-floored, which would silently write a non-integer (or rounded-up integer) into the DB and cause spurious fast-skip misses on the next build. All six existing call sites simplified from `Math.floor(stat.mtimeMs)` to `stat.mtime`. Behaviour unchanged. Addresses Greptile P2 feedback on the revert PR.
1 parent 3575880 commit 21b0b6f

3 files changed

Lines changed: 20 additions & 75 deletions

File tree

src/domain/graph/builder/helpers.ts

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -222,18 +222,17 @@ export function fileHash(content: string): string {
222222
}
223223

224224
/**
225-
* Stat a file, returning integer-truncated mtime in ms (and size).
225+
* Stat a file, returning { mtime, size } or null on error.
226226
*
227-
* Reads via BigInt nanoseconds and truncates with integer math so the value
228-
* matches Rust's `Duration::as_millis() as i64` exactly. `Math.floor(stat.mtimeMs)`
229-
* cannot be substituted: at large epoch values the f64 `mtimeMs` rounds, so a
230-
* Rust-written `file_hashes.mtime` of N can read back as N+1 in JS and bust the
231-
* fast-skip path on every native→JS handoff.
227+
* `mtime` is `Math.floor(stat.mtimeMs)` so it matches the integer column
228+
* stored in the DB. Floor-once-here keeps every consumer honest: storing or
229+
* comparing a non-floored `mtimeMs` against the integer DB column would cause
230+
* spurious fast-skip misses on the next build.
232231
*/
233232
export function fileStat(filePath: string): { mtime: number; size: number } | null {
234233
try {
235-
const s = fs.statSync(filePath, { bigint: true });
236-
return { mtime: Number(s.mtimeNs / 1_000_000n), size: Number(s.size) };
234+
const s = fs.statSync(filePath);
235+
return { mtime: Math.floor(s.mtimeMs), size: s.size };
237236
} catch {
238237
return null;
239238
}

src/domain/graph/builder/stages/insert-nodes.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ export function buildFileHashes(
152152

153153
// Also include metadata-only updates (self-heal mtime/size without re-parse)
154154
for (const item of metadataUpdates) {
155-
const mtime = item.stat ? Math.floor(item.stat.mtime) : 0;
155+
const mtime = item.stat ? item.stat.mtime : 0;
156156
const size = item.stat ? item.stat.size : 0;
157157
fileHashes.push({ file: item.relPath, hash: item.hash, mtime, size });
158158
}
@@ -389,7 +389,7 @@ function updateFileHashes(
389389

390390
// Also update metadata-only entries (self-heal mtime/size without re-parse)
391391
for (const item of metadataUpdates) {
392-
const mtime = item.stat ? Math.floor(item.stat.mtime) : 0;
392+
const mtime = item.stat ? item.stat.mtime : 0;
393393
const size = item.stat ? item.stat.size : 0;
394394
upsertHash.run(item.relPath, item.hash, mtime, size);
395395
}

tests/builder/detect-changes.test.ts

Lines changed: 11 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,9 @@
44
import fs from 'node:fs';
55
import os from 'node:os';
66
import path from 'node:path';
7-
import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from 'vitest';
7+
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
88
import { closeDb, initSchema, openDb } from '../../src/db/index.js';
99
import { PipelineContext } from '../../src/domain/graph/builder/context.js';
10-
import { fileStat } from '../../src/domain/graph/builder/helpers.js';
1110
import {
1211
detectChanges,
1312
detectNoChanges,
@@ -63,12 +62,12 @@ describe('detectChanges stage', () => {
6362
const content = fs.readFileSync(path.join(dir, 'a.js'), 'utf-8');
6463
const { createHash } = await import('node:crypto');
6564
const hash = createHash('md5').update(content).digest('hex');
66-
const stat = fs.statSync(path.join(dir, 'a.js'), { bigint: true });
65+
const stat = fs.statSync(path.join(dir, 'a.js'));
6766
db.prepare('INSERT INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)').run(
6867
'a.js',
6968
hash,
70-
Number(stat.mtimeNs / 1_000_000n),
71-
Number(stat.size),
69+
Math.floor(stat.mtimeMs),
70+
stat.size,
7271
);
7372

7473
// Write journal header so journal check doesn't confuse things
@@ -159,16 +158,15 @@ describe('detectNoChanges fast-skip', () => {
159158
relPath: string,
160159
filePath: string,
161160
): { mtime: number; size: number } {
162-
const stat = fs.statSync(filePath, { bigint: true });
163-
const mtime = Number(stat.mtimeNs / 1_000_000n);
164-
const size = Number(stat.size);
161+
const stat = fs.statSync(filePath);
162+
const mtime = Math.floor(stat.mtimeMs);
165163
db.prepare('INSERT INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)').run(
166164
relPath,
167165
'deadbeef',
168166
mtime,
169-
size,
167+
stat.size,
170168
);
171-
return { mtime, size };
169+
return { mtime, size: stat.size };
172170
}
173171

174172
it('returns false when file_hashes is empty (first build)', () => {
@@ -223,12 +221,12 @@ describe('detectNoChanges fast-skip', () => {
223221
const db = openDb(path.join(dbDir, 'graph.db'));
224222
initSchema(db);
225223
const file = seedFile(dir, 'a.js', 'export const a = 1;');
226-
const stat = fs.statSync(file, { bigint: true });
224+
const stat = fs.statSync(file);
227225
db.prepare('INSERT INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)').run(
228226
'a.js',
229227
'deadbeef',
230-
Number(stat.mtimeNs / 1_000_000n) + 1000, // skewed mtime
231-
Number(stat.size),
228+
Math.floor(stat.mtimeMs) + 1000, // skewed mtime
229+
stat.size,
232230
);
233231

234232
expect(detectNoChanges(db, [file], dir)).toBe(false);
@@ -278,56 +276,4 @@ describe('detectNoChanges fast-skip', () => {
278276
closeDb(db);
279277
fs.rmSync(dir, { recursive: true, force: true });
280278
});
281-
282-
// Pins down the BigInt-nanosecond truncation the helper uses to match Rust's
283-
// `Duration::as_millis() as i64`. We can't trigger the f64 ULP rounding bug
284-
// with a freshly-created file (the failure window is ~256 ns out of every ms,
285-
// ~0.026% of values), so instead we stub `fs.statSync` to return a hand-picked
286-
// BigInt `mtimeNs` whose f64-mtimeMs path diverges from the BigInt path:
287-
// ns = 1748400000000999808n (≈ 2025-05-28 epoch ns)
288-
// BigInt: Number(ns / 1_000_000n) === 1748400000000
289-
// f64 (broken): Math.floor(Number(ns) / 1e6) === 1748400000001
290-
// Reverting `fileStat` to `Math.floor(stat.mtimeMs)` would flip the result to
291-
// N+1 and fail the assertion deterministically — re-introducing #1075 (the
292-
// Rust-written `file_hashes.mtime` of N reading back as N+1 in JS, busting
293-
// the fast-skip path on every native→JS handoff).
294-
describe('fileStat #1075 mtime truncation', () => {
295-
afterEach(() => {
296-
vi.restoreAllMocks();
297-
});
298-
299-
it('matches Rust Duration::as_millis() truncation at f64-rounding boundary', () => {
300-
// Hand-picked epoch ns where Number(ns)/1e6 rounds up across a ms boundary.
301-
const badMtimeNs = 1748400000000999808n;
302-
const truncatedMs = 1748400000000;
303-
const roundedMs = 1748400000001;
304-
305-
// Sanity: confirm the chosen value actually triggers the divergence; if a
306-
// future Node.js release changes f64 rounding, this baseline assertion
307-
// catches it before we trust the spy-based test below.
308-
expect(Number(badMtimeNs / 1_000_000n)).toBe(truncatedMs);
309-
expect(Math.floor(Number(badMtimeNs) / 1e6)).toBe(roundedMs);
310-
311-
const stubStats = {
312-
mtimeNs: badMtimeNs,
313-
mtimeMs: Number(badMtimeNs) / 1e6,
314-
size: 42n,
315-
} as unknown as fs.BigIntStats;
316-
vi.spyOn(fs, 'statSync').mockReturnValue(stubStats);
317-
318-
// BigInt path must win: N, not N+1.
319-
expect(fileStat('/fake/path.js')?.mtime).toBe(truncatedMs);
320-
});
321-
322-
it('returns the BigInt-truncated mtime for a real file on disk', () => {
323-
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-fileStat-trunc-'));
324-
const file = seedFile(dir, 'a.js', 'export const a = 1;');
325-
326-
const big = fs.statSync(file, { bigint: true });
327-
const expected = Number(big.mtimeNs / 1_000_000n);
328-
expect(fileStat(file)?.mtime).toBe(expected);
329-
330-
fs.rmSync(dir, { recursive: true, force: true });
331-
});
332-
});
333279
});

0 commit comments

Comments
 (0)