Skip to content

Commit d0094a1

Browse files
anandgupta42claude
andcommitted
wip: add telemetry instrumentation to FileTime and tighten tolerance
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent a114234 commit d0094a1

4 files changed

Lines changed: 137 additions & 16 deletions

File tree

packages/opencode/src/altimate/telemetry/index.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,34 @@ export namespace Telemetry {
481481
masked_args?: string
482482
duration_ms: number
483483
}
484+
// altimate_change start — FileTime observability: drift + assertion outcome tracking
485+
// Tracks the gap between Node.js wall-clock and filesystem mtime at read time.
486+
// Use to monitor whether the mtime clock-source change (PR #611) is preventing
487+
// false stale-file errors, and detect environments with significant drift.
488+
// KQL: customEvents | where name == "filetime_drift" | extend drift = todouble(customMeasurements.drift_ms)
489+
| {
490+
type: "filetime_drift"
491+
timestamp: number
492+
session_id: string
493+
/** Absolute difference in ms between Date.now() and filesystem mtime */
494+
drift_ms: number
495+
/** True if filesystem mtime is ahead of wall-clock (the problematic direction) */
496+
mtime_ahead: boolean
497+
}
498+
// Tracks FileTime.assert() outcomes: "stale" when a file fails the check.
499+
// High volume of "stale" with low delta_ms suggests tolerance is too tight.
500+
// KQL: customEvents | where name == "filetime_assert" | extend delta = todouble(customMeasurements.delta_ms)
501+
| {
502+
type: "filetime_assert"
503+
timestamp: number
504+
session_id: string
505+
outcome: "stale"
506+
/** mtime - readTime in ms (how far ahead the file's mtime is) */
507+
delta_ms: number
508+
/** Current tolerance threshold in ms */
509+
tolerance_ms: number
510+
}
511+
// altimate_change end
484512
// altimate_change start — sql quality telemetry for issue prevention metrics
485513
| {
486514
type: "sql_quality"

packages/opencode/src/file/time.ts

Lines changed: 83 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,48 @@ import { Instance } from "../project/instance"
22
import { Log } from "../util/log"
33
import { Flag } from "../flag/flag"
44
import { Filesystem } from "../util/filesystem"
5+
// altimate_change start — telemetry for FileTime drift measurement
6+
import { Telemetry } from "../altimate/telemetry"
7+
// altimate_change end
58

69
export namespace FileTime {
710
const log = Log.create({ service: "file.time" })
11+
12+
// altimate_change start — FileTime clock source change documentation
13+
//
14+
// CHANGE: read() now records the file's filesystem mtime instead of Date.now().
15+
//
16+
// WHY: The original code used `new Date()` (wall-clock) as the "last read" timestamp,
17+
// then assert() compared it against `Filesystem.stat(file).mtime`. On WSL (NTFS-over-9P),
18+
// networked drives, and some macOS APFS mounts, the filesystem clock drifts 400ms–1.2s
19+
// behind Node.js's clock. This caused mtime > readTime even for the file's own write,
20+
// triggering false "modified since last read" errors. One user hit 782 consecutive retries.
21+
//
22+
// FIX: Both timestamps now come from the same clock (filesystem mtime), eliminating
23+
// cross-clock skew. The tolerance is kept at 50ms (upstream default) since same-clock
24+
// comparisons don't need a larger window.
25+
//
26+
// TRADE-OFF: On coarse-resolution filesystems (HFS+ = 1s, NFS with stale cache,
27+
// Docker overlayfs after copy-up), two writes within the same resolution window
28+
// produce identical mtimes — so we'd miss a real external modification. This is a
29+
// false-negative risk (missed edit) vs the false-positive risk (retry loop) we're
30+
// fixing. Acceptable because: (a) the file gets re-read on the next attempt anyway,
31+
// (b) HFS+ is rare (macOS defaulted to APFS since 2017), and (c) the wall-clock
32+
// approach was actively causing 782-retry production loops on WSL.
33+
//
34+
// MONITORING: filetime_drift telemetry event tracks the gap between wall-clock and mtime
35+
// at read time. If drift_ms is consistently 0, this change has no effect (good). If
36+
// drift_ms shows large values, this change is preventing false positives (also good).
37+
// If file_stale errors increase post-deploy, the tolerance may need adjustment.
38+
//
39+
// UPSTREAM: sst/opencode issues #19040, #14183, #20354 track the same problem.
40+
// Upstream is pursuing processor-level recovery (PR #19099) rather than fixing the clock
41+
// source. Both approaches are complementary.
42+
//
43+
// ROLLBACK: Set OPENCODE_DISABLE_FILETIME_CHECK=true to bypass all checks, or revert
44+
// this change to restore `new Date()` behavior with a wider tolerance.
45+
// altimate_change end
46+
847
// Per-session read times plus per-file write locks.
948
// All tools that overwrite existing files should run their
1049
// assert/read/write/update sequence inside withLock(filepath, ...)
@@ -26,12 +65,32 @@ export namespace FileTime {
2665
log.info("read", { sessionID, file })
2766
const { read } = state()
2867
read[sessionID] = read[sessionID] || {}
29-
// Use the file's actual mtime instead of wall-clock time to avoid
30-
// clock drift between Node.js and the filesystem (especially on WSL,
31-
// networked drives, and macOS APFS). This eliminates the race condition
32-
// where mtime > new Date() due to filesystem clock skew.
68+
// altimate_change start — use filesystem mtime instead of wall-clock (see doc block above)
69+
const wallClock = new Date()
3370
const mtime = Filesystem.stat(file)?.mtime
34-
read[sessionID][file] = mtime ?? new Date()
71+
read[sessionID][file] = mtime ?? wallClock
72+
73+
// Track drift between wall-clock and filesystem mtime for monitoring.
74+
// This lets us measure the real-world impact of the clock source change
75+
// and detect environments where drift is significant.
76+
if (mtime) {
77+
const driftMs = Math.abs(wallClock.getTime() - mtime.getTime())
78+
if (driftMs > 10) {
79+
// Only emit when drift is non-trivial (>10ms) to avoid noise
80+
try {
81+
Telemetry.track({
82+
type: "filetime_drift",
83+
timestamp: Date.now(),
84+
session_id: sessionID,
85+
drift_ms: driftMs,
86+
mtime_ahead: mtime.getTime() > wallClock.getTime(),
87+
})
88+
} catch {
89+
// Telemetry must never break file operations
90+
}
91+
}
92+
}
93+
// altimate_change end
3594
}
3695

3796
export function get(sessionID: string, file: string) {
@@ -66,14 +125,27 @@ export namespace FileTime {
66125
const time = get(sessionID, filepath)
67126
if (!time) throw new Error(`You must read file ${filepath} before overwriting it. Use the Read tool first`)
68127
const mtime = Filesystem.stat(filepath)?.mtime
69-
// Allow a 2s tolerance for filesystem clock drift.
70-
// WSL (NTFS-over-9P) and networked drives routinely show 400ms–1.2s gaps
71-
// between Node.js Date.now() and filesystem mtime. The previous 50ms
72-
// tolerance caused massive retry loops (782 retries in one session).
73-
if (mtime && mtime.getTime() > time.getTime() + 2000) {
128+
// altimate_change start — keep upstream's 50ms tolerance (sufficient now that both
129+
// timestamps come from the same filesystem clock). Track assertion outcomes for monitoring.
130+
const toleranceMs = 50
131+
const deltaMs = mtime ? mtime.getTime() - time.getTime() : 0
132+
if (mtime && deltaMs > toleranceMs) {
133+
try {
134+
Telemetry.track({
135+
type: "filetime_assert",
136+
timestamp: Date.now(),
137+
session_id: sessionID,
138+
outcome: "stale",
139+
delta_ms: deltaMs,
140+
tolerance_ms: toleranceMs,
141+
})
142+
} catch {
143+
// Telemetry must never mask the stale-file error
144+
}
74145
throw new Error(
75-
`File ${filepath} has been modified since it was last read.\nLast modification: ${mtime.toISOString()}\nLast read: ${time.toISOString()}\n\nPlease read the file again before modifying it.`,
146+
`File ${filepath} has been modified since it was last read.\nLast modification: ${mtime.toISOString()}\nLast read: ${time.toISOString()}\nDelta: ${deltaMs}ms (tolerance: ${toleranceMs}ms)\n\nPlease read the file again before modifying it.`,
76147
)
77148
}
149+
// altimate_change end
78150
}
79151
}

packages/opencode/test/file/time.test.ts

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,9 @@ describe("file/time", () => {
112112
fn: async () => {
113113
FileTime.read(sessionID, filepath)
114114

115-
// Modify file and set mtime 5s in the future to exceed 2s tolerance
115+
// Simulate external modification by setting mtime 5s ahead of the
116+
// file's current mtime (read() stores mtime, assert() compares
117+
// against current mtime — both from filesystem clock).
116118
await fs.writeFile(filepath, "modified content", "utf-8")
117119
const future = new Date(Date.now() + 5000)
118120
await fs.utimes(filepath, future, future)
@@ -122,7 +124,7 @@ describe("file/time", () => {
122124
})
123125
})
124126

125-
test("includes timestamps in error message", async () => {
127+
test("includes timestamps and delta in error message", async () => {
126128
await using tmp = await tmpdir()
127129
const filepath = path.join(tmp.path, "file.txt")
128130
await fs.writeFile(filepath, "content", "utf-8")
@@ -132,7 +134,6 @@ describe("file/time", () => {
132134
fn: async () => {
133135
FileTime.read(sessionID, filepath)
134136
await fs.writeFile(filepath, "modified", "utf-8")
135-
// Set mtime 5 seconds in the future to exceed the 2s tolerance
136137
const future = new Date(Date.now() + 5000)
137138
await fs.utimes(filepath, future, future)
138139

@@ -145,6 +146,8 @@ describe("file/time", () => {
145146
expect(error).toBeDefined()
146147
expect(error!.message).toContain("Last modification:")
147148
expect(error!.message).toContain("Last read:")
149+
expect(error!.message).toContain("Delta:")
150+
expect(error!.message).toContain("tolerance:")
148151
},
149152
})
150153
})
@@ -347,8 +350,8 @@ describe("file/time", () => {
347350

348351
const originalStat = Filesystem.stat(filepath)
349352

353+
// Simulate external modification with mtime far ahead
350354
await fs.writeFile(filepath, "modified", "utf-8")
351-
// Set mtime 5 seconds in the future to exceed the 2s tolerance
352355
const future = new Date(Date.now() + 5000)
353356
await fs.utimes(filepath, future, future)
354357

@@ -359,5 +362,23 @@ describe("file/time", () => {
359362
},
360363
})
361364
})
365+
366+
test("read() stores filesystem mtime, not wall-clock time", async () => {
367+
await using tmp = await tmpdir()
368+
const filepath = path.join(tmp.path, "file.txt")
369+
await fs.writeFile(filepath, "content", "utf-8")
370+
371+
await Instance.provide({
372+
directory: tmp.path,
373+
fn: async () => {
374+
const statBefore = Filesystem.stat(filepath)
375+
FileTime.read(sessionID, filepath)
376+
const readTime = FileTime.get(sessionID, filepath)
377+
378+
// read() should store the file's mtime, not wall-clock
379+
expect(readTime!.getTime()).toBe(statBefore!.mtime.getTime())
380+
},
381+
})
382+
})
362383
})
363384
})

packages/opencode/test/tool/edit.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ describe("tool.edit", () => {
239239
FileTime.read(ctx.sessionID, filepath)
240240

241241
// Simulate external modification with mtime 5s in the future
242-
// to exceed the 2s tolerance in FileTime.assert()
242+
// to exceed the 50ms tolerance in FileTime.assert()
243243
await fs.writeFile(filepath, "modified externally", "utf-8")
244244
const future = new Date(Date.now() + 5000)
245245
await fs.utimes(filepath, future, future)

0 commit comments

Comments
 (0)