3434//
3535// Test seam: the public API is on a single exported object so tests can
3636// `mock.method(atomicWriter, '_writeFile', ...)` to inject failures.
37- import { writeFileSync , renameSync , statSync , mkdirSync , existsSync , unlinkSync , } from 'node:fs' ;
38- import { dirname } from 'node:path' ;
37+ import { writeFileSync , renameSync , statSync , mkdirSync , existsSync , unlinkSync , readdirSync , } from 'node:fs' ;
38+ import { dirname , basename } from 'node:path' ;
3939// Multi-LLM review of PR #109 findings 1+2: `finalMtimeMs = _stat(yaml)`
4040// breaks the safety invariant in two scenarios — (a) slow writes where
4141// the actual YAML mtime exceeds `projectedMtimeMs` and step 5 happens
@@ -56,6 +56,23 @@ import { dirname } from 'node:path';
5656 * 1 second satisfies both with a safe margin.
5757 */
5858export const FUTURE_MTIME_BUFFER_MS = 1_000 ;
59+ /**
60+ * GH #111: orphan .tmp files older than this threshold are eligible for
61+ * cleanup. No live pairWrite keeps a tmp file around for 5 minutes, so a
62+ * tmp file that old must be left over from a crashed run. Concurrent
63+ * pairWrite calls don't collide because each call uses a unique stamp,
64+ * but a stale orphan from a crashed process would otherwise stick around
65+ * forever. 5 minutes is conservative (allows long fsync queues / CI
66+ * antivirus stalls).
67+ */
68+ export const ORPHAN_MAX_AGE_MS = 5 * 60 * 1_000 ;
/**
 * Generate a tmp-file stamp for a single pairWrite call.
 *
 * The stamp has the shape `<pid>.<base-36 ms timestamp>.<8 base-36 chars>`.
 * Each pairWrite call owns its own tmp namespace, so two calls for the same
 * action path do not share tmp file names. Uniqueness is practical rather
 * than guaranteed: it relies on the pid, the millisecond clock and a
 * non-cryptographic random suffix.
 *
 * @returns {string} Stamp to append after `.tmp.` in a tmp file name.
 */
function generateTmpStamp() {
  // Math.random() may stringify to fewer than 8 base-36 digits (0 -> "0",
  // 0.5 -> "0.i"), which would leave the random field short or even empty.
  // Pad it so the stamp always has a fixed-width, non-empty third field.
  const rand = Math.random().toString(36).slice(2, 10).padEnd(8, '0');
  return `${process.pid}.${Date.now().toString(36)}.${rand}`;
}
5976/**
6077 * Atomic write of a (YAML, sidecar) pair using sidecar-first ordering.
6178 * Returns the resolved paths plus the final on-disk mtime. Throws if any
@@ -76,8 +93,13 @@ export const FUTURE_MTIME_BUFFER_MS = 1_000;
7693function pairWriteImpl ( yamlPath , yamlContent , sidecarPath , state ) {
7794 ensureDir ( yamlPath ) ;
7895 ensureDir ( sidecarPath ) ;
79- const yamlTmp = `${ yamlPath } .tmp` ;
80- const sidecarTmp = `${ sidecarPath } .tmp` ;
96+ // GH #111: unique stamp per call so two concurrent pairWrites against
97+ // the same action id never share a tmp namespace. Without this, B's
98+ // cleanupOrphans could unlink A's in-flight .tmp file and produce an
99+ // opaque ENOENT during A's rename.
100+ const stamp = generateTmpStamp ( ) ;
101+ const yamlTmp = `${ yamlPath } .tmp.${ stamp } ` ;
102+ const sidecarTmp = `${ sidecarPath } .tmp.${ stamp } ` ;
81103 // Step 1+2: sidecar with projected future mtime, atomic rename.
82104 const projectedMtimeMs = Date . now ( ) + FUTURE_MTIME_BUFFER_MS ;
83105 const projectedState = {
@@ -123,21 +145,41 @@ function ensureDir(filePath) {
123145 atomicWriter . _mkdir ( dir ) ;
124146}
/**
 * Best-effort cleanup of orphaned `.tmp.<stamp>` files left by a crashed
 * previous call (GH #111). Called by `pairWrite` before each operation.
 * Idempotent.
 *
 * For each target path, scans the target's directory and removes entries
 * whose name starts with `<basename>.tmp.` AND whose mtime is at least
 * ORPHAN_MAX_AGE_MS in the past. Fresh tmp files (likely owned by a
 * concurrent writer) are left untouched.
 *
 * All filesystem access goes through `atomicWriter._readdir` /
 * `_statMtimeMs` / `_unlink` so tests can mock cleanup deterministically.
 * Every failure is swallowed: cleanup must never block the write itself.
 *
 * @param {string} yamlPath - Final path of the YAML file.
 * @param {string} sidecarPath - Final path of the sidecar file.
 * @returns {void}
 */
function cleanupOrphans(yamlPath, sidecarPath) {
  const now = Date.now();
  for (const targetPath of [yamlPath, sidecarPath]) {
    const dir = dirname(targetPath);
    const prefix = `${basename(targetPath)}.tmp.`;
    let entries;
    try {
      entries = atomicWriter._readdir(dir);
    } catch {
      continue; // dir unreadable (typically not created yet) — no orphans to clean
    }
    for (const entry of entries) {
      if (!entry.startsWith(prefix)) {
        continue;
      }
      const orphanPath = `${dir}/${entry}`;
      try {
        const ageMs = now - atomicWriter._statMtimeMs(orphanPath);
        // Fail closed: delete only when the file is provably old enough.
        // A NaN age (non-numeric mtime) compares false here, so the file
        // is kept instead of falling through to the destructive unlink.
        if (!(ageMs >= ORPHAN_MAX_AGE_MS)) {
          continue; // fresh (likely a concurrent writer's) or age unknown
        }
        atomicWriter._unlink(orphanPath);
      } catch {
        /* best-effort: a failed stat/unlink leaves the orphan for a later run */
      }
    }
  }
}
@@ -172,6 +214,10 @@ export const atomicWriter = {
172214 _unlink ( path ) {
173215 unlinkSync ( path ) ; // no local error handling: a failure throws to the caller (cleanupOrphans wraps this call in try/catch)
174216 } ,
217+ /** Underlying `fs.readdirSync(path)`: returns the entry names found in directory `path` and throws if it cannot be read. Test seam for the GH #111 prefix-scan cleanup in `cleanupOrphans`, which treats a throw as "nothing to clean". */
218+ _readdir ( path ) {
219+ return readdirSync ( path ) ;
220+ } ,
175221 /**
176222 * Atomic pair-write. Cleans up stale orphaned `.tmp.<stamp>` files before
177223 * starting. Throws on the first failed step — caller decides whether
0 commit comments