Skip to content

Commit 96fe140

Browse files
cpcloud and claude committed
Treat Windows os.replace PermissionError as non-fatal cache miss
On Windows, os.replace raises PermissionError when another process holds the target open. Instead of retrying, silently drop the write — correct semantics for a cache (a missed write isn't corruption).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 4aa42e0 commit 96fe140

File tree

2 files changed

+12
-11
lines changed

2 files changed

+12
-11
lines changed

cuda_core/cuda/core/utils/_program_cache.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -575,16 +575,14 @@ def __setitem__(self, key: object, value: object) -> None:
575575
fh.write(record)
576576
fh.flush()
577577
os.fsync(fh.fileno())
578-
# On Windows, os.replace can raise PermissionError when another
579-
# process holds the target open. Retry a few times with backoff.
580-
for attempt in range(5):
581-
try:
582-
os.replace(tmp_path, target)
583-
break
584-
except PermissionError:
585-
if attempt == 4:
586-
raise
587-
time.sleep(0.01 * (2**attempt))
578+
os.replace(tmp_path, target)
579+
except PermissionError:
580+
# On Windows, os.replace raises PermissionError when another
581+
# process holds the target open. Treat as a non-fatal cache
582+
# write failure — the entry simply won't be cached this time.
583+
with contextlib.suppress(FileNotFoundError):
584+
tmp_path.unlink()
585+
return
588586
except BaseException:
589587
with contextlib.suppress(FileNotFoundError):
590588
tmp_path.unlink()

cuda_core/tests/test_program_cache_multiprocess.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,10 @@ def test_concurrent_writers_same_key_no_corruption(tmp_path):
6161
assert p.exitcode == 0, f"worker exited with {p.exitcode}"
6262

6363
with FileStreamProgramCache(root) as cache:
64-
got = cache[b"shared"] # must not raise; payload is one of the writers'
64+
# At least one writer must have succeeded; on Windows some writes
65+
# may silently fail due to PermissionError on os.replace.
66+
got = cache.get(b"shared")
67+
assert got is not None, "no writer succeeded"
6568
assert bytes(got.code).startswith(b"v")
6669

6770

0 commit comments

Comments
 (0)