Skip to content

Commit 85b79c2

Browse files
committed
fixup! feat(core.utils): clear() preserves young temp files (concurrent-writer race)
1 parent 945506e commit 85b79c2

File tree

2 files changed

+48
-15
lines changed

2 files changed

+48
-15
lines changed

cuda_core/cuda/core/utils/_program_cache.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,15 +1233,10 @@ def clear(self) -> None:
12331233
for path in list(self._iter_entry_paths()):
12341234
with contextlib.suppress(FileNotFoundError):
12351235
path.unlink()
1236-
# The user explicitly asked to wipe this cache, so also drop every
1237-
# temp file we can see (whether stale or in flight from this process).
1238-
# Other processes' in-flight writes will still complete to ``entries``
1239-
# via ``os.replace``, but their staging files are intentionally gone.
1240-
if self._tmp.exists():
1241-
for tmp in list(self._tmp.iterdir()):
1242-
if tmp.is_file():
1243-
with contextlib.suppress(FileNotFoundError):
1244-
tmp.unlink()
1236+
# Sweep ONLY stale temp files. Deleting a young temp would race with
1237+
# another process between ``mkstemp`` and ``os.replace`` and turn its
1238+
# write into ``FileNotFoundError`` instead of a successful commit.
1239+
self._sweep_stale_tmp_files()
12451240
# Remove empty subdirs (best-effort; concurrent writers may re-create).
12461241
if self._entries.exists():
12471242
for sub in sorted(self._entries.iterdir(), reverse=True):

cuda_core/tests/test_program_cache.py

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1411,21 +1411,59 @@ def test_filestream_cache_sweeps_stale_tmp_files_on_open(tmp_path):
14111411
assert not ancient.exists(), "ancient temp file should have been swept"
14121412

14131413

1414-
def test_filestream_cache_clear_drops_all_tmp_files(tmp_path):
1415-
"""clear() is an explicit user wipe, so it removes every temp file too
1416-
-- including young ones (other processes' in-flight writes will still
1417-
complete to entries/, just without their staging file)."""
1418-
from cuda.core.utils import FileStreamProgramCache
1414+
def test_filestream_cache_clear_preserves_young_tmp_files(tmp_path):
1415+
"""clear() must not delete young temp files: another process could be
1416+
mid-write between ``mkstemp`` and ``os.replace``, and unlinking under
1417+
it turns the writer's harmless rename into ``FileNotFoundError``.
1418+
Stale temps (older than the threshold) are still swept."""
1419+
import os as _os
1420+
1421+
from cuda.core.utils import FileStreamProgramCache, _program_cache
14191422

14201423
root = tmp_path / "fc"
14211424
with FileStreamProgramCache(root) as cache:
14221425
cache[b"k"] = _fake_object_code(b"v")
14231426
young_tmp = root / "tmp" / "entry-young"
14241427
young_tmp.write_bytes(b"in-flight")
1428+
ancient_tmp = root / "tmp" / "entry-ancient"
1429+
ancient_tmp.write_bytes(b"crashed")
1430+
ancient_mtime = time.time() - _program_cache._TMP_STALE_AGE_SECONDS - 60
1431+
_os.utime(ancient_tmp, (ancient_mtime, ancient_mtime))
14251432

14261433
with FileStreamProgramCache(root) as cache:
14271434
cache.clear()
1428-
assert not young_tmp.exists()
1435+
# Committed entry is gone, ancient orphan is gone, young temp survives.
1436+
assert list((root / "entries").rglob("*.*")) == [] # no committed entry files
1437+
assert young_tmp.exists()
1438+
assert not ancient_tmp.exists()
1439+
1440+
1441+
def test_filestream_cache_clear_does_not_break_concurrent_writer(tmp_path):
1442+
"""Simulate a writer that has already produced a temp file but has not
1443+
yet executed ``os.replace``; a concurrent ``clear()`` from another
1444+
cache instance must NOT unlink that temp, so the writer's
1445+
``os.replace`` still succeeds."""
1446+
import os as _os
1447+
1448+
from cuda.core.utils import FileStreamProgramCache
1449+
1450+
root = tmp_path / "fc"
1451+
with FileStreamProgramCache(root) as cache:
1452+
cache[b"seed"] = _fake_object_code(b"seed")
1453+
1454+
# Stage a temp file that mimics an in-flight write.
1455+
inflight_tmp = root / "tmp" / "entry-inflight"
1456+
inflight_tmp.write_bytes(b"\x80\x05fake-pickle") # contents do not matter
1457+
1458+
# Concurrent clear() from another cache handle.
1459+
with FileStreamProgramCache(root) as other:
1460+
other.clear()
1461+
1462+
# The writer can now finish: rename the staged file into entries/.
1463+
target = root / "entries" / "ab" / "cdef"
1464+
target.parent.mkdir(parents=True, exist_ok=True)
1465+
_os.replace(inflight_tmp, target)
1466+
assert target.exists()
14291467

14301468

14311469
def test_filestream_cache_size_cap_counts_tmp_files(tmp_path):

0 commit comments

Comments
 (0)