Skip to content

Commit 5a0f9df

Browse files
committed
refactor(core.utils): drop on-disk schema-version mechanism
This is a pre-release branch -- nobody has caches on disk that need to survive a version bump, and the wipe-on-mismatch path is where both recent roborev findings have come from. Drop the entire schema-file mechanism: no _FILESTREAM_BACKEND_SCHEMA, no _FILESTREAM_SCHEMA_VERSION, no SCHEMA_VERSION file, no schema-mismatch wipe in __init__. ``_KEY_SCHEMA_VERSION`` stays (still mixed into every cache key's hash). A bump still invalidates old entries -- their hash is now different, so lookups under the same logical key miss and route to a new on-disk path. Old entries become orphans, harmless, reaped on the next eviction pass against the size cap. If a real format-incompatible change ever lands post-release, that PR can introduce a fresh, scoped versioning mechanism designed against the real backwards-compat scenario instead of speculation. Tests: removed wipe-on-mismatch and locked-marker tests (no longer applicable). Replaced the key-schema-bump test with one that asserts the new orphans-are-harmless semantics.
1 parent 7533b87 commit 5a0f9df

2 files changed

Lines changed: 31 additions & 124 deletions

File tree

cuda_core/cuda/core/utils/_program_cache.py

Lines changed: 15 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -997,19 +997,8 @@ def _evict_to_caps(self) -> None:
997997
# ---------------------------------------------------------------------------
998998

999999

1000-
# Composite of (on-disk-format version, key schema version): a bump in either
1001-
# one forces wipe-on-open. ``_KEY_SCHEMA_VERSION`` participates so that
1002-
# changes to the cache-key encoding can never leave orphaned entries on
1003-
# disk -- the new key would never collide with the old hash, but the file
1004-
# would otherwise sit forever counting against the size cap.
1005-
#
1006-
# Bumped from 2 -> 3 when entries switched from pickled records to raw
1007-
# binary; old caches are auto-wiped on first open by the new code.
1008-
_FILESTREAM_BACKEND_SCHEMA = 3
1009-
_FILESTREAM_SCHEMA_VERSION = f"{_FILESTREAM_BACKEND_SCHEMA}.{_KEY_SCHEMA_VERSION}"
10101000
_ENTRIES_SUBDIR = "entries"
10111001
_TMP_SUBDIR = "tmp"
1012-
_SCHEMA_FILE = "SCHEMA_VERSION"
10131002
# Temp files older than this are assumed to belong to a crashed writer and
10141003
# are eligible for cleanup. Picked large enough that no real ``os.replace``
10151004
# write should still be in flight (writes are bounded by mkstemp + write +
@@ -1317,23 +1306,21 @@ class FileStreamProgramCache(ProgramCacheResource):
13171306
13181307
.. note:: **Cross-version sharing.**
13191308
1320-
``_FILESTREAM_SCHEMA_VERSION`` encodes both the on-disk storage
1321-
format and the key-schema version, so a cache written by an
1322-
incompatible version is wiped on open (bumping either
1323-
``_KEY_SCHEMA_VERSION`` or ``_FILESTREAM_BACKEND_SCHEMA`` forces
1324-
cleanup instead of leaving orphaned entries on disk).
1325-
1326-
Within a single schema version the cache is safe to share across
1327-
``cuda.core`` patch releases: every entry's key encodes the
1328-
relevant backend/compiler/runtime fingerprints for its compilation
1329-
path (NVRTC entries pin the NVRTC version, NVVM entries pin the
1330-
libNVVM library and IR versions, PTX/linker entries pin the chosen
1331-
linker backend and its version -- and, when the cuLink/driver
1332-
backend is selected, the driver version too; nvJitLink-backed PTX
1333-
entries are deliberately driver-version independent). Entries are
1334-
stored verbatim as the compiled binary, so cross-patch sharing
1335-
only requires that the compiler-pinning surface above stays stable
1336-
-- there is no Python-pickle compatibility involved.
1309+
The cache is safe to share across ``cuda.core`` patch releases:
1310+
every entry's key encodes the relevant backend/compiler/runtime
1311+
fingerprints for its compilation path (NVRTC entries pin the
1312+
NVRTC version, NVVM entries pin the libNVVM library and IR
1313+
versions, PTX/linker entries pin the chosen linker backend and
1314+
its version -- and, when the cuLink/driver backend is selected,
1315+
the driver version too; nvJitLink-backed PTX entries are
1316+
deliberately driver-version independent). Bumping
1317+
``_KEY_SCHEMA_VERSION`` (mixed into every key's hash) produces
1318+
new keys that don't collide with old entries, so old entries
1319+
become invisible to lookups and are reaped on the next size-cap
1320+
eviction pass. Entries are stored verbatim as the compiled
1321+
binary, so cross-patch sharing only requires that the
1322+
compiler-pinning surface above stays stable -- there is no
1323+
Python-pickle compatibility involved.
13371324
13381325
Parameters
13391326
----------
@@ -1361,37 +1348,10 @@ def __init__(
13611348
self._root = Path(path) if path is not None else _default_cache_dir()
13621349
self._entries = self._root / _ENTRIES_SUBDIR
13631350
self._tmp = self._root / _TMP_SUBDIR
1364-
self._schema_path = self._root / _SCHEMA_FILE
13651351
self._max_size_bytes = max_size_bytes
13661352
self._root.mkdir(parents=True, exist_ok=True)
13671353
self._entries.mkdir(exist_ok=True)
13681354
self._tmp.mkdir(exist_ok=True)
1369-
expected = str(_FILESTREAM_SCHEMA_VERSION)
1370-
if not self._schema_path.exists():
1371-
self._schema_path.write_text(expected)
1372-
else:
1373-
existing = self._schema_path.read_text().strip()
1374-
if existing != expected:
1375-
# Schema mismatch: wipe incompatible entries. Losing cache
1376-
# contents is safe; returning bytes from an old format is not.
1377-
# Tolerate Windows sharing violations -- another process may
1378-
# be reading an old entry; in that case leave the OLD schema
1379-
# marker so the next open re-runs the wipe. Advancing the
1380-
# marker before every old entry is gone would cause future
1381-
# opens to skip the wipe and let lookups return bytes from
1382-
# the incompatible format.
1383-
wipe_complete = True
1384-
for entry in list(self._iter_entry_paths()):
1385-
try:
1386-
_unlink_with_sharing_retry(entry)
1387-
except FileNotFoundError:
1388-
pass
1389-
except PermissionError as exc:
1390-
if not _is_windows_sharing_violation(exc):
1391-
raise
1392-
wipe_complete = False
1393-
if wipe_complete:
1394-
self._schema_path.write_text(expected)
13951355
# Opportunistic startup sweep of orphaned temp files left by any
13961356
# crashed writers. Age-based so concurrent in-flight writes from
13971357
# other processes are preserved.

cuda_core/tests/test_program_cache.py

Lines changed: 16 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -837,43 +837,6 @@ def _denied(self, *args, **kwargs):
837837
cache[b"c"] = b"c" * 100
838838

839839

840-
def test_filestream_cache_schema_mismatch_keeps_old_marker_when_locked(tmp_path, monkeypatch):
841-
"""If a schema-mismatch wipe fails to remove an entry due to a Windows
842-
sharing violation, the old schema marker must NOT be advanced --
843-
otherwise a future open skips the wipe and a future lookup may return
844-
bytes from the incompatible old format."""
845-
from pathlib import Path
846-
847-
from cuda.core.utils import FileStreamProgramCache, _program_cache
848-
849-
root = tmp_path / "fc"
850-
# Seed a cache with one entry under the current schema.
851-
with FileStreamProgramCache(root) as cache:
852-
cache[b"k"] = b"v"
853-
854-
# Pretend the on-disk schema is from an old version.
855-
schema_path = root / "SCHEMA_VERSION"
856-
schema_path.write_text("0.0")
857-
expected_marker = schema_path.read_text()
858-
assert expected_marker == "0.0"
859-
860-
# Make every unlink raise a Windows sharing violation. Re-opening should
861-
# attempt the wipe, fail to remove the locked entry, and LEAVE the old
862-
# schema marker so a later open re-runs the wipe.
863-
monkeypatch.setattr(_program_cache, "_IS_WINDOWS", True)
864-
865-
def _always_locked(self, *args, **kwargs):
866-
exc = PermissionError("sharing violation")
867-
exc.winerror = 32
868-
raise exc
869-
870-
monkeypatch.setattr(Path, "unlink", _always_locked)
871-
872-
# The wipe is best-effort but must not silently advance the marker.
873-
FileStreamProgramCache(root)
874-
assert schema_path.read_text() == "0.0", "schema marker advanced despite locked entries surviving the wipe"
875-
876-
877840
@pytest.mark.parametrize(
878841
"option_kw",
879842
[
@@ -1498,7 +1461,7 @@ def test_filestream_cache_uses_default_dir_when_path_omitted(tmp_path, monkeypat
14981461
with FileStreamProgramCache() as cache:
14991462
cache[b"k"] = b"hello"
15001463
assert cache[b"k"] == b"hello"
1501-
assert (tmp_path / "default-fc" / "SCHEMA_VERSION").is_file()
1464+
assert (tmp_path / "default-fc" / "entries").is_dir()
15021465

15031466

15041467
def test_filestream_cache_sweeps_stale_tmp_files_on_open(tmp_path):
@@ -1786,47 +1749,31 @@ def test_filestream_cache_unbounded_by_default(tmp_path):
17861749
assert len(cache) == 20
17871750

17881751

1789-
def test_filestream_cache_wipes_on_schema_mismatch(tmp_path):
1790-
"""A cache written with an older schema must be wiped on open, not
1791-
silently mixed with a newer format."""
1792-
from cuda.core.utils import FileStreamProgramCache
1793-
1794-
root = tmp_path / "fc"
1795-
with FileStreamProgramCache(root) as cache:
1796-
cache[b"k"] = _fake_object_code(b"old-payload")
1797-
# Simulate an older schema by rewriting the version marker.
1798-
(root / "SCHEMA_VERSION").write_text("0")
1799-
1800-
with FileStreamProgramCache(root) as cache:
1801-
assert len(cache) == 0
1802-
assert b"k" not in cache
1803-
# Marker should be back at the current version.
1804-
assert (root / "SCHEMA_VERSION").read_text().strip() != "0"
1805-
1806-
1807-
def test_filestream_cache_schema_version_encodes_key_schema(tmp_path, monkeypatch):
1808-
"""As with the SQLite backend, bumping ``_KEY_SCHEMA_VERSION`` alone
1809-
must invalidate the on-disk cache so orphaned entries from the old
1810-
key-hash format do not linger after an upgrade."""
1752+
def test_filestream_cache_key_schema_bump_orphans_old_entries(tmp_path, monkeypatch):
1753+
"""``_KEY_SCHEMA_VERSION`` is mixed into every key's hash. Bumping it
1754+
produces new keys that don't collide with old entries on disk -- so
1755+
after the bump, lookups under the same logical key miss (the new
1756+
hash points to a different path), and the old file is harmless until
1757+
eviction reaps it."""
18111758
from cuda.core.utils import FileStreamProgramCache, _program_cache
18121759

18131760
root = tmp_path / "fc"
18141761
with FileStreamProgramCache(root) as cache:
18151762
cache[b"k"] = _fake_object_code(b"old-payload")
1816-
path = cache._path_for_key(b"k")
1817-
assert path.exists()
1763+
old_path = cache._path_for_key(b"k")
1764+
assert old_path.exists()
18181765

18191766
monkeypatch.setattr(_program_cache, "_KEY_SCHEMA_VERSION", _program_cache._KEY_SCHEMA_VERSION + 1)
1820-
monkeypatch.setattr(
1821-
_program_cache,
1822-
"_FILESTREAM_SCHEMA_VERSION",
1823-
f"{_program_cache._FILESTREAM_BACKEND_SCHEMA}.{_program_cache._KEY_SCHEMA_VERSION}",
1824-
)
18251767

18261768
with FileStreamProgramCache(root) as cache:
1827-
assert len(cache) == 0
1769+
# Same logical key, but the schema-version bump changes the hash,
1770+
# so the lookup hits a different on-disk path and misses.
18281771
assert b"k" not in cache
1829-
assert not path.exists()
1772+
new_path = cache._path_for_key(b"k")
1773+
assert new_path != old_path
1774+
# Old entry is still on disk (not wiped); will be reaped on next
1775+
# eviction pass once the cap is hit.
1776+
assert old_path.exists()
18301777

18311778

18321779
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)