Skip to content

Commit 9b8d2cf

Browse files
committed
remove __contains__ from ProgramCacheResource and both backends
The ``if key in cache: data = cache[key]`` idiom is racy across processes (an evictor can unlink the entry between the membership check and the read), and exposing ``__contains__`` invites that pattern. ``cache.get(key)`` answers both questions in a single filesystem-level operation, so a successful return always carries the bytes. Drop the abstract method from the ABC and the implementations from ``InMemoryProgramCache`` and ``FileStreamProgramCache``; rewrite the test suite from ``b"k" in cache`` / ``b"k" not in cache`` to ``cache.get(b"k") is [not] None``. Class docstring now states the contract explicitly: there is no ``__contains__`` and ``get`` is the recommended lookup. The ``__contains__``-specific LRU/atime tests are deleted; the ``__getitem__`` LRU tests already exercise the relevant recency-update paths.
1 parent 760f530 commit 9b8d2cf

3 files changed

Lines changed: 46 additions & 116 deletions

File tree

cuda_core/cuda/core/utils/_program_cache.py

Lines changed: 12 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -139,14 +139,14 @@ class ProgramCacheResource(abc.ABC):
139139
.. note:: **Concurrent-access idiom.**
140140
141141
Use :meth:`get` (or ``data = cache[key]`` inside a ``try /
142-
except KeyError``) rather than the two-call pattern ``if key
143-
in cache: data = cache[key]``. The two-call pattern is racy
144-
across processes: another writer can ``os.replace`` over the
145-
entry, or eviction can unlink it, between the membership
146-
check and the read. ``get`` answers both questions in a
147-
single filesystem-level operation, so a successful return
148-
always carries the bytes. ``__contains__`` is provided for
149-
diagnostics and tests, not for guarding reads.
142+
except KeyError``) for lookups. There is intentionally no
143+
``__contains__``: the obvious ``if key in cache: data =
144+
cache[key]`` idiom is racy across processes (another writer
145+
can ``os.replace`` over the entry, or eviction can unlink
146+
it, between the check and the read), and exposing
147+
``__contains__`` invites that pattern. ``get`` answers
148+
both questions in one filesystem-level operation, so a
149+
successful return always carries the bytes.
150150
"""
151151

152152
@abc.abstractmethod
@@ -167,10 +167,6 @@ def __setitem__(self, key: bytes | str, value: bytes | bytearray | memoryview |
167167
write time so the cached entry holds the bytes, not a path.
168168
"""
169169

170-
@abc.abstractmethod
171-
def __contains__(self, key: bytes | str) -> bool:
172-
"""Return ``True`` if ``key`` is in the cache."""
173-
174170
@abc.abstractmethod
175171
def __delitem__(self, key: bytes | str) -> None:
176172
"""Remove the entry associated with ``key``.
@@ -936,9 +932,10 @@ class InMemoryProgramCache(ProgramCacheResource):
936932
937933
Notes
938934
-----
939-
Recency is updated on :meth:`__getitem__`; :meth:`__contains__` is
940-
read-only and does not shift LRU order, matching
941-
:class:`FileStreamProgramCache`.
935+
Recency is updated on :meth:`__getitem__`; ``get`` is the
936+
recommended lookup since the cache deliberately omits
937+
``__contains__`` (the ``if key in cache: ...`` idiom is racy
938+
across processes; see :class:`ProgramCacheResource`).
942939
943940
Thread safety: a :class:`threading.RLock` serialises every method,
944941
so the cache can be shared across threads without external
@@ -986,14 +983,6 @@ def __setitem__(self, key: object, value: bytes | bytearray | memoryview | Objec
986983
self._total_bytes += size
987984
self._evict_to_caps()
988985

989-
def __contains__(self, key: object) -> bool:
990-
# Validate the key (mirror FileStream's behaviour: a non-str,
991-
# non-bytes key is a programming error and should surface, not
992-
# quietly report "not present").
993-
k = _as_key_bytes(key)
994-
with self._lock:
995-
return k in self._entries
996-
997986
def __delitem__(self, key: object) -> None:
998987
k = _as_key_bytes(key)
999988
with self._lock:
@@ -1418,24 +1407,6 @@ def _path_for_key(self, key: object) -> Path:
14181407

14191408
# -- mapping API ---------------------------------------------------------
14201409

1421-
def __contains__(self, key: object) -> bool:
1422-
# Membership is a stat-only check: it must not read the payload (a
1423-
# full file read just to answer ``key in cache`` is wasteful) and
1424-
# must not bump atime (otherwise probing keeps cold entries hot
1425-
# and skews LRU eviction).
1426-
#
1427-
# Like every cross-process file lookup, ``key in cache`` followed
1428-
# by ``cache[key]`` is inherently racy: the entry may be evicted
1429-
# by another process between the two calls. The previous version
1430-
# of this method routed through ``__getitem__`` and did a full
1431-
# read; that did not close the race (the file could vanish between
1432-
# the read and the next caller's read), it just paid the cost
1433-
# twice. The cache layer does no payload validation -- entries
1434-
# are raw binary, not framed records -- so a successful ``exists``
1435-
# check is the same answer ``__getitem__`` would give for a
1436-
# non-racing caller.
1437-
return self._path_for_key(key).exists()
1438-
14391410
def __getitem__(self, key: object) -> bytes:
14401411
path = self._path_for_key(key)
14411412
try:

cuda_core/tests/test_program_cache.py

Lines changed: 33 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -763,7 +763,7 @@ def _flaky_unlink(self, *args, **kwargs):
763763
monkeypatch.setattr(Path, "unlink", _flaky_unlink)
764764
del cache[b"k"] # succeeds on third attempt
765765
assert calls["n"] == 3
766-
assert b"k" not in cache
766+
assert cache.get(b"k") is None
767767

768768

769769
def test_filestream_cache_unlink_propagates_non_windows_permission_error(tmp_path, monkeypatch):
@@ -1223,7 +1223,7 @@ def test_filestream_cache_empty_on_create(tmp_path):
12231223

12241224
with FileStreamProgramCache(tmp_path / "fc") as cache:
12251225
assert len(cache) == 0
1226-
assert b"nope" not in cache
1226+
assert cache.get(b"nope") is None
12271227
with pytest.raises(KeyError):
12281228
cache[b"nope"]
12291229

@@ -1236,7 +1236,7 @@ def test_filestream_cache_roundtrip(tmp_path):
12361236

12371237
with FileStreamProgramCache(tmp_path / "fc") as cache:
12381238
cache[b"k1"] = _fake_object_code(b"v1", name="x")
1239-
assert b"k1" in cache
1239+
assert cache.get(b"k1") is not None
12401240
assert cache[b"k1"] == b"v1"
12411241

12421242

@@ -1246,7 +1246,7 @@ def test_filestream_cache_delete(tmp_path):
12461246
with FileStreamProgramCache(tmp_path / "fc") as cache:
12471247
cache[b"k"] = _fake_object_code()
12481248
del cache[b"k"]
1249-
assert b"k" not in cache
1249+
assert cache.get(b"k") is None
12501250
with pytest.raises(KeyError):
12511251
del cache[b"k"]
12521252

@@ -1281,31 +1281,6 @@ def test_filestream_cache_persists_across_reopen(tmp_path):
12811281
assert cache[b"k"] == b"persisted"
12821282

12831283

1284-
def test_filestream_cache_contains_does_not_read_or_promote_lru(tmp_path):
1285-
"""``__contains__`` is a stat-only check: it must not read the payload
1286-
(a full file read just to answer membership is wasteful) and must not
1287-
bump atime (otherwise probing keeps cold entries hot and inverts LRU
1288-
eviction relative to genuine reads)."""
1289-
from cuda.core.utils import FileStreamProgramCache
1290-
1291-
cap = 250
1292-
with FileStreamProgramCache(tmp_path / "fc", max_size_bytes=cap) as cache:
1293-
cache[b"a"] = b"a" * 100
1294-
cache[b"b"] = b"b" * 100
1295-
1296-
# Hammer membership for 'a' (cold) -- must NOT promote it.
1297-
for _ in range(50):
1298-
assert b"a" in cache
1299-
1300-
# The next size-cap-triggering write should evict 'a' (the genuinely
1301-
# oldest by atime). If membership had bumped atime, 'b' would evict
1302-
# instead.
1303-
cache[b"c"] = b"c" * 100
1304-
assert b"a" not in cache
1305-
assert b"b" in cache
1306-
assert b"c" in cache
1307-
1308-
13091284
def test_filestream_cache_permission_error_propagates_on_posix(tmp_path, monkeypatch):
13101285
"""On non-Windows, PermissionError from os.replace is a real config error
13111286
and must not be silently swallowed."""
@@ -1376,7 +1351,7 @@ def _denied(src, dst):
13761351
cache[b"k"] = _fake_object_code(b"v")
13771352
else:
13781353
cache[b"k"] = _fake_object_code(b"v") # swallowed
1379-
assert b"k" not in cache
1354+
assert cache.get(b"k") is None
13801355

13811356

13821357
def test_filestream_cache_atomic_no_half_written_file(tmp_path, monkeypatch):
@@ -1394,7 +1369,7 @@ def _boom(src, dst):
13941369
with pytest.raises(RuntimeError, match="crash"):
13951370
cache[b"k"] = _fake_object_code(b"v")
13961371
monkeypatch.undo()
1397-
assert b"k" not in cache
1372+
assert cache.get(b"k") is None
13981373

13991374

14001375
def test_filestream_cache_prune_only_if_stat_unchanged(tmp_path):
@@ -1803,8 +1778,8 @@ def test_filestream_cache_size_cap_counts_tmp_files(tmp_path):
18031778
# the temp file's bytes count toward the cap now.
18041779
time.sleep(0.02)
18051780
cache[b"c"] = _fake_object_code(b"C" * 200, name="c")
1806-
assert b"a" not in cache
1807-
assert b"c" in cache
1781+
assert cache.get(b"a") is None
1782+
assert cache.get(b"c") is not None
18081783

18091784

18101785
def test_filestream_cache_handles_long_keys(tmp_path):
@@ -1818,8 +1793,8 @@ def test_filestream_cache_handles_long_keys(tmp_path):
18181793
with FileStreamProgramCache(tmp_path / "fc") as cache:
18191794
cache[long_bytes_key] = _fake_object_code(b"b", name="nb")
18201795
cache[long_str_key] = _fake_object_code(b"s", name="ns")
1821-
assert long_bytes_key in cache
1822-
assert long_str_key in cache
1796+
assert cache.get(long_bytes_key) is not None
1797+
assert cache.get(long_str_key) is not None
18231798
assert cache[long_bytes_key] == b"b"
18241799
assert cache[long_str_key] == b"s"
18251800

@@ -1829,8 +1804,8 @@ def test_filestream_cache_accepts_str_keys(tmp_path):
18291804

18301805
with FileStreamProgramCache(tmp_path / "fc") as cache:
18311806
cache["my-key"] = _fake_object_code(b"v")
1832-
assert "my-key" in cache
1833-
assert b"my-key" in cache
1807+
assert cache.get("my-key") is not None
1808+
assert cache.get(b"my-key") is not None
18341809

18351810

18361811
def test_filestream_cache_size_cap_evicts_oldest(tmp_path):
@@ -1848,8 +1823,8 @@ def test_filestream_cache_size_cap_evicts_oldest(tmp_path):
18481823
cache[b"c"] = b"C" * 2000
18491824

18501825
with FileStreamProgramCache(root, max_size_bytes=cap) as cache:
1851-
assert b"a" not in cache
1852-
assert b"c" in cache
1826+
assert cache.get(b"a") is None
1827+
assert cache.get(b"c") is not None
18531828

18541829

18551830
def test_filestream_cache_atime_lru_promotes_recently_read(tmp_path):
@@ -1874,10 +1849,10 @@ def test_filestream_cache_atime_lru_promotes_recently_read(tmp_path):
18741849

18751850
with FileStreamProgramCache(root, max_size_bytes=cap) as cache:
18761851
# 'b' is now the oldest atime -- evicted.
1877-
assert b"b" not in cache
1852+
assert cache.get(b"b") is None
18781853
# 'a' was read after 'b' so its atime is newer -- survives.
1879-
assert b"a" in cache
1880-
assert b"c" in cache
1854+
assert cache.get(b"a") is not None
1855+
assert cache.get(b"c") is not None
18811856

18821857

18831858
def test_filestream_cache_unbounded_by_default(tmp_path):
@@ -1952,12 +1927,12 @@ def test_cache_roundtrip_with_real_compilation(tmp_path, init_cuda):
19521927

19531928
# First "process": compile and store the binary.
19541929
with FileStreamProgramCache(tmp_path / "fc") as cache:
1955-
assert key not in cache
1930+
assert cache.get(key) is None
19561931
cache[key] = compiled # extracts bytes(compiled.code)
19571932

19581933
# Second "process": reopen, retrieve bytes, rebuild ObjectCode.
19591934
with FileStreamProgramCache(tmp_path / "fc") as cache:
1960-
assert key in cache
1935+
assert cache.get(key) is not None
19611936
cached_bytes = cache[key]
19621937

19631938
assert cached_bytes == bytes(compiled.code)
@@ -1976,7 +1951,7 @@ def test_inmemory_cache_empty_on_create():
19761951

19771952
cache = InMemoryProgramCache()
19781953
assert len(cache) == 0
1979-
assert b"nope" not in cache
1954+
assert cache.get(b"nope") is None
19801955
with pytest.raises(KeyError):
19811956
cache[b"nope"]
19821957

@@ -1986,7 +1961,7 @@ def test_inmemory_cache_roundtrip_object_code():
19861961

19871962
cache = InMemoryProgramCache()
19881963
cache[b"k1"] = _fake_object_code(b"v1", name="x")
1989-
assert b"k1" in cache
1964+
assert cache.get(b"k1") is not None
19901965
assert cache[b"k1"] == b"v1"
19911966

19921967

@@ -2032,8 +2007,8 @@ def test_inmemory_cache_str_and_bytes_keys_alias():
20322007
cache = InMemoryProgramCache()
20332008
cache["k"] = b"v"
20342009
assert cache[b"k"] == b"v"
2035-
assert b"k" in cache
2036-
assert "k" in cache
2010+
assert cache.get(b"k") is not None
2011+
assert cache.get("k") is not None
20372012

20382013

20392014
def test_inmemory_cache_rejects_non_str_non_bytes_key():
@@ -2045,7 +2020,7 @@ def test_inmemory_cache_rejects_non_str_non_bytes_key():
20452020
with pytest.raises(TypeError):
20462021
cache[123]
20472022
with pytest.raises(TypeError):
2048-
123 in cache # noqa: B015
2023+
cache.get(123)
20492024

20502025

20512026
def test_inmemory_cache_rejects_non_bytes_non_object_code_value():
@@ -2062,7 +2037,7 @@ def test_inmemory_cache_delete():
20622037
cache = InMemoryProgramCache()
20632038
cache[b"k"] = _fake_object_code()
20642039
del cache[b"k"]
2065-
assert b"k" not in cache
2040+
assert cache.get(b"k") is None
20662041
with pytest.raises(KeyError):
20672042
del cache[b"k"]
20682043

@@ -2085,7 +2060,7 @@ def test_inmemory_cache_clear():
20852060
cache[b"b"] = _fake_object_code()
20862061
cache.clear()
20872062
assert len(cache) == 0
2088-
assert b"a" not in cache
2063+
assert cache.get(b"a") is None
20892064

20902065

20912066
def test_inmemory_cache_get_returns_default_on_miss():
@@ -2146,9 +2121,9 @@ def test_inmemory_cache_size_cap_evicts_oldest():
21462121
cache[b"a"] = b"a" * 100
21472122
cache[b"b"] = b"b" * 100
21482123
cache[b"c"] = b"c" * 100 # forces eviction of b"a"
2149-
assert b"a" not in cache
2150-
assert b"b" in cache
2151-
assert b"c" in cache
2124+
assert cache.get(b"a") is None
2125+
assert cache.get(b"b") is not None
2126+
assert cache.get(b"c") is not None
21522127

21532128

21542129
def test_inmemory_cache_read_promotes_lru():
@@ -2160,25 +2135,9 @@ def test_inmemory_cache_read_promotes_lru():
21602135
# Read of a promotes it past b in LRU order.
21612136
_ = cache[b"a"]
21622137
cache[b"c"] = b"c" * 100 # b is now the oldest; b should evict
2163-
assert b"a" in cache
2164-
assert b"b" not in cache
2165-
assert b"c" in cache
2166-
2167-
2168-
def test_inmemory_cache_contains_does_not_promote_lru():
2169-
"""``__contains__`` is read-only and must not shift LRU order, mirroring
2170-
FileStream semantics."""
2171-
from cuda.core.utils import InMemoryProgramCache
2172-
2173-
cache = InMemoryProgramCache(max_size_bytes=250)
2174-
cache[b"a"] = b"a" * 100
2175-
cache[b"b"] = b"b" * 100
2176-
# Membership check must NOT promote a in LRU.
2177-
assert b"a" in cache
2178-
cache[b"c"] = b"c" * 100 # a is still oldest -> a evicts
2179-
assert b"a" not in cache
2180-
assert b"b" in cache
2181-
assert b"c" in cache
2138+
assert cache.get(b"a") is not None
2139+
assert cache.get(b"b") is None
2140+
assert cache.get(b"c") is not None
21822141

21832142

21842143
def test_inmemory_cache_oversized_write_evicts_itself():
@@ -2188,7 +2147,7 @@ def test_inmemory_cache_oversized_write_evicts_itself():
21882147

21892148
cache = InMemoryProgramCache(max_size_bytes=10)
21902149
cache[b"big"] = b"x" * 100
2191-
assert b"big" not in cache
2150+
assert cache.get(b"big") is None
21922151
assert len(cache) == 0
21932152

21942153

cuda_core/tests/test_program_cache_multiprocess.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def test_concurrent_writers_distinct_keys_all_survive(tmp_path):
8484
for base in range(n_procs):
8585
for i in range(per_proc):
8686
key = f"proc-{base}-key-{i}".encode()
87-
assert key in cache
87+
assert cache.get(key) is not None
8888

8989

9090
def test_concurrent_reader_never_sees_torn_file(tmp_path):

0 commit comments

Comments
 (0)