Skip to content

Commit 571c17a

Browse files
committed
restore InMemoryProgramCache (bytes-in / bytes-out)
Bring back the in-memory backend that was inadvertently dropped during the SQLite removal + raw-bytes refactor. This adapts the original OrderedDict + RLock LRU implementation to the bytes-API contract:

* __setitem__ accepts bytes/bytearray/memoryview/ObjectCode (path-backed too) via _extract_bytes; __getitem__ returns bytes.
* Size-only bound (max_size_bytes), matching FileStream's policy.
* A read (__getitem__) promotes the entry in LRU order; __contains__ does not (mirrors FileStream).
* OrderedDict insertion order encodes recency; eviction pops from the oldest end after each write that crosses the cap.
* A threading.RLock guards every method, so a single instance is safe to share across threads.

Also wires it into cuda.core.utils' __all__ + lazy attrs and adds an autosummary entry to api.rst.

Tests added in tests/test_program_cache.py cover: empty cache, roundtrip (ObjectCode and bytes-likes), key/value type validation, delete/clear, get/update, overwrite size accounting, negative-cap rejection, size-cap eviction, read-promotes-LRU, contains-does-not-promote, oversized self-eviction, and unbounded mode.
1 parent 46c4dfb commit 571c17a

4 files changed

Lines changed: 366 additions & 0 deletions

File tree

cuda_core/cuda/core/utils/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
__all__ = [
1111
"FileStreamProgramCache",
12+
"InMemoryProgramCache",
1213
"ProgramCacheResource",
1314
"StridedMemoryView",
1415
"args_viewable_as_strided_memory",
@@ -21,6 +22,7 @@
2122
_LAZY_CACHE_ATTRS = frozenset(
2223
{
2324
"FileStreamProgramCache",
25+
"InMemoryProgramCache",
2426
"ProgramCacheResource",
2527
"make_program_cache_key",
2628
}

cuda_core/cuda/core/utils/_program_cache.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141

4242
__all__ = [
4343
"FileStreamProgramCache",
44+
"InMemoryProgramCache",
4445
"ProgramCacheResource",
4546
"make_program_cache_key",
4647
]
@@ -871,6 +872,133 @@ def _probe(label: str, fn):
871872
return hasher.digest()
872873

873874

875+
# ---------------------------------------------------------------------------
876+
# In-memory backend
877+
# ---------------------------------------------------------------------------
878+
879+
880+
class InMemoryProgramCache(ProgramCacheResource):
    """Process-local program cache that evicts least-recently-used entries.

    Intended for single-process workflows that want to skip disk I/O
    entirely: kernels are typically compiled once per process and then
    looked up many times. Nothing outlives the process; reach for
    :class:`FileStreamProgramCache` when entries must persist across
    runs.

    The backend is bytes-in / bytes-out, like
    :class:`FileStreamProgramCache`. ``__setitem__`` takes ``bytes``,
    ``bytearray``, ``memoryview``, or any :class:`~cuda.core.ObjectCode`
    (including path-backed ones -- the file is read at write time, so the
    stored entry is the binary content rather than a path), and
    ``__getitem__`` always hands back ``bytes``.

    Parameters
    ----------
    max_size_bytes:
        Optional upper bound on the summed size of all stored payloads.
        Once a write pushes the total above it, entries are evicted in
        LRU order until the total fits again. ``None`` disables the
        bound. Only size is capped, as in
        :class:`FileStreamProgramCache`.

    Notes
    -----
    Only :meth:`__getitem__` refreshes an entry's recency.
    :meth:`__contains__` is a pure query and leaves the LRU order
    untouched, matching :class:`FileStreamProgramCache`.

    Thread safety: every method runs under a single
    :class:`threading.RLock`, so one instance can be shared between
    threads without external locking.
    """

    def __init__(
        self,
        *,
        max_size_bytes: int | None = None,
    ) -> None:
        negative_cap = max_size_bytes is not None and max_size_bytes < 0
        if negative_cap:
            raise ValueError("max_size_bytes must be non-negative or None")
        self._max_size_bytes = max_size_bytes
        # The mapping's insertion order doubles as the LRU order: the
        # first key is the coldest, the last key is the hottest. Values
        # are ``(payload_bytes, payload_size)`` so eviction never has to
        # call ``len()`` on a payload again.
        self._entries: collections.OrderedDict[bytes, tuple[bytes, int]] = collections.OrderedDict()
        self._total_bytes = 0
        # An RLock (rather than a plain Lock) lets a method that already
        # holds the lock call another locking helper without deadlocking.
        self._lock = threading.RLock()

    def __getitem__(self, key: object) -> bytes:
        """Return the payload stored under ``key`` and mark it most recent.

        Raises ``KeyError`` carrying the caller's original ``key`` when no
        entry exists.
        """
        cache_key = _as_key_bytes(key)
        with self._lock:
            try:
                payload = self._entries[cache_key][0]
            except KeyError:
                raise KeyError(key) from None
            # A genuine read moves the entry to the hot end so that
            # eviction targets entries nobody is using.
            self._entries.move_to_end(cache_key)
            return payload

    def __setitem__(
        self, key: object, value: bytes | bytearray | memoryview | ObjectCode
    ) -> None:
        """Store ``value`` under ``key``, then enforce the size cap.

        The value is converted before the key is validated, and both
        happen before the lock is taken. An existing entry for the same
        key is replaced and the new entry becomes the most recent one.
        """
        payload = _extract_bytes(value)
        nbytes = len(payload)
        cache_key = _as_key_bytes(key)
        with self._lock:
            # Pop-then-insert (instead of assigning in place) puts the
            # key at the hot end of the ordering even on overwrite.
            replaced = self._entries.pop(cache_key, None)
            self._entries[cache_key] = (payload, nbytes)
            # Net change in the running total: the new size, minus the
            # size of whatever entry was replaced.
            self._total_bytes += nbytes if replaced is None else nbytes - replaced[1]
            self._evict_to_caps()

    def __contains__(self, key: object) -> bool:
        """Report whether ``key`` is cached, without touching LRU order."""
        # The key is still validated (as FileStream does): a key that is
        # neither str nor bytes is a programming error and should raise
        # rather than silently answer "not present".
        cache_key = _as_key_bytes(key)
        with self._lock:
            return cache_key in self._entries

    def __delitem__(self, key: object) -> None:
        """Remove the entry for ``key`` and release its bytes from the total.

        Raises ``KeyError`` carrying the caller's original ``key`` when no
        entry exists.
        """
        cache_key = _as_key_bytes(key)
        with self._lock:
            try:
                removed = self._entries.pop(cache_key)
            except KeyError:
                raise KeyError(key) from None
            self._total_bytes -= removed[1]

    def __len__(self) -> int:
        """Return the number of entries currently held."""
        with self._lock:
            return len(self._entries)

    def clear(self) -> None:
        """Drop every entry and reset the byte total to zero."""
        with self._lock:
            self._entries.clear()
            self._total_bytes = 0

    # -- eviction ------------------------------------------------------------

    def _evict_to_caps(self) -> None:
        """Drop the coldest entries until the total fits ``max_size_bytes``.

        Invoked by ``__setitem__`` once the new entry is in place.
        Entries are removed from the front of the OrderedDict, i.e.
        oldest first. When the entry that was just written is larger than
        the cap on its own, the loop ends up removing it as well --
        mirroring :class:`FileStreamProgramCache`, where a write that
        cannot fit does not survive its own size-cap pass.
        """
        cap = self._max_size_bytes
        if cap is None:
            return
        entries = self._entries
        while entries and self._total_bytes > cap:
            _evicted_key, (_evicted_payload, freed) = entries.popitem(last=False)
            self._total_bytes -= freed
1000+
1001+
8741002
# ---------------------------------------------------------------------------
8751003
# FileStream backend
8761004
# ---------------------------------------------------------------------------

cuda_core/docs/source/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,4 +265,5 @@ avoid recompiling identical source + options + target without writing the
265265
:toctree: generated/
266266

267267
ProgramCacheResource
268+
InMemoryProgramCache
268269
FileStreamProgramCache

0 commit comments

Comments
 (0)