restore InMemoryProgramCache (bytes-in / bytes-out)

cpcloud · cpcloud · commit 571c17aa3f2c · 2026-04-28T10:44:43.000-04:00
Bring back the in-memory backend that was inadvertently dropped during
the SQLite removal + raw-bytes refactor. Adapts the original OrderedDict
+ RLock LRU implementation to the bytes-API contract:

* __setitem__ accepts bytes/bytearray/memoryview/ObjectCode (path-backed
  too) via _extract_bytes; __getitem__ returns bytes.
* Size-only bound (max_size_bytes), matching FileStream's policy.
* __getitem__ promotes the entry in LRU order; __contains__ does not
  (mirrors FileStream).
* OrderedDict insertion order encodes recency; eviction pops from the
  oldest end after each write that crosses the cap.
* threading.RLock so a single instance is safe across threads.

Also wires it into cuda.core.utils' __all__ + lazy attrs and adds an
autosummary entry to api.rst. Tests added in tests/test_program_cache.py
cover empty, roundtrip (ObjectCode and bytes-likes), key/value type
validation, delete/clear, get/update, overwrite size accounting,
negative-cap rejection, size-cap eviction, read-promotes-LRU,
contains-does-not-promote, oversized self-eviction, and unbounded mode.
diff --git a/cuda_core/cuda/core/utils/__init__.py b/cuda_core/cuda/core/utils/__init__.py
@@ -9,6 +9,7 @@
 
 __all__ = [
     "FileStreamProgramCache",
+    "InMemoryProgramCache",
     "ProgramCacheResource",
     "StridedMemoryView",
     "args_viewable_as_strided_memory",
@@ -21,6 +22,7 @@
 _LAZY_CACHE_ATTRS = frozenset(
     {
         "FileStreamProgramCache",
+        "InMemoryProgramCache",
         "ProgramCacheResource",
         "make_program_cache_key",
     }
diff --git a/cuda_core/cuda/core/utils/_program_cache.py b/cuda_core/cuda/core/utils/_program_cache.py
@@ -41,6 +41,7 @@
 
 __all__ = [
     "FileStreamProgramCache",
+    "InMemoryProgramCache",
     "ProgramCacheResource",
     "make_program_cache_key",
 ]
@@ -871,6 +872,133 @@ def _probe(label: str, fn):
     return hasher.digest()
 
 
+# ---------------------------------------------------------------------------
+# In-memory backend
+# ---------------------------------------------------------------------------
+
+
+class InMemoryProgramCache(ProgramCacheResource):
+    """In-memory program cache with LRU eviction.
+
+    Suitable for single-process workflows that want to avoid disk I/O --
+    a typical application compiles its kernels once per process and
+    looks them up many times. Entries live only for the lifetime of
+    the process; use :class:`FileStreamProgramCache` when the cache
+    should persist across runs.
+
+    Like :class:`FileStreamProgramCache`, this backend is bytes-in /
+    bytes-out: ``__setitem__`` accepts ``bytes``, ``bytearray``,
+    ``memoryview``, or any :class:`~cuda.core.ObjectCode` (path-backed
+    too -- the file is read at write time so the cached entry holds the
+    binary content, not a path). ``__getitem__`` returns ``bytes``.
+
+    Parameters
+    ----------
+    max_size_bytes:
+        Optional cap on the sum of stored payload sizes. When exceeded,
+        LRU eviction runs until the total fits. ``None`` means
+        unbounded. The size-only bound mirrors
+        :class:`FileStreamProgramCache`.
+
+    Notes
+    -----
+    Recency is updated by :meth:`__getitem__` and by every write;
+    :meth:`__contains__` is read-only and does not shift LRU order,
+    matching :class:`FileStreamProgramCache`.
+
+    Thread safety: a :class:`threading.RLock` guards every access to
+    the entry table and size counter (key/value normalisation runs
+    before the lock is taken), so no external locking is needed.
+    """
+
+    def __init__(
+        self,
+        *,
+        max_size_bytes: int | None = None,
+    ) -> None:
+        if max_size_bytes is not None and max_size_bytes < 0:
+            raise ValueError("max_size_bytes must be non-negative or None")
+        self._max_size_bytes = max_size_bytes
+        # Key insertion order encodes LRU order: oldest first, newest last.
+        # Each value is ``(payload_bytes, payload_size)``; the stored size
+        # is what ``_total_bytes`` is adjusted by on removal and eviction.
+        self._entries: collections.OrderedDict[bytes, tuple[bytes, int]] = collections.OrderedDict()
+        self._total_bytes = 0
+        # Reentrant so lock-taking helpers can nest; none here does so yet.
+        self._lock = threading.RLock()
+
+    def __getitem__(self, key: object) -> bytes:
+        """Return the bytes stored under ``key``; a miss raises ``KeyError(key)``."""
+        k = _as_key_bytes(key)
+        with self._lock:
+            try:
+                data, _size = self._entries[k]
+            except KeyError:
+                raise KeyError(key) from None
+            # A real read makes the entry newest so eviction hits cold ones.
+            self._entries.move_to_end(k)
+            return data
+
+    def __setitem__(
+        self, key: object, value: bytes | bytearray | memoryview | ObjectCode
+    ) -> None:
+        """Store the bytes of ``value`` under ``key``, then enforce the cap."""
+        data = _extract_bytes(value)
+        size = len(data)
+        k = _as_key_bytes(key)
+        with self._lock:
+            # Pop first: an overwrite drops its old size and re-enters newest.
+            existing = self._entries.pop(k, None)
+            if existing is not None:
+                self._total_bytes -= existing[1]
+            self._entries[k] = (data, size)
+            self._total_bytes += size
+            self._evict_to_caps()
+
+    def __contains__(self, key: object) -> bool:
+        # Validate the key like FileStream: a non-str, non-bytes key is a
+        # programming error and should surface, not report "not present".
+        k = _as_key_bytes(key)
+        with self._lock:
+            return k in self._entries
+
+    def __delitem__(self, key: object) -> None:
+        k = _as_key_bytes(key)
+        with self._lock:
+            try:
+                _data, size = self._entries.pop(k)
+            except KeyError:
+                raise KeyError(key) from None
+            self._total_bytes -= size
+
+    def __len__(self) -> int:
+        with self._lock:
+            return len(self._entries)
+
+    def clear(self) -> None:
+        with self._lock:
+            self._entries.clear()
+            self._total_bytes = 0
+
+    # -- eviction ------------------------------------------------------------
+
+    def _evict_to_caps(self) -> None:
+        """Evict oldest entries until the size cap is satisfied.
+
+        Called from ``__setitem__`` (lock held) after an insert/update.
+        Pops from the front of the OrderedDict (oldest first). If the
+        just-inserted entry on its own exceeds ``max_size_bytes``, the
+        loop drains every older entry and then evicts it too --
+        mirroring :class:`FileStreamProgramCache` (a write that cannot
+        fit does not survive its own size-cap pass).
+        """
+        if self._max_size_bytes is None:
+            return
+        while self._entries and self._total_bytes > self._max_size_bytes:
+            _k, (_data, size) = self._entries.popitem(last=False)
+            self._total_bytes -= size
+
+
 # ---------------------------------------------------------------------------
 # FileStream backend
 # ---------------------------------------------------------------------------
diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst
@@ -265,4 +265,5 @@ avoid recompiling identical source + options + target without writing the
    :toctree: generated/
 
    ProgramCacheResource
+   InMemoryProgramCache
    FileStreamProgramCache
diff --git a/cuda_core/tests/test_program_cache.py b/cuda_core/tests/test_program_cache.py

Original file line number	Diff line number	Diff line change
`@@ -9,6 +9,7 @@`
`9`	`9`
`10`	`10`	`__all__ = [`
`11`	`11`	`"FileStreamProgramCache",`
	`12`	`+ "InMemoryProgramCache",`
`12`	`13`	`"ProgramCacheResource",`
`13`	`14`	`"StridedMemoryView",`
`14`	`15`	`"args_viewable_as_strided_memory",`
`@@ -21,6 +22,7 @@`
`21`	`22`	`_LAZY_CACHE_ATTRS = frozenset(`
`22`	`23`	`{`
`23`	`24`	`"FileStreamProgramCache",`
	`25`	`+ "InMemoryProgramCache",`
`24`	`26`	`"ProgramCacheResource",`
`25`	`27`	`"make_program_cache_key",`
`26`	`28`	`}`