Skip to content

Commit 457cab7

Browse files
committed
feat(core.utils): add InMemoryProgramCache backend (#177)
New in-process cache that stores ObjectCode instances by reference inside an OrderedDict, suitable for workflows that compile kernels once per process and look them up many times without wanting disk I/O. Behaviour: - LRU eviction on both ``max_entries`` and ``max_size_bytes`` (either or both can be set; ``None`` means unbounded on that axis). - ``__getitem__`` promotes the entry; ``__contains__`` is read-only and does not shift LRU order -- matches the persistent backends. - A ``threading.RLock`` serialises every method so the cache can be shared across threads without external locking. - Entries are stored by reference: reads return the same Python object, so callers must treat the returned ObjectCode as read-only. - Rejects non-ObjectCode values and path-backed ObjectCode (same ``_require_object_code`` guard the persistent backends use) to avoid silently caching content that lives elsewhere on disk. Tests cover CRUD, key normalisation, cap validation, LRU touch/contains semantics, combined caps, size accounting on overwrite, degenerate caps (single entry > cap, max_entries=0), and a threaded stress smoke test. Closes #177
1 parent cad93d0 commit 457cab7

4 files changed

Lines changed: 473 additions & 5 deletions

File tree

cuda_core/cuda/core/utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# anyway, so treat that as expected.
1717
_LAZY_CACHE_ATTRS = (
1818
"FileStreamProgramCache",
19+
"InMemoryProgramCache",
1920
"ProgramCacheResource",
2021
"SQLiteProgramCache",
2122
"make_program_cache_key",

cuda_core/cuda/core/utils/_program_cache.py

Lines changed: 147 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,26 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
"""Persistent program caches for cuda.core.
5+
"""Program caches for cuda.core.
66
7-
Two concrete backends are provided:
7+
Three concrete backends are provided:
88
9+
* :class:`InMemoryProgramCache` -- a pure in-process dict-backed cache with
10+
LRU eviction; fastest when the compiled artifacts are only needed for the
11+
lifetime of the process.
912
* :class:`SQLiteProgramCache` -- a single-file sqlite3 database, best for
1013
single-process workflows, with LRU eviction and a hard size cap.
1114
* :class:`FileStreamProgramCache` -- a directory of atomically-written entry
1215
files, safe across concurrent processes via :func:`os.replace`.
1316
14-
Both implement :class:`ProgramCacheResource`, so callers can swap backends
15-
without changing the calling code.
17+
All three implement :class:`ProgramCacheResource`, so callers can swap
18+
backends without changing the calling code.
1619
"""
1720

1821
from __future__ import annotations
1922

2023
import abc
24+
import collections
2125
import collections.abc
2226
import contextlib
2327
import errno
@@ -44,6 +48,7 @@
4448

4549
__all__ = [
4650
"FileStreamProgramCache",
51+
"InMemoryProgramCache",
4752
"ProgramCacheResource",
4853
"SQLiteProgramCache",
4954
"make_program_cache_key",
@@ -833,6 +838,144 @@ def _probe(label: str, fn):
833838
return hasher.digest()
834839

835840

841+
# ---------------------------------------------------------------------------
842+
# In-memory backend
843+
# ---------------------------------------------------------------------------
844+
845+
846+
class InMemoryProgramCache(ProgramCacheResource):
    """Process-local program cache with least-recently-used eviction.

    Designed for workflows that compile kernels once per process and then
    look them up many times without touching disk: entries exist only for
    the lifetime of the process. For a cache that survives across runs,
    use :class:`SQLiteProgramCache` or :class:`FileStreamProgramCache`
    instead.

    :class:`~cuda.core.ObjectCode` values are held by reference -- nothing
    is pickled -- so a hit returns the very same Python object that was
    stored, and mutating it mutates the cached entry too. Treat returned
    values as read-only.

    Parameters
    ----------
    max_entries:
        Optional ceiling on how many entries may be stored. Exceeding it
        triggers eviction of the least-recently-used entries until the
        count fits. ``None`` disables this cap.
    max_size_bytes:
        Optional ceiling on the total of ``len(entry.code)`` over all
        entries. Exceeding it triggers LRU eviction until the total fits.
        ``None`` disables this cap.

    Notes
    -----
    Only :meth:`__getitem__` refreshes an entry's recency;
    :meth:`__contains__` is a pure read and leaves LRU order untouched,
    exactly like the persistent backends.

    Every method runs under a shared :class:`threading.RLock`, so one
    instance may be used from multiple threads without external locking.
    """

    def __init__(
        self,
        *,
        max_entries: int | None = None,
        max_size_bytes: int | None = None,
    ) -> None:
        # Reject negative caps up front; ``None`` means "no cap on this axis".
        if max_entries is not None and max_entries < 0:
            raise ValueError("max_entries must be non-negative or None")
        if max_size_bytes is not None and max_size_bytes < 0:
            raise ValueError("max_size_bytes must be non-negative or None")
        self._max_entries = max_entries
        self._max_size_bytes = max_size_bytes
        # The OrderedDict's key order *is* the LRU order: front = coldest,
        # back = hottest. Values are ``(ObjectCode, payload_size_bytes)``
        # tuples; carrying the size alongside the object means eviction
        # never has to call ``len(code)`` again.
        self._entries: collections.OrderedDict[bytes, tuple[ObjectCode, int]] = collections.OrderedDict()
        self._total_bytes = 0
        # An RLock (rather than a plain Lock) lets any future helper that
        # also acquires the lock be called from an already-locked method.
        self._lock = threading.RLock()

    def __getitem__(self, key: object) -> ObjectCode:
        normalized = _as_key_bytes(key)
        with self._lock:
            entry = self._entries.get(normalized)
            if entry is None:
                # Report the caller's original key, not the normalized form.
                raise KeyError(key)
            # A genuine read makes this entry "most recent", steering
            # eviction toward entries that are actually cold.
            self._entries.move_to_end(normalized)
            return entry[0]

    def __setitem__(self, key: object, value: object) -> None:
        code_obj = _require_object_code(value)
        # _require_object_code has already refused path-backed ObjectCode
        # (whose ``code`` attribute is a str), so this length is the
        # payload size in bytes.
        payload_size = len(code_obj.code)
        normalized = _as_key_bytes(key)
        with self._lock:
            previous = self._entries.pop(normalized, None)
            if previous is not None:
                # Overwrite: retire the old payload's bytes from the total.
                self._total_bytes -= previous[1]
            self._entries[normalized] = (code_obj, payload_size)
            self._total_bytes += payload_size
            self._evict_to_caps()

    def __contains__(self, key: object) -> bool:
        # Normalize (and thereby validate) the key so that an illegal key
        # type raises, mirroring the SQLite/FileStream backends, instead
        # of silently answering "not present".
        normalized = _as_key_bytes(key)
        with self._lock:
            return normalized in self._entries

    def __delitem__(self, key: object) -> None:
        normalized = _as_key_bytes(key)
        with self._lock:
            if normalized not in self._entries:
                raise KeyError(key)
            _removed, freed = self._entries.pop(normalized)
            self._total_bytes -= freed

    def __len__(self) -> int:
        with self._lock:
            return len(self._entries)

    def clear(self) -> None:
        with self._lock:
            self._entries.clear()
            self._total_bytes = 0

    # -- eviction ------------------------------------------------------------

    def _evict_to_caps(self) -> None:
        """Drop coldest entries until neither cap is exceeded.

        Runs under the lock, from ``__setitem__``, after the new entry is
        in place. Entries are popped from the front of the OrderedDict
        (least recently used first), matching
        :class:`SQLiteProgramCache` / :class:`FileStreamProgramCache`
        semantics. A freshly inserted entry that by itself overshoots
        ``max_size_bytes`` is evicted as well -- the same fate an
        oversized write meets in the persistent backends.
        """
        cap_entries = self._max_entries
        cap_bytes = self._max_size_bytes
        while self._entries:
            too_many = cap_entries is not None and len(self._entries) > cap_entries
            too_big = cap_bytes is not None and self._total_bytes > cap_bytes
            if not (too_many or too_big):
                break
            _evicted_key, (_evicted_obj, freed) = self._entries.popitem(last=False)
            self._total_bytes -= freed
836979
# ---------------------------------------------------------------------------
837980
# SQLite backend
838981
# ---------------------------------------------------------------------------

cuda_core/docs/source/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,5 +259,6 @@ Program caches
259259
:toctree: generated/
260260

261261
ProgramCacheResource
262+
InMemoryProgramCache
262263
SQLiteProgramCache
263264
FileStreamProgramCache

0 commit comments

Comments
 (0)