Skip to content

Commit 457cab7

Browse files
committed
feat(core.utils): add InMemoryProgramCache backend (#177)
New in-process cache that stores ObjectCode instances by reference inside an OrderedDict, suitable for workflows that compile kernels once per process and look them up many times without wanting disk I/O. Behaviour: - LRU eviction on both ``max_entries`` and ``max_size_bytes`` (either or both can be set; ``None`` means unbounded on that axis). - ``__getitem__`` promotes the entry; ``__contains__`` is read-only and does not shift LRU order -- matches the persistent backends. - A ``threading.RLock`` serialises every method so the cache can be shared across threads without external locking. - Entries are stored by reference: reads return the same Python object, so callers must treat the returned ObjectCode as read-only. - Rejects non-ObjectCode values and path-backed ObjectCode (same ``_require_object_code`` guard the persistent backends use) to avoid silently caching content that lives elsewhere on disk. Tests cover CRUD, key normalisation, cap validation, LRU touch/contains semantics, combined caps, size accounting on overwrite, degenerate caps (single entry > cap, max_entries=0), and a threaded stress smoke test. Closes #177
1 parent cad93d0 commit 457cab7

4 files changed

Lines changed: 473 additions & 5 deletions

File tree

cuda_core/cuda/core/utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# anyway, so treat that as expected.
1717
_LAZY_CACHE_ATTRS = (
1818
"FileStreamProgramCache",
19+
"InMemoryProgramCache",
1920
"ProgramCacheResource",
2021
"SQLiteProgramCache",
2122
"make_program_cache_key",

cuda_core/cuda/core/utils/_program_cache.py

Lines changed: 147 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,26 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
"""Persistent program caches for cuda.core.
5+
"""Program caches for cuda.core.
66
7-
Two concrete backends are provided:
7+
Three concrete backends are provided:
88
9+
* :class:`InMemoryProgramCache` -- a pure in-process dict-backed cache with
10+
LRU eviction; fastest when the compiled artifacts are only needed for the
11+
lifetime of the process.
912
* :class:`SQLiteProgramCache` -- a single-file sqlite3 database, best for
1013
single-process workflows, with LRU eviction and a hard size cap.
1114
* :class:`FileStreamProgramCache` -- a directory of atomically-written entry
1215
files, safe across concurrent processes via :func:`os.replace`.
1316
14-
Both implement :class:`ProgramCacheResource`, so callers can swap backends
15-
without changing the calling code.
17+
All three implement :class:`ProgramCacheResource`, so callers can swap
18+
backends without changing the calling code.
1619
"""
1720

1821
from __future__ import annotations
1922

2023
import abc
24+
import collections
2125
import collections.abc
2226
import contextlib
2327
import errno
@@ -44,6 +48,7 @@
4448

4549
__all__ = [
4650
"FileStreamProgramCache",
51+
"InMemoryProgramCache",
4752
"ProgramCacheResource",
4853
"SQLiteProgramCache",
4954
"make_program_cache_key",
@@ -833,6 +838,144 @@ def _probe(label: str, fn):
833838
return hasher.digest()
834839

835840

841+
# ---------------------------------------------------------------------------
842+
# In-memory backend
843+
# ---------------------------------------------------------------------------
844+
845+
846+
class InMemoryProgramCache(ProgramCacheResource):
    """Process-local program cache with least-recently-used eviction.

    Designed for workflows that compile kernels once per process and then
    look them up many times without touching disk: entries exist only for
    the lifetime of the process. For a cache that survives across runs,
    use :class:`SQLiteProgramCache` or :class:`FileStreamProgramCache`
    instead.

    :class:`~cuda.core.ObjectCode` values are held by reference -- nothing
    is pickled -- so a hit returns the very same Python object that was
    stored, and mutating it mutates the cached entry too. Treat returned
    values as read-only.

    Parameters
    ----------
    max_entries:
        Optional ceiling on how many entries may be stored. Exceeding it
        triggers eviction of the least-recently-used entries until the
        count fits. ``None`` disables this cap.
    max_size_bytes:
        Optional ceiling on the total of ``len(entry.code)`` over all
        entries. Exceeding it triggers LRU eviction until the total fits.
        ``None`` disables this cap.

    Notes
    -----
    Only :meth:`__getitem__` refreshes an entry's recency;
    :meth:`__contains__` is a pure read and leaves LRU order untouched,
    exactly like the persistent backends.

    Every method runs under a shared :class:`threading.RLock`, so one
    instance may be used from multiple threads without external locking.
    """

    def __init__(
        self,
        *,
        max_entries: int | None = None,
        max_size_bytes: int | None = None,
    ) -> None:
        # Reject negative caps up front; ``None`` means "no cap on this axis".
        if max_entries is not None and max_entries < 0:
            raise ValueError("max_entries must be non-negative or None")
        if max_size_bytes is not None and max_size_bytes < 0:
            raise ValueError("max_size_bytes must be non-negative or None")
        self._max_entries = max_entries
        self._max_size_bytes = max_size_bytes
        # The OrderedDict's key order *is* the LRU order: front = coldest,
        # back = hottest. Values are ``(ObjectCode, payload_size_bytes)``
        # tuples; carrying the size alongside the object means eviction
        # never has to call ``len(code)`` again.
        self._entries: collections.OrderedDict[bytes, tuple[ObjectCode, int]] = collections.OrderedDict()
        self._total_bytes = 0
        # An RLock (rather than a plain Lock) lets any future helper that
        # also acquires the lock be called from an already-locked method.
        self._lock = threading.RLock()

    def __getitem__(self, key: object) -> ObjectCode:
        normalized = _as_key_bytes(key)
        with self._lock:
            entry = self._entries.get(normalized)
            if entry is None:
                # Report the caller's original key, not the normalized form.
                raise KeyError(key)
            # A genuine read makes this entry "most recent", steering
            # eviction toward entries that are actually cold.
            self._entries.move_to_end(normalized)
            return entry[0]

    def __setitem__(self, key: object, value: object) -> None:
        code_obj = _require_object_code(value)
        # _require_object_code has already refused path-backed ObjectCode
        # (whose ``code`` attribute is a str), so this length is the
        # payload size in bytes.
        payload_size = len(code_obj.code)
        normalized = _as_key_bytes(key)
        with self._lock:
            previous = self._entries.pop(normalized, None)
            if previous is not None:
                # Overwrite: retire the old payload's bytes from the total.
                self._total_bytes -= previous[1]
            self._entries[normalized] = (code_obj, payload_size)
            self._total_bytes += payload_size
            self._evict_to_caps()

    def __contains__(self, key: object) -> bool:
        # Normalize (and thereby validate) the key so that an illegal key
        # type raises, mirroring the SQLite/FileStream backends, instead
        # of silently answering "not present".
        normalized = _as_key_bytes(key)
        with self._lock:
            return normalized in self._entries

    def __delitem__(self, key: object) -> None:
        normalized = _as_key_bytes(key)
        with self._lock:
            if normalized not in self._entries:
                raise KeyError(key)
            _removed, freed = self._entries.pop(normalized)
            self._total_bytes -= freed

    def __len__(self) -> int:
        with self._lock:
            return len(self._entries)

    def clear(self) -> None:
        with self._lock:
            self._entries.clear()
            self._total_bytes = 0

    # -- eviction ------------------------------------------------------------

    def _evict_to_caps(self) -> None:
        """Drop coldest entries until neither cap is exceeded.

        Runs under the lock, from ``__setitem__``, after the new entry is
        in place. Entries are popped from the front of the OrderedDict
        (least recently used first), matching
        :class:`SQLiteProgramCache` / :class:`FileStreamProgramCache`
        semantics. A freshly inserted entry that by itself overshoots
        ``max_size_bytes`` is evicted as well -- the same fate an
        oversized write meets in the persistent backends.
        """
        cap_entries = self._max_entries
        cap_bytes = self._max_size_bytes
        while self._entries:
            too_many = cap_entries is not None and len(self._entries) > cap_entries
            too_big = cap_bytes is not None and self._total_bytes > cap_bytes
            if not (too_many or too_big):
                break
            _evicted_key, (_evicted_obj, freed) = self._entries.popitem(last=False)
            self._total_bytes -= freed
836979
# ---------------------------------------------------------------------------
837980
# SQLite backend
838981
# ---------------------------------------------------------------------------

cuda_core/docs/source/api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,5 +259,6 @@ Program caches
259259
:toctree: generated/
260260

261261
ProgramCacheResource
262+
InMemoryProgramCache
262263
SQLiteProgramCache
263264
FileStreamProgramCache

0 commit comments

Comments
 (0)