|
2 | 2 | # |
3 | 3 | # SPDX-License-Identifier: Apache-2.0 |
4 | 4 |
|
5 | | -"""Persistent program caches for cuda.core. |
| 5 | +"""Program caches for cuda.core. |
6 | 6 |
|
7 | | -Two concrete backends are provided: |
| 7 | +Three concrete backends are provided: |
8 | 8 |
|
| 9 | +* :class:`InMemoryProgramCache` -- a pure in-process dict-backed cache with |
| 10 | + LRU eviction; fastest when the compiled artifacts are only needed for the |
| 11 | + lifetime of the process. |
9 | 12 | * :class:`SQLiteProgramCache` -- a single-file sqlite3 database, best for |
10 | 13 | single-process workflows, with LRU eviction and a hard size cap. |
11 | 14 | * :class:`FileStreamProgramCache` -- a directory of atomically-written entry |
12 | 15 | files, safe across concurrent processes via :func:`os.replace`. |
13 | 16 |
|
14 | | -Both implement :class:`ProgramCacheResource`, so callers can swap backends |
15 | | -without changing the calling code. |
| 17 | +All three implement :class:`ProgramCacheResource`, so callers can swap |
| 18 | +backends without changing the calling code. |
16 | 19 | """ |
17 | 20 |
|
18 | 21 | from __future__ import annotations |
19 | 22 |
|
20 | 23 | import abc |
| 24 | +import collections |
21 | 25 | import collections.abc |
22 | 26 | import contextlib |
23 | 27 | import errno |
|
44 | 48 |
|
45 | 49 | __all__ = [ |
46 | 50 | "FileStreamProgramCache", |
| 51 | + "InMemoryProgramCache", |
47 | 52 | "ProgramCacheResource", |
48 | 53 | "SQLiteProgramCache", |
49 | 54 | "make_program_cache_key", |
@@ -833,6 +838,144 @@ def _probe(label: str, fn): |
833 | 838 | return hasher.digest() |
834 | 839 |
|
835 | 840 |
|
| 841 | +# --------------------------------------------------------------------------- |
| 842 | +# In-memory backend |
| 843 | +# --------------------------------------------------------------------------- |
| 844 | + |
| 845 | + |
class InMemoryProgramCache(ProgramCacheResource):
    """Process-local program cache with least-recently-used eviction.

    Unlike :class:`SQLiteProgramCache` and :class:`FileStreamProgramCache`,
    nothing touches disk: entries exist only for the lifetime of the
    process, which makes this the fastest backend when the compiled
    artifacts do not need to persist across runs.

    :class:`~cuda.core.ObjectCode` values are held by reference -- no
    pickling on write, no deserialization on read, and every hit for a key
    yields the very same Python object. Because of that sharing, treat
    returned values as read-only: mutating one mutates the cached entry.

    Parameters
    ----------
    max_entries:
        Upper bound on the entry count, or ``None`` for no bound. When
        the bound is exceeded, the least-recently-used entries are
        dropped until the count fits.
    max_size_bytes:
        Upper bound on the sum of ``len(entry.code)`` over all entries,
        or ``None`` for no bound. Exceeding it triggers LRU eviction
        until the total fits.

    Notes
    -----
    Only :meth:`__getitem__` refreshes an entry's recency;
    :meth:`__contains__` is a pure query and leaves LRU order alone,
    consistent with the persistent backends. Every method holds a
    :class:`threading.RLock`, so instances may be shared between threads
    without external locking.
    """

    def __init__(
        self,
        *,
        max_entries: int | None = None,
        max_size_bytes: int | None = None,
    ) -> None:
        if max_entries is not None and max_entries < 0:
            raise ValueError("max_entries must be non-negative or None")
        if max_size_bytes is not None and max_size_bytes < 0:
            raise ValueError("max_size_bytes must be non-negative or None")
        self._max_entries = max_entries
        self._max_size_bytes = max_size_bytes
        # Ordered oldest-to-newest so LRU order falls out of insertion
        # order. Each value pairs the ObjectCode with its payload size,
        # so eviction never recomputes ``len(code)``.
        self._store: collections.OrderedDict[bytes, tuple[ObjectCode, int]] = collections.OrderedDict()
        self._payload_bytes = 0
        # Reentrant lock so future helper methods that also lock can
        # nest without deadlocking.
        self._lock = threading.RLock()

    def __getitem__(self, key: object) -> ObjectCode:
        """Return the cached ``ObjectCode`` for *key*, promoting it to MRU."""
        k = _as_key_bytes(key)
        with self._lock:
            if k not in self._store:
                raise KeyError(key)
            # A real read marks the entry as most recently used, so
            # eviction prefers genuinely cold entries.
            self._store.move_to_end(k)
            return self._store[k][0]

    def __setitem__(self, key: object, value: object) -> None:
        """Store *value* (an ``ObjectCode``) under *key*, then enforce the caps."""
        obj = _require_object_code(value)
        # Path-backed ObjectCode (where ``code`` is a str) was rejected
        # by _require_object_code, so this is a well-defined byte count.
        nbytes = len(obj.code)
        k = _as_key_bytes(key)
        with self._lock:
            stale = self._store.pop(k, None)
            if stale is not None:
                self._payload_bytes -= stale[1]
            self._store[k] = (obj, nbytes)
            self._payload_bytes += nbytes
            self._evict_to_caps()

    def __contains__(self, key: object) -> bool:
        """Report presence without disturbing LRU order.

        The key is validated first so an unsupported key type raises
        (matching the SQLite and file-stream backends) rather than
        quietly reading as "not present".
        """
        k = _as_key_bytes(key)
        with self._lock:
            return k in self._store

    def __delitem__(self, key: object) -> None:
        """Remove the entry for *key*; raise ``KeyError`` if it is absent."""
        k = _as_key_bytes(key)
        with self._lock:
            if k not in self._store:
                raise KeyError(key)
            _obj, nbytes = self._store.pop(k)
            self._payload_bytes -= nbytes

    def __len__(self) -> int:
        """Return the number of cached entries."""
        with self._lock:
            return len(self._store)

    def clear(self) -> None:
        """Drop every entry and reset the byte accounting."""
        with self._lock:
            self._store.clear()
            self._payload_bytes = 0

    # -- eviction ------------------------------------------------------------

    def _evict_to_caps(self) -> None:
        """Drop LRU entries until both caps hold; caller must hold the lock.

        Runs after every insert/update. Entries are popped from the
        oldest end of the ordered dict, matching the LRU policy of
        :class:`SQLiteProgramCache` and :class:`FileStreamProgramCache`.
        A freshly written entry that on its own exceeds
        ``max_size_bytes`` is evicted too, mirroring those backends (a
        write that cannot fit does not survive its own size-cap pass).
        """
        while self._store and self._over_capacity():
            _key, (_obj, nbytes) = self._store.popitem(last=False)
            self._payload_bytes -= nbytes

    def _over_capacity(self) -> bool:
        """Return ``True`` while either configured cap is exceeded."""
        if self._max_entries is not None and len(self._store) > self._max_entries:
            return True
        return self._max_size_bytes is not None and self._payload_bytes > self._max_size_bytes
| 978 | + |
836 | 979 | # --------------------------------------------------------------------------- |
837 | 980 | # SQLite backend |
838 | 981 | # --------------------------------------------------------------------------- |
|
0 commit comments