Skip to content

Commit c1bf83b

Browse files
committed
Add entry size limit per cache core
1 parent 4f5365d commit c1bf83b

10 files changed

Lines changed: 170 additions & 29 deletions

File tree

README.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ The following parameters will only be applied to decorators defined after `set_d
138138
* `cache_dir`
139139
* `pickle_reload`
140140
* `separate_files`
141+
* `entry_size_limit`
141142

142143
These parameters can be changed at any time and they will apply to all decorators:
143144

@@ -269,6 +270,22 @@ You can specify a maximum allowed age for a cached value on a per-call basis usi
269270
- If the cached value is older than this threshold, a new calculation is triggered and the cache is updated.
270271
- If not, the cached value is returned as usual.
271272

273+
Entry Size Limit
274+
~~~~~~~~~~~~~~~~
275+
You can prevent very large return values from being cached by specifying
276+
``entry_size_limit`` on the decorator. Values larger than this limit are
277+
returned but not stored. The limit accepts an integer number of bytes or a
278+
human-readable string such as ``"200MB"``. Units are binary, so ``"1KB"`` equals 1024 bytes.
279+
280+
.. code-block:: python
281+
282+
@cachier(entry_size_limit="10KB")
283+
def load_data():
284+
...
285+
286+
When ``cachier__verbose=True`` is passed to a call whose return value
287+
exceeds the limit, an informative message is printed noting that the
result was not cached.
288+
272289
Ignore Cache
273290
~~~~~~~~~~~~
274291

src/cachier/config.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import hashlib
22
import os
33
import pickle
4+
import re
45
import threading
56
from dataclasses import dataclass, field, replace
67
from datetime import datetime, timedelta
@@ -32,6 +33,27 @@ def _default_cache_dir():
3233
return os.path.expanduser("~/.cachier/")
3334

3435

36+
def parse_bytes(size: Union[int, str, None]) -> Optional[int]:
37+
"""Convert a human friendly size string to bytes."""
38+
if size is None:
39+
return None
40+
if isinstance(size, int):
41+
return size
42+
match = re.fullmatch(r"(?i)\s*(\d+(?:\.\d+)?)\s*([kmgt]?b)?\s*", str(size))
43+
if not match:
44+
raise ValueError(f"Invalid size value: {size}")
45+
number = float(match.group(1))
46+
unit = (match.group(2) or "b").upper()
47+
factor = {
48+
"B": 1,
49+
"KB": 1024,
50+
"MB": 1024**2,
51+
"GB": 1024**3,
52+
"TB": 1024**4,
53+
}[unit]
54+
return int(number * factor)
55+
56+
3557
class LazyCacheDir:
3658
"""Lazily resolve the default cache directory using $XDG_CACHE_HOME."""
3759

@@ -65,6 +87,7 @@ class Params:
6587
allow_none: bool = False
6688
cleanup_stale: bool = False
6789
cleanup_interval: timedelta = timedelta(days=1)
90+
entry_size_limit: Optional[int] = None
6891

6992

7093
_global_params = Params()

src/cachier/core.py

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
HashFunc,
2525
Mongetter,
2626
_update_with_defaults,
27+
parse_bytes,
2728
)
2829
from .cores.base import RecalculationNeeded, _BaseCore
2930
from .cores.memory import _MemoryCore
@@ -60,11 +61,15 @@ def _function_thread(core, key, func, args, kwds):
6061
print(f"Function call failed with the following exception:\n{exc}")
6162

6263

63-
def _calc_entry(core, key, func, args, kwds) -> Optional[Any]:
64+
def _calc_entry(
65+
core, key, func, args, kwds, printer=lambda *_: None
66+
) -> Optional[Any]:
6467
core.mark_entry_being_calculated(key)
6568
try:
6669
func_res = func(*args, **kwds)
67-
core.set_entry(key, func_res)
70+
stored = core.set_entry(key, func_res)
71+
if not stored:
72+
printer("Result exceeds entry_size_limit; not cached")
6873
return func_res
6974
finally:
7075
core.mark_entry_not_calculated(key)
@@ -123,6 +128,7 @@ def cachier(
123128
allow_none: Optional[bool] = None,
124129
cleanup_stale: Optional[bool] = None,
125130
cleanup_interval: Optional[timedelta] = None,
131+
entry_size_limit: Optional[Union[int, str]] = None,
126132
):
127133
"""Wrap as a persistent, stale-free memoization decorator.
128134
@@ -191,6 +197,10 @@ def cachier(
191197
thread. Defaults to False.
192198
cleanup_interval: datetime.timedelta, optional
193199
Minimum time between automatic cleanup runs. Defaults to one day.
200+
entry_size_limit: int or str, optional
201+
Maximum serialized size of a cached value. Values exceeding the limit
202+
are returned but not cached. Human readable strings like ``"10MB"`` are
203+
allowed.
194204
195205
"""
196206
# Check for deprecated parameters
@@ -204,6 +214,9 @@ def cachier(
204214
# Update parameters with defaults if input is None
205215
backend = _update_with_defaults(backend, "backend")
206216
mongetter = _update_with_defaults(mongetter, "mongetter")
217+
size_limit_bytes = parse_bytes(
218+
_update_with_defaults(entry_size_limit, "entry_size_limit")
219+
)
207220
# Override the backend parameter if a mongetter is provided.
208221
if callable(mongetter):
209222
backend = "mongo"
@@ -215,28 +228,34 @@ def cachier(
215228
cache_dir=cache_dir,
216229
separate_files=separate_files,
217230
wait_for_calc_timeout=wait_for_calc_timeout,
231+
entry_size_limit=size_limit_bytes,
218232
)
219233
elif backend == "mongo":
220234
core = _MongoCore(
221235
hash_func=hash_func,
222236
mongetter=mongetter,
223237
wait_for_calc_timeout=wait_for_calc_timeout,
238+
entry_size_limit=size_limit_bytes,
224239
)
225240
elif backend == "memory":
226241
core = _MemoryCore(
227-
hash_func=hash_func, wait_for_calc_timeout=wait_for_calc_timeout
242+
hash_func=hash_func,
243+
wait_for_calc_timeout=wait_for_calc_timeout,
244+
entry_size_limit=size_limit_bytes,
228245
)
229246
elif backend == "sql":
230247
core = _SQLCore(
231248
hash_func=hash_func,
232249
sql_engine=sql_engine,
233250
wait_for_calc_timeout=wait_for_calc_timeout,
251+
entry_size_limit=size_limit_bytes,
234252
)
235253
elif backend == "redis":
236254
core = _RedisCore(
237255
hash_func=hash_func,
238256
redis_client=redis_client,
239257
wait_for_calc_timeout=wait_for_calc_timeout,
258+
entry_size_limit=size_limit_bytes,
240259
)
241260
else:
242261
raise ValueError("specified an invalid core: %s" % backend)
@@ -324,12 +343,12 @@ def _call(*args, max_age: Optional[timedelta] = None, **kwds):
324343
)
325344
key, entry = core.get_entry((), kwargs)
326345
if overwrite_cache:
327-
return _calc_entry(core, key, func, args, kwds)
346+
return _calc_entry(core, key, func, args, kwds, _print)
328347
if entry is None or (
329348
not entry._completed and not entry._processing
330349
):
331350
_print("No entry found. No current calc. Calling like a boss.")
332-
return _calc_entry(core, key, func, args, kwds)
351+
return _calc_entry(core, key, func, args, kwds, _print)
333352
_print("Entry found.")
334353
if _allow_none or entry.value is not None:
335354
_print("Cached result found.")
@@ -362,7 +381,7 @@ def _call(*args, max_age: Optional[timedelta] = None, **kwds):
362381
try:
363382
return core.wait_on_entry_calc(key)
364383
except RecalculationNeeded:
365-
return _calc_entry(core, key, func, args, kwds)
384+
return _calc_entry(core, key, func, args, kwds, _print)
366385
if _next_time:
367386
_print("Async calc and return stale")
368387
core.mark_entry_being_calculated(key)
@@ -374,15 +393,15 @@ def _call(*args, max_age: Optional[timedelta] = None, **kwds):
374393
core.mark_entry_not_calculated(key)
375394
return entry.value
376395
_print("Calling decorated function and waiting")
377-
return _calc_entry(core, key, func, args, kwds)
396+
return _calc_entry(core, key, func, args, kwds, _print)
378397
if entry._processing:
379398
_print("No value but being calculated. Waiting.")
380399
try:
381400
return core.wait_on_entry_calc(key)
382401
except RecalculationNeeded:
383-
return _calc_entry(core, key, func, args, kwds)
402+
return _calc_entry(core, key, func, args, kwds, _print)
384403
_print("No entry found. No current calc. Calling like a boss.")
385-
return _calc_entry(core, key, func, args, kwds)
404+
return _calc_entry(core, key, func, args, kwds, _print)
386405

387406
# MAINTAINER NOTE: The main function wrapper is now a standard function
388407
# that passes *args and **kwargs to _call. This ensures that user

src/cachier/cores/base.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99

1010
import abc # for the _BaseCore abstract base class
1111
import inspect
12+
import pickle
13+
import sys
1214
import threading
1315
from datetime import timedelta
14-
from typing import Callable, Optional, Tuple
16+
from typing import Any, Callable, Optional, Tuple
1517

1618
from .._types import HashFunc
1719
from ..config import CacheEntry, _update_with_defaults
@@ -34,10 +36,12 @@ def __init__(
3436
self,
3537
hash_func: Optional[HashFunc],
3638
wait_for_calc_timeout: Optional[int],
39+
entry_size_limit: Optional[int] = None,
3740
):
3841
self.hash_func = _update_with_defaults(hash_func, "hash_func")
3942
self.wait_for_calc_timeout = wait_for_calc_timeout
4043
self.lock = threading.RLock()
44+
self.entry_size_limit = entry_size_limit
4145

4246
def set_func(self, func):
4347
"""Set the function this core will use.
@@ -90,8 +94,22 @@ def get_entry_by_key(self, key: str) -> Tuple[str, Optional[CacheEntry]]:
9094
9195
"""
9296

97+
def _estimate_size(self, value: Any) -> int:
98+
try:
99+
return len(pickle.dumps(value))
100+
except Exception:
101+
return sys.getsizeof(value)
102+
103+
def _should_store(self, value: Any) -> bool:
104+
if self.entry_size_limit is None:
105+
return True
106+
try:
107+
return self._estimate_size(value) <= self.entry_size_limit
108+
except Exception:
109+
return True
110+
93111
@abc.abstractmethod
94-
def set_entry(self, key: str, func_res):
112+
def set_entry(self, key: str, func_res: Any) -> bool:
95113
"""Map the given result to the given key in this core's cache."""
96114

97115
@abc.abstractmethod

src/cachier/cores/memory.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@ def __init__(
1616
self,
1717
hash_func: Optional[HashFunc],
1818
wait_for_calc_timeout: Optional[int],
19+
entry_size_limit: Optional[int] = None,
1920
):
20-
super().__init__(hash_func, wait_for_calc_timeout)
21+
super().__init__(hash_func, wait_for_calc_timeout, entry_size_limit)
2122
self.cache: Dict[str, CacheEntry] = {}
2223

2324
def _hash_func_key(self, key: str) -> str:
@@ -29,7 +30,9 @@ def get_entry_by_key(
2930
with self.lock:
3031
return key, self.cache.get(self._hash_func_key(key), None)
3132

32-
def set_entry(self, key: str, func_res: Any) -> None:
33+
def set_entry(self, key: str, func_res: Any) -> bool:
34+
if not self._should_store(func_res):
35+
return False
3336
hash_key = self._hash_func_key(key)
3437
with self.lock:
3538
try:
@@ -47,6 +50,7 @@ def set_entry(self, key: str, func_res: Any) -> None:
4750
_condition=cond,
4851
_completed=True,
4952
)
53+
return True
5054

5155
def mark_entry_being_calculated(self, key: str) -> None:
5256
with self.lock:

src/cachier/cores/mongo.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def __init__(
4040
hash_func: Optional[HashFunc],
4141
mongetter: Optional[Mongetter],
4242
wait_for_calc_timeout: Optional[int],
43+
entry_size_limit: Optional[int] = None,
4344
):
4445
if "pymongo" not in sys.modules:
4546
warnings.warn(
@@ -49,7 +50,9 @@ def __init__(
4950
) # pragma: no cover
5051

5152
super().__init__(
52-
hash_func=hash_func, wait_for_calc_timeout=wait_for_calc_timeout
53+
hash_func=hash_func,
54+
wait_for_calc_timeout=wait_for_calc_timeout,
55+
entry_size_limit=entry_size_limit,
5356
)
5457
if mongetter is None:
5558
raise MissingMongetter(
@@ -87,7 +90,9 @@ def get_entry_by_key(self, key: str) -> Tuple[str, Optional[CacheEntry]]:
8790
)
8891
return key, entry
8992

90-
def set_entry(self, key: str, func_res: Any) -> None:
93+
def set_entry(self, key: str, func_res: Any) -> bool:
94+
if not self._should_store(func_res):
95+
return False
9196
thebytes = pickle.dumps(func_res)
9297
self.mongo_collection.update_one(
9398
filter={"func": self._func_str, "key": key},
@@ -104,6 +109,7 @@ def set_entry(self, key: str, func_res: Any) -> None:
104109
},
105110
upsert=True,
106111
)
112+
return True
107113

108114
def mark_entry_being_calculated(self, key: str) -> None:
109115
self.mongo_collection.update_one(

src/cachier/cores/pickle.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,9 @@ def __init__(
7878
cache_dir: Optional[Union[str, os.PathLike]],
7979
separate_files: Optional[bool],
8080
wait_for_calc_timeout: Optional[int],
81+
entry_size_limit: Optional[int] = None,
8182
):
82-
super().__init__(hash_func, wait_for_calc_timeout)
83+
super().__init__(hash_func, wait_for_calc_timeout, entry_size_limit)
8384
self._cache_dict: Dict[str, CacheEntry] = {}
8485
self.reload = _update_with_defaults(pickle_reload, "pickle_reload")
8586
self.cache_dir = os.path.expanduser(
@@ -119,7 +120,7 @@ def _convert_legacy_cache_entry(
119120
def _load_cache_dict(self) -> Dict[str, CacheEntry]:
120121
try:
121122
with portalocker.Lock(self.cache_fpath, mode="rb") as cf:
122-
cache = pickle.load(cf)
123+
cache = pickle.load(cf) # type: ignore[arg-type]
123124
self._cache_used_fpath = str(self.cache_fpath)
124125
except (FileNotFoundError, EOFError):
125126
cache = {}
@@ -146,7 +147,7 @@ def _load_cache_by_key(
146147
fpath += f"_{hash_str or key}"
147148
try:
148149
with portalocker.Lock(fpath, mode="rb") as cache_file:
149-
entry = pickle.load(cache_file)
150+
entry = pickle.load(cache_file) # type: ignore[arg-type]
150151
return _PickleCore._convert_legacy_cache_entry(entry)
151152
except (FileNotFoundError, EOFError):
152153
return None
@@ -185,7 +186,7 @@ def _save_cache(
185186
fpath += f"_{hash_str}"
186187
with self.lock:
187188
with portalocker.Lock(fpath, mode="wb") as cf:
188-
pickle.dump(cache, cf, protocol=4)
189+
pickle.dump(cache, cf, protocol=4) # type: ignore[arg-type]
189190
# the same as check for separate_file, but changed for typing
190191
if isinstance(cache, dict):
191192
self._cache_dict = cache
@@ -198,7 +199,9 @@ def get_entry_by_key(
198199
return key, self._load_cache_by_key(key)
199200
return key, self.get_cache_dict(reload).get(key)
200201

201-
def set_entry(self, key: str, func_res: Any) -> None:
202+
def set_entry(self, key: str, func_res: Any) -> bool:
203+
if not self._should_store(func_res):
204+
return False
202205
key_data = CacheEntry(
203206
value=func_res,
204207
time=datetime.now(),
@@ -208,12 +211,13 @@ def set_entry(self, key: str, func_res: Any) -> None:
208211
)
209212
if self.separate_files:
210213
self._save_cache(key_data, key)
211-
return # pragma: no cover
214+
return True # pragma: no cover
212215

213216
with self.lock:
214217
cache = self.get_cache_dict()
215218
cache[key] = key_data
216219
self._save_cache(cache)
220+
return True
217221

218222
def mark_entry_being_calculated_separate_files(self, key: str) -> None:
219223
self._save_cache(

0 commit comments

Comments
 (0)