Skip to content

Commit d419d30

Browse files
Add per-core entry size limit (#294)
* Add entry size limit per cache core * Fix default params test and export parse_bytes * Remove mypy usage and add size util * Restore mypy usage * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * restore mentions of mypy * Add missing docker error message to the local testing script --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 3dc9c66 commit d419d30

17 files changed

Lines changed: 224 additions & 41 deletions

README.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ The following parameters will only be applied to decorators defined after `set_d
138138
* `cache_dir`
139139
* `pickle_reload`
140140
* `separate_files`
141+
* `entry_size_limit`
141142

142143
These parameters can be changed at any time and they will apply to all decorators:
143144

@@ -269,6 +270,22 @@ You can specify a maximum allowed age for a cached value on a per-call basis usi
269270
- If the cached value is older than this threshold, a new calculation is triggered and the cache is updated.
270271
- If not, the cached value is returned as usual.
271272

273+
Entry Size Limit
274+
~~~~~~~~~~~~~~~~
275+
You can prevent very large return values from being cached by specifying
276+
``entry_size_limit`` on the decorator. Values larger than this limit are
277+
returned but not stored. The limit accepts an integer number of bytes or a
278+
human-readable string like ``"200MB"``.
279+
280+
.. code-block:: python
281+
282+
@cachier(entry_size_limit="10KB")
283+
def load_data():
284+
...
285+
286+
When ``cachier__verbose=True`` is passed to a call that returns a value
287+
exceeding the limit, an informative message is printed.
288+
272289
Ignore Cache
273290
~~~~~~~~~~~~
274291

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ dynamic = [
4545
]
4646
dependencies = [
4747
"portalocker>=2.3.2",
48+
"pympler>=1",
4849
"watchdog>=2.3.1",
4950
]
5051
urls.Source = "https://github.com/python-cachier/cachier"

scripts/test-local.sh

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -168,16 +168,40 @@ validate_cores "$SELECTED_CORES"
168168
# Function to check if Docker is available
169169
check_docker() {
170170
if ! command -v docker &> /dev/null; then
171-
print_message $RED "Error: Docker is required but not installed."
171+
print_message $RED "═══════════════════════════════════════════════════════════════"
172+
print_message $RED "ERROR: Docker is not installed!"
173+
print_message $RED "═══════════════════════════════════════════════════════════════"
174+
echo ""
175+
echo "This script requires Docker to run external backend tests (MongoDB, Redis, PostgreSQL)."
172176
echo "Please install Docker from: https://www.docker.com/products/docker-desktop"
177+
echo ""
173178
exit 1
174179
fi
175180

176-
if ! docker ps &> /dev/null; then
177-
print_message $RED "Error: Docker daemon is not running."
178-
echo "Please start Docker and try again."
181+
# Try to run docker ps and capture the actual error
182+
if ! docker ps > /dev/null 2>&1; then
183+
print_message $RED "═══════════════════════════════════════════════════════════════"
184+
print_message $RED "ERROR: Docker daemon is not running!"
185+
print_message $RED "═══════════════════════════════════════════════════════════════"
186+
echo ""
187+
echo "Docker is installed but the Docker daemon is not running."
188+
echo ""
189+
echo "To fix this:"
190+
echo " • On macOS: Start Docker Desktop from Applications"
191+
echo " • On Linux: Run 'sudo systemctl start docker' or 'sudo service docker start'"
192+
echo " • On Windows: Start Docker Desktop from the Start Menu"
193+
echo ""
194+
echo "After starting Docker, wait a few seconds and try running this script again."
195+
echo ""
196+
197+
# Show the actual docker error for debugging
198+
echo "Technical details:"
199+
docker ps 2>&1 | sed 's/^/ /'
200+
echo ""
179201
exit 1
180202
fi
203+
204+
print_message $GREEN "✓ Docker is installed and running"
181205
}
182206

183207
# Function to check and install dependencies

src/cachier/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,15 @@
88
set_global_params,
99
)
1010
from .core import cachier
11+
from .util import parse_bytes
1112

1213
__all__ = [
1314
"cachier",
1415
"set_default_params",
1516
"get_default_params",
1617
"set_global_params",
1718
"get_global_params",
19+
"parse_bytes",
1820
"enable_caching",
1921
"disable_caching",
2022
"__version__",

src/cachier/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ class Params:
6565
allow_none: bool = False
6666
cleanup_stale: bool = False
6767
cleanup_interval: timedelta = timedelta(days=1)
68+
entry_size_limit: Optional[int] = None
6869

6970

7071
_global_params = Params()

src/cachier/core.py

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,14 @@
1919
from warnings import warn
2020

2121
from ._types import RedisClient
22-
from .config import (
23-
Backend,
24-
HashFunc,
25-
Mongetter,
26-
_update_with_defaults,
27-
)
22+
from .config import Backend, HashFunc, Mongetter, _update_with_defaults
2823
from .cores.base import RecalculationNeeded, _BaseCore
2924
from .cores.memory import _MemoryCore
3025
from .cores.mongo import _MongoCore
3126
from .cores.pickle import _PickleCore
3227
from .cores.redis import _RedisCore
3328
from .cores.sql import _SQLCore
29+
from .util import parse_bytes
3430

3531
MAX_WORKERS_ENVAR_NAME = "CACHIER_MAX_WORKERS"
3632
DEFAULT_MAX_WORKERS = 8
@@ -60,11 +56,15 @@ def _function_thread(core, key, func, args, kwds):
6056
print(f"Function call failed with the following exception:\n{exc}")
6157

6258

63-
def _calc_entry(core, key, func, args, kwds) -> Optional[Any]:
59+
def _calc_entry(
60+
core, key, func, args, kwds, printer=lambda *_: None
61+
) -> Optional[Any]:
6462
core.mark_entry_being_calculated(key)
6563
try:
6664
func_res = func(*args, **kwds)
67-
core.set_entry(key, func_res)
65+
stored = core.set_entry(key, func_res)
66+
if not stored:
67+
printer("Result exceeds entry_size_limit; not cached")
6868
return func_res
6969
finally:
7070
core.mark_entry_not_calculated(key)
@@ -123,6 +123,7 @@ def cachier(
123123
allow_none: Optional[bool] = None,
124124
cleanup_stale: Optional[bool] = None,
125125
cleanup_interval: Optional[timedelta] = None,
126+
entry_size_limit: Optional[Union[int, str]] = None,
126127
):
127128
"""Wrap as a persistent, stale-free memoization decorator.
128129
@@ -191,6 +192,10 @@ def cachier(
191192
thread. Defaults to False.
192193
cleanup_interval: datetime.timedelta, optional
193194
Minimum time between automatic cleanup runs. Defaults to one day.
195+
entry_size_limit: int or str, optional
196+
Maximum serialized size of a cached value. Values exceeding the limit
197+
are returned but not cached. Human readable strings like ``"10MB"`` are
198+
allowed.
194199
195200
"""
196201
# Check for deprecated parameters
@@ -204,6 +209,9 @@ def cachier(
204209
# Update parameters with defaults if input is None
205210
backend = _update_with_defaults(backend, "backend")
206211
mongetter = _update_with_defaults(mongetter, "mongetter")
212+
size_limit_bytes = parse_bytes(
213+
_update_with_defaults(entry_size_limit, "entry_size_limit")
214+
)
207215
# Override the backend parameter if a mongetter is provided.
208216
if callable(mongetter):
209217
backend = "mongo"
@@ -215,28 +223,34 @@ def cachier(
215223
cache_dir=cache_dir,
216224
separate_files=separate_files,
217225
wait_for_calc_timeout=wait_for_calc_timeout,
226+
entry_size_limit=size_limit_bytes,
218227
)
219228
elif backend == "mongo":
220229
core = _MongoCore(
221230
hash_func=hash_func,
222231
mongetter=mongetter,
223232
wait_for_calc_timeout=wait_for_calc_timeout,
233+
entry_size_limit=size_limit_bytes,
224234
)
225235
elif backend == "memory":
226236
core = _MemoryCore(
227-
hash_func=hash_func, wait_for_calc_timeout=wait_for_calc_timeout
237+
hash_func=hash_func,
238+
wait_for_calc_timeout=wait_for_calc_timeout,
239+
entry_size_limit=size_limit_bytes,
228240
)
229241
elif backend == "sql":
230242
core = _SQLCore(
231243
hash_func=hash_func,
232244
sql_engine=sql_engine,
233245
wait_for_calc_timeout=wait_for_calc_timeout,
246+
entry_size_limit=size_limit_bytes,
234247
)
235248
elif backend == "redis":
236249
core = _RedisCore(
237250
hash_func=hash_func,
238251
redis_client=redis_client,
239252
wait_for_calc_timeout=wait_for_calc_timeout,
253+
entry_size_limit=size_limit_bytes,
240254
)
241255
else:
242256
raise ValueError("specified an invalid core: %s" % backend)
@@ -324,12 +338,12 @@ def _call(*args, max_age: Optional[timedelta] = None, **kwds):
324338
)
325339
key, entry = core.get_entry((), kwargs)
326340
if overwrite_cache:
327-
return _calc_entry(core, key, func, args, kwds)
341+
return _calc_entry(core, key, func, args, kwds, _print)
328342
if entry is None or (
329343
not entry._completed and not entry._processing
330344
):
331345
_print("No entry found. No current calc. Calling like a boss.")
332-
return _calc_entry(core, key, func, args, kwds)
346+
return _calc_entry(core, key, func, args, kwds, _print)
333347
_print("Entry found.")
334348
if _allow_none or entry.value is not None:
335349
_print("Cached result found.")
@@ -362,7 +376,7 @@ def _call(*args, max_age: Optional[timedelta] = None, **kwds):
362376
try:
363377
return core.wait_on_entry_calc(key)
364378
except RecalculationNeeded:
365-
return _calc_entry(core, key, func, args, kwds)
379+
return _calc_entry(core, key, func, args, kwds, _print)
366380
if _next_time:
367381
_print("Async calc and return stale")
368382
core.mark_entry_being_calculated(key)
@@ -374,15 +388,15 @@ def _call(*args, max_age: Optional[timedelta] = None, **kwds):
374388
core.mark_entry_not_calculated(key)
375389
return entry.value
376390
_print("Calling decorated function and waiting")
377-
return _calc_entry(core, key, func, args, kwds)
391+
return _calc_entry(core, key, func, args, kwds, _print)
378392
if entry._processing:
379393
_print("No value but being calculated. Waiting.")
380394
try:
381395
return core.wait_on_entry_calc(key)
382396
except RecalculationNeeded:
383-
return _calc_entry(core, key, func, args, kwds)
397+
return _calc_entry(core, key, func, args, kwds, _print)
384398
_print("No entry found. No current calc. Calling like a boss.")
385-
return _calc_entry(core, key, func, args, kwds)
399+
return _calc_entry(core, key, func, args, kwds, _print)
386400

387401
# MAINTAINER NOTE: The main function wrapper is now a standard function
388402
# that passes *args and **kwargs to _call. This ensures that user

src/cachier/cores/base.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,12 @@
99

1010
import abc # for the _BaseCore abstract base class
1111
import inspect
12+
import sys
1213
import threading
1314
from datetime import timedelta
14-
from typing import Callable, Optional, Tuple
15+
from typing import Any, Callable, Optional, Tuple
16+
17+
from pympler import asizeof # type: ignore
1518

1619
from .._types import HashFunc
1720
from ..config import CacheEntry, _update_with_defaults
@@ -34,10 +37,12 @@ def __init__(
3437
self,
3538
hash_func: Optional[HashFunc],
3639
wait_for_calc_timeout: Optional[int],
40+
entry_size_limit: Optional[int] = None,
3741
):
3842
self.hash_func = _update_with_defaults(hash_func, "hash_func")
3943
self.wait_for_calc_timeout = wait_for_calc_timeout
4044
self.lock = threading.RLock()
45+
self.entry_size_limit = entry_size_limit
4146

4247
def set_func(self, func):
4348
"""Set the function this core will use.
@@ -90,8 +95,22 @@ def get_entry_by_key(self, key: str) -> Tuple[str, Optional[CacheEntry]]:
9095
9196
"""
9297

98+
def _estimate_size(self, value: Any) -> int:
99+
try:
100+
return asizeof.asizeof(value)
101+
except Exception:
102+
return sys.getsizeof(value)
103+
104+
def _should_store(self, value: Any) -> bool:
105+
if self.entry_size_limit is None:
106+
return True
107+
try:
108+
return self._estimate_size(value) <= self.entry_size_limit
109+
except Exception:
110+
return True
111+
93112
@abc.abstractmethod
94-
def set_entry(self, key: str, func_res):
113+
def set_entry(self, key: str, func_res: Any) -> bool:
95114
"""Map the given result to the given key in this core's cache."""
96115

97116
@abc.abstractmethod

src/cachier/cores/memory.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,9 @@ def __init__(
1616
self,
1717
hash_func: Optional[HashFunc],
1818
wait_for_calc_timeout: Optional[int],
19+
entry_size_limit: Optional[int] = None,
1920
):
20-
super().__init__(hash_func, wait_for_calc_timeout)
21+
super().__init__(hash_func, wait_for_calc_timeout, entry_size_limit)
2122
self.cache: Dict[str, CacheEntry] = {}
2223

2324
def _hash_func_key(self, key: str) -> str:
@@ -29,7 +30,9 @@ def get_entry_by_key(
2930
with self.lock:
3031
return key, self.cache.get(self._hash_func_key(key), None)
3132

32-
def set_entry(self, key: str, func_res: Any) -> None:
33+
def set_entry(self, key: str, func_res: Any) -> bool:
34+
if not self._should_store(func_res):
35+
return False
3336
hash_key = self._hash_func_key(key)
3437
with self.lock:
3538
try:
@@ -47,6 +50,7 @@ def set_entry(self, key: str, func_res: Any) -> None:
4750
_condition=cond,
4851
_completed=True,
4952
)
53+
return True
5054

5155
def mark_entry_being_calculated(self, key: str) -> None:
5256
with self.lock:

src/cachier/cores/mongo.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def __init__(
4040
hash_func: Optional[HashFunc],
4141
mongetter: Optional[Mongetter],
4242
wait_for_calc_timeout: Optional[int],
43+
entry_size_limit: Optional[int] = None,
4344
):
4445
if "pymongo" not in sys.modules:
4546
warnings.warn(
@@ -49,7 +50,9 @@ def __init__(
4950
) # pragma: no cover
5051

5152
super().__init__(
52-
hash_func=hash_func, wait_for_calc_timeout=wait_for_calc_timeout
53+
hash_func=hash_func,
54+
wait_for_calc_timeout=wait_for_calc_timeout,
55+
entry_size_limit=entry_size_limit,
5356
)
5457
if mongetter is None:
5558
raise MissingMongetter(
@@ -87,7 +90,9 @@ def get_entry_by_key(self, key: str) -> Tuple[str, Optional[CacheEntry]]:
8790
)
8891
return key, entry
8992

90-
def set_entry(self, key: str, func_res: Any) -> None:
93+
def set_entry(self, key: str, func_res: Any) -> bool:
94+
if not self._should_store(func_res):
95+
return False
9196
thebytes = pickle.dumps(func_res)
9297
self.mongo_collection.update_one(
9398
filter={"func": self._func_str, "key": key},
@@ -104,6 +109,7 @@ def set_entry(self, key: str, func_res: Any) -> None:
104109
},
105110
upsert=True,
106111
)
112+
return True
107113

108114
def mark_entry_being_calculated(self, key: str) -> None:
109115
self.mongo_collection.update_one(

0 commit comments

Comments
 (0)