Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ The following parameters will only be applied to decorators defined after `set_d
* `cache_dir`
* `pickle_reload`
* `separate_files`
* `entry_size_limit`

These parameters can be changed at any time and they will apply to all decorators:

Expand Down Expand Up @@ -269,6 +270,22 @@ You can specify a maximum allowed age for a cached value on a per-call basis usi
- If the cached value is older than this threshold, a new calculation is triggered and the cache is updated.
- If not, the cached value is returned as usual.

Entry Size Limit
~~~~~~~~~~~~~~~~
You can prevent very large return values from being cached by specifying
``entry_size_limit`` on the decorator. Values larger than this limit are
returned but not stored. The limit accepts an integer number of bytes or a
human-readable string like ``"200MB"``.

.. code-block:: python

@cachier(entry_size_limit="10KB")
def load_data():
...

When ``cachier__verbose=True`` is passed to a call that returns a value
exceeding the limit, an informative message is printed.

Ignore Cache
~~~~~~~~~~~~

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ dynamic = [
]
dependencies = [
"portalocker>=2.3.2",
"pympler>=1",
"watchdog>=2.3.1",
]
urls.Source = "https://github.com/python-cachier/cachier"
Expand Down
32 changes: 28 additions & 4 deletions scripts/test-local.sh
Original file line number Diff line number Diff line change
Expand Up @@ -168,16 +168,40 @@ validate_cores "$SELECTED_CORES"
# Function to check if Docker is available
# Verify that Docker is installed and that the daemon is reachable.
# Prints remediation instructions and exits with status 1 on failure;
# on success prints a confirmation line and returns normally.
check_docker() {
    if ! command -v docker &> /dev/null; then
        print_message $RED "═══════════════════════════════════════════════════════════════"
        print_message $RED "ERROR: Docker is not installed!"
        print_message $RED "═══════════════════════════════════════════════════════════════"
        echo ""
        echo "This script requires Docker to run external backend tests (MongoDB, Redis, PostgreSQL)."
        echo "Please install Docker from: https://www.docker.com/products/docker-desktop"
        echo ""
        exit 1
    fi

    # Try to run docker ps and capture the actual error
    if ! docker ps > /dev/null 2>&1; then
        print_message $RED "═══════════════════════════════════════════════════════════════"
        print_message $RED "ERROR: Docker daemon is not running!"
        print_message $RED "═══════════════════════════════════════════════════════════════"
        echo ""
        echo "Docker is installed but the Docker daemon is not running."
        echo ""
        echo "To fix this:"
        echo "  • On macOS: Start Docker Desktop from Applications"
        echo "  • On Linux: Run 'sudo systemctl start docker' or 'sudo service docker start'"
        echo "  • On Windows: Start Docker Desktop from the Start Menu"
        echo ""
        echo "After starting Docker, wait a few seconds and try running this script again."
        echo ""

        # Show the actual docker error for debugging
        echo "Technical details:"
        docker ps 2>&1 | sed 's/^/  /'
        echo ""
        exit 1
    fi

    print_message $GREEN "✓ Docker is installed and running"
}

# Function to check and install dependencies
Expand Down
2 changes: 2 additions & 0 deletions src/cachier/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@
set_global_params,
)
from .core import cachier
from .util import parse_bytes

__all__ = [
"cachier",
"set_default_params",
"get_default_params",
"set_global_params",
"get_global_params",
"parse_bytes",
"enable_caching",
"disable_caching",
"__version__",
Expand Down
1 change: 1 addition & 0 deletions src/cachier/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class Params:
allow_none: bool = False
cleanup_stale: bool = False
cleanup_interval: timedelta = timedelta(days=1)
entry_size_limit: Optional[int] = None


_global_params = Params()
Expand Down
44 changes: 29 additions & 15 deletions src/cachier/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,14 @@
from warnings import warn

from ._types import RedisClient
from .config import (
Backend,
HashFunc,
Mongetter,
_update_with_defaults,
)
from .config import Backend, HashFunc, Mongetter, _update_with_defaults
from .cores.base import RecalculationNeeded, _BaseCore
from .cores.memory import _MemoryCore
from .cores.mongo import _MongoCore
from .cores.pickle import _PickleCore
from .cores.redis import _RedisCore
from .cores.sql import _SQLCore
from .util import parse_bytes

MAX_WORKERS_ENVAR_NAME = "CACHIER_MAX_WORKERS"
DEFAULT_MAX_WORKERS = 8
Expand Down Expand Up @@ -60,11 +56,15 @@ def _function_thread(core, key, func, args, kwds):
print(f"Function call failed with the following exception:\n{exc}")


def _calc_entry(core, key, func, args, kwds) -> Optional[Any]:
def _calc_entry(
core, key, func, args, kwds, printer=lambda *_: None
) -> Optional[Any]:
core.mark_entry_being_calculated(key)
try:
func_res = func(*args, **kwds)
core.set_entry(key, func_res)
stored = core.set_entry(key, func_res)
if not stored:
printer("Result exceeds entry_size_limit; not cached")
return func_res
finally:
core.mark_entry_not_calculated(key)
Expand Down Expand Up @@ -123,6 +123,7 @@ def cachier(
allow_none: Optional[bool] = None,
cleanup_stale: Optional[bool] = None,
cleanup_interval: Optional[timedelta] = None,
entry_size_limit: Optional[Union[int, str]] = None,
):
"""Wrap as a persistent, stale-free memoization decorator.

Expand Down Expand Up @@ -191,6 +192,10 @@ def cachier(
thread. Defaults to False.
cleanup_interval: datetime.timedelta, optional
Minimum time between automatic cleanup runs. Defaults to one day.
entry_size_limit: int or str, optional
Maximum serialized size of a cached value. Values exceeding the limit
are returned but not cached. Human readable strings like ``"10MB"`` are
allowed.

"""
# Check for deprecated parameters
Expand All @@ -204,6 +209,9 @@ def cachier(
# Update parameters with defaults if input is None
backend = _update_with_defaults(backend, "backend")
mongetter = _update_with_defaults(mongetter, "mongetter")
size_limit_bytes = parse_bytes(
_update_with_defaults(entry_size_limit, "entry_size_limit")
)
# Override the backend parameter if a mongetter is provided.
if callable(mongetter):
backend = "mongo"
Expand All @@ -215,28 +223,34 @@ def cachier(
cache_dir=cache_dir,
separate_files=separate_files,
wait_for_calc_timeout=wait_for_calc_timeout,
entry_size_limit=size_limit_bytes,
)
elif backend == "mongo":
core = _MongoCore(
hash_func=hash_func,
mongetter=mongetter,
wait_for_calc_timeout=wait_for_calc_timeout,
entry_size_limit=size_limit_bytes,
)
elif backend == "memory":
core = _MemoryCore(
hash_func=hash_func, wait_for_calc_timeout=wait_for_calc_timeout
hash_func=hash_func,
wait_for_calc_timeout=wait_for_calc_timeout,
entry_size_limit=size_limit_bytes,
)
elif backend == "sql":
core = _SQLCore(
hash_func=hash_func,
sql_engine=sql_engine,
wait_for_calc_timeout=wait_for_calc_timeout,
entry_size_limit=size_limit_bytes,
)
elif backend == "redis":
core = _RedisCore(
hash_func=hash_func,
redis_client=redis_client,
wait_for_calc_timeout=wait_for_calc_timeout,
entry_size_limit=size_limit_bytes,
)
else:
raise ValueError("specified an invalid core: %s" % backend)
Expand Down Expand Up @@ -324,12 +338,12 @@ def _call(*args, max_age: Optional[timedelta] = None, **kwds):
)
key, entry = core.get_entry((), kwargs)
if overwrite_cache:
return _calc_entry(core, key, func, args, kwds)
return _calc_entry(core, key, func, args, kwds, _print)
if entry is None or (
not entry._completed and not entry._processing
):
_print("No entry found. No current calc. Calling like a boss.")
return _calc_entry(core, key, func, args, kwds)
return _calc_entry(core, key, func, args, kwds, _print)
_print("Entry found.")
if _allow_none or entry.value is not None:
_print("Cached result found.")
Expand Down Expand Up @@ -362,7 +376,7 @@ def _call(*args, max_age: Optional[timedelta] = None, **kwds):
try:
return core.wait_on_entry_calc(key)
except RecalculationNeeded:
return _calc_entry(core, key, func, args, kwds)
return _calc_entry(core, key, func, args, kwds, _print)
if _next_time:
_print("Async calc and return stale")
core.mark_entry_being_calculated(key)
Expand All @@ -374,15 +388,15 @@ def _call(*args, max_age: Optional[timedelta] = None, **kwds):
core.mark_entry_not_calculated(key)
return entry.value
_print("Calling decorated function and waiting")
return _calc_entry(core, key, func, args, kwds)
return _calc_entry(core, key, func, args, kwds, _print)
if entry._processing:
_print("No value but being calculated. Waiting.")
try:
return core.wait_on_entry_calc(key)
except RecalculationNeeded:
return _calc_entry(core, key, func, args, kwds)
return _calc_entry(core, key, func, args, kwds, _print)
_print("No entry found. No current calc. Calling like a boss.")
return _calc_entry(core, key, func, args, kwds)
return _calc_entry(core, key, func, args, kwds, _print)

# MAINTAINER NOTE: The main function wrapper is now a standard function
# that passes *args and **kwargs to _call. This ensures that user
Expand Down
23 changes: 21 additions & 2 deletions src/cachier/cores/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@

import abc # for the _BaseCore abstract base class
import inspect
import sys
import threading
from datetime import timedelta
from typing import Callable, Optional, Tuple
from typing import Any, Callable, Optional, Tuple

from pympler import asizeof # type: ignore

from .._types import HashFunc
from ..config import CacheEntry, _update_with_defaults
Expand All @@ -34,10 +37,12 @@ def __init__(
self,
hash_func: Optional[HashFunc],
wait_for_calc_timeout: Optional[int],
entry_size_limit: Optional[int] = None,
):
self.hash_func = _update_with_defaults(hash_func, "hash_func")
self.wait_for_calc_timeout = wait_for_calc_timeout
self.lock = threading.RLock()
self.entry_size_limit = entry_size_limit

def set_func(self, func):
"""Set the function this core will use.
Expand Down Expand Up @@ -90,8 +95,22 @@ def get_entry_by_key(self, key: str) -> Tuple[str, Optional[CacheEntry]]:

"""

def _estimate_size(self, value: Any) -> int:
try:
return asizeof.asizeof(value)
except Exception:
return sys.getsizeof(value)

def _should_store(self, value: Any) -> bool:
if self.entry_size_limit is None:
return True
try:
return self._estimate_size(value) <= self.entry_size_limit
except Exception:
return True
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a warning that the size could not be estimated?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea!


@abc.abstractmethod
def set_entry(self, key: str, func_res):
def set_entry(self, key: str, func_res: Any) -> bool:
"""Map the given result to the given key in this core's cache."""

@abc.abstractmethod
Expand Down
8 changes: 6 additions & 2 deletions src/cachier/cores/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ def __init__(
self,
hash_func: Optional[HashFunc],
wait_for_calc_timeout: Optional[int],
entry_size_limit: Optional[int] = None,
):
super().__init__(hash_func, wait_for_calc_timeout)
super().__init__(hash_func, wait_for_calc_timeout, entry_size_limit)
self.cache: Dict[str, CacheEntry] = {}

def _hash_func_key(self, key: str) -> str:
Expand All @@ -29,7 +30,9 @@ def get_entry_by_key(
with self.lock:
return key, self.cache.get(self._hash_func_key(key), None)

def set_entry(self, key: str, func_res: Any) -> None:
def set_entry(self, key: str, func_res: Any) -> bool:
if not self._should_store(func_res):
return False
hash_key = self._hash_func_key(key)
with self.lock:
try:
Expand All @@ -47,6 +50,7 @@ def set_entry(self, key: str, func_res: Any) -> None:
_condition=cond,
_completed=True,
)
return True

def mark_entry_being_calculated(self, key: str) -> None:
with self.lock:
Expand Down
10 changes: 8 additions & 2 deletions src/cachier/cores/mongo.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def __init__(
hash_func: Optional[HashFunc],
mongetter: Optional[Mongetter],
wait_for_calc_timeout: Optional[int],
entry_size_limit: Optional[int] = None,
):
if "pymongo" not in sys.modules:
warnings.warn(
Expand All @@ -49,7 +50,9 @@ def __init__(
) # pragma: no cover

super().__init__(
hash_func=hash_func, wait_for_calc_timeout=wait_for_calc_timeout
hash_func=hash_func,
wait_for_calc_timeout=wait_for_calc_timeout,
entry_size_limit=entry_size_limit,
)
if mongetter is None:
raise MissingMongetter(
Expand Down Expand Up @@ -87,7 +90,9 @@ def get_entry_by_key(self, key: str) -> Tuple[str, Optional[CacheEntry]]:
)
return key, entry

def set_entry(self, key: str, func_res: Any) -> None:
def set_entry(self, key: str, func_res: Any) -> bool:
if not self._should_store(func_res):
return False
thebytes = pickle.dumps(func_res)
self.mongo_collection.update_one(
filter={"func": self._func_str, "key": key},
Expand All @@ -104,6 +109,7 @@ def set_entry(self, key: str, func_res: Any) -> None:
},
upsert=True,
)
return True

def mark_entry_being_calculated(self, key: str) -> None:
self.mongo_collection.update_one(
Expand Down
Loading
Loading