From 7ed181a9684ea32642a661f945a3b6f0f90ee5cd Mon Sep 17 00:00:00 2001 From: James Prior Date: Tue, 26 Aug 2025 12:25:25 +0100 Subject: [PATCH 1/4] Cache checked and mapped regex patterns --- jsonpath/function_extensions/match.py | 70 +++++++++++--- jsonpath/function_extensions/search.py | 74 ++++++++++---- jsonpath/lru_cache.py | 129 +++++++++++++++++++++++++ 3 files changed, 241 insertions(+), 32 deletions(-) create mode 100644 jsonpath/lru_cache.py diff --git a/jsonpath/function_extensions/match.py b/jsonpath/function_extensions/match.py index 68494b8..9d56588 100644 --- a/jsonpath/function_extensions/match.py +++ b/jsonpath/function_extensions/match.py @@ -1,5 +1,7 @@ """The standard `match` function extension.""" +from typing import Optional + try: import regex as re @@ -16,35 +18,73 @@ except ImportError: IREGEXP_AVAILABLE = False +from jsonpath.exceptions import JSONPathError from jsonpath.function_extensions import ExpressionType from jsonpath.function_extensions import FilterFunction +from jsonpath.lru_cache import LRUCache +from jsonpath.lru_cache import ThreadSafeLRUCache from ._pattern import map_re class Match(FilterFunction): - """A type-aware implementation of the standard `match` function.""" + """The standard `match` function. + + Arguments: + cache_capacity: The size of the regular expression cache. + debug: When `True`, raise an exception when regex pattern compilation + fails. The default - as required by RFC 9535 - is `False`, which + silently ignores bad patterns. + thread_safe: When `True`, use a `ThreadSafeLRUCache` instead of an + instance of `LRUCache`. + """ arg_types = [ExpressionType.VALUE, ExpressionType.VALUE] return_type = ExpressionType.LOGICAL - def __call__(self, string: str, pattern: str) -> bool: - """Return `True` if _string_ matches _pattern_, or `False` otherwise.""" - # TODO: re.match caches compiled patterns internally, but `map_re` and `check` - # are not cached. + def __init__( + self, + *, + cache_capacity: int = 300, + debug: bool = False, + thread_safe: bool = False, + ): + self._cache: LRUCache[str, Optional[re.Pattern[str]]] = ( + ThreadSafeLRUCache(capacity=cache_capacity) + if thread_safe + else LRUCache(capacity=cache_capacity) + ) - # TODO: validate literal patterns ar compile time? + self.debug = debug - if IREGEXP_AVAILABLE and (not isinstance(pattern, str) or not check(pattern)): + def __call__(self, value: object, pattern: object) -> bool: + """Return `True` if _value_ matches _pattern_, or `False` otherwise.""" + if not isinstance(value, str) or not isinstance(pattern, str): return False - if REGEX_AVAILABLE: - try: - pattern = map_re(pattern) - except TypeError: - return False - try: - return bool(re.fullmatch(pattern, string)) - except (TypeError, re.error): + _pattern = self._cache[pattern] + except KeyError: + if IREGEXP_AVAILABLE and not check(pattern): + if self.debug: + raise JSONPathError( + "search pattern is not a valid I-Regexp", token=None + ) from None + _pattern = None + else: + if REGEX_AVAILABLE: + pattern = map_re(pattern) + + try: + _pattern = re.compile(pattern) + except re.error: + if self.debug: + raise + _pattern = None + + self._cache[pattern] = _pattern + + if _pattern is None: return False + + return bool(_pattern.fullmatch(value)) diff --git a/jsonpath/function_extensions/search.py b/jsonpath/function_extensions/search.py index e6d9086..9ff9cd2 100644 --- a/jsonpath/function_extensions/search.py +++ b/jsonpath/function_extensions/search.py @@ -1,10 +1,12 @@ """The standard `search` function extension.""" +from typing import Optional + try: import regex as re REGEX_AVAILABLE = True -except ImportError: +except ImportError: # pragma: no cover import re # type: ignore REGEX_AVAILABLE = False @@ -13,38 +15,76 @@ from iregexp_check import check IREGEXP_AVAILABLE = True -except ImportError: +except ImportError: # pragma: no cover IREGEXP_AVAILABLE = False +from jsonpath.exceptions import JSONPathError from jsonpath.function_extensions import ExpressionType from jsonpath.function_extensions import FilterFunction +from jsonpath.lru_cache import LRUCache +from jsonpath.lru_cache import ThreadSafeLRUCache from ._pattern import map_re class Search(FilterFunction): - """A type-aware implementation of the standard `search` function.""" + """The standard `search` function. + + Arguments: + cache_capacity: The size of the regular expression cache. + debug: When `True`, raise an exception when regex pattern compilation + fails. The default - as required by RFC 9535 - is `False`, which + silently ignores bad patterns. + thread_safe: When `True`, use a `ThreadSafeLRUCache` instead of an + instance of `LRUCache`. + """ arg_types = [ExpressionType.VALUE, ExpressionType.VALUE] return_type = ExpressionType.LOGICAL - def __call__(self, string: str, pattern: str) -> bool: - """Return `True` if _string_ contains _pattern_, or `False` otherwise.""" - # TODO: re.search caches compiled patterns internally, but `map_re` and `check` - # are not cached. + def __init__( + self, + *, + cache_capacity: int = 300, + debug: bool = False, + thread_safe: bool = False, + ): + self._cache: LRUCache[str, Optional[re.Pattern[str]]] = ( + ThreadSafeLRUCache(capacity=cache_capacity) + if thread_safe + else LRUCache(capacity=cache_capacity) + ) - # TODO: validate literal patterns ar compile time? + self.debug = debug - if IREGEXP_AVAILABLE and (not isinstance(pattern, str) or not check(pattern)): + def __call__(self, value: str, pattern: object) -> bool: + """Return `True` if _value_ contains _pattern_, or `False` otherwise.""" + if not isinstance(value, str) or not isinstance(pattern, str): return False - if REGEX_AVAILABLE: - try: - pattern = map_re(pattern) - except TypeError: - return False - try: - return bool(re.search(pattern, string)) - except (TypeError, re.error): + _pattern = self._cache[pattern] + except KeyError: + if IREGEXP_AVAILABLE and not check(pattern): + if self.debug: + raise JSONPathError( + "search pattern is not a valid I-Regexp", token=None + ) from None + _pattern = None + else: + if REGEX_AVAILABLE: + pattern = map_re(pattern) + + try: + _pattern = re.compile(pattern) + except re.error: + if self.debug: + raise + _pattern = None + + self._cache[pattern] = _pattern + + if _pattern is None: return False + + return bool(_pattern.search(value)) diff --git a/jsonpath/lru_cache.py b/jsonpath/lru_cache.py new file mode 100644 index 0000000..94eabbf --- /dev/null +++ b/jsonpath/lru_cache.py @@ -0,0 +1,129 @@ +"""An LRU cache with a mapping interface implemented using an ordered dict.""" + +from collections import OrderedDict +from threading import Lock +from typing import Generic +from typing import Iterator +from typing import Optional +from typing import TypeVar +from typing import Union +from typing import overload + +_KT = TypeVar("_KT") +_VT = TypeVar("_VT") +_T = TypeVar("_T") + + +class LRUCache(Generic[_KT, _VT]): + """An LRU cache with a mapping interface.""" + + def __init__(self, capacity: int): + if capacity < 1: + raise ValueError("cache capacity must be greater than zero") + + self.capacity = capacity + self._cache: OrderedDict[_KT, _VT] = OrderedDict() + + def __getitem__(self, key: _KT) -> _VT: + value = self._cache[key] # This will raise a KeyError if key is not cached + self._cache.move_to_end(key) + return value + + def __setitem__(self, key: _KT, value: _VT) -> None: + try: + self._cache.move_to_end(key) + except KeyError: + if len(self._cache) >= self.capacity: + self._cache.popitem(last=False) + + self._cache[key] = value + + def __delitem__(self, key: _KT) -> None: + del self._cache[key] + + def __len__(self) -> int: + return len(self._cache) + + def __iter__(self) -> Iterator[_KT]: + return reversed(self._cache) + + def __contains__(self, key: _KT) -> bool: + return key in self._cache + + @overload + def get(self, key: _KT) -> Optional[_VT]: ... + @overload + def get(self, key: _KT, default: _VT) -> _VT: ... + @overload + def get(self, key: _KT, default: _T) -> Union[_VT, _T]: ... + def get(self, key: _KT, default: object = None) -> object: + """Return the cached value for _key_ if _key_ is in the cache, else default.""" + try: + return self[key] + except KeyError: + return default + + def keys(self) -> Iterator[_KT]: + """Return an iterator over this cache's keys.""" + return reversed(self._cache.keys()) + + def values(self) -> Iterator[_VT]: + """Return an iterator over this cache's values.""" + return reversed(self._cache.values()) + + def items(self) -> Iterator[tuple[_KT, _VT]]: + """Return an iterator over this cache's key/value pairs.""" + return reversed(self._cache.items()) + + +class ThreadSafeLRUCache(LRUCache[_KT, _VT]): + """A thread safe LRU cache.""" + + def __init__(self, capacity: int): + super().__init__(capacity) + self._lock = Lock() + + def __getitem__(self, key: _KT) -> _VT: + with self._lock: + return super().__getitem__(key) + + def __setitem__(self, key: _KT, value: _VT) -> None: + with self._lock: + return super().__setitem__(key, value) + + def __delitem__(self, key: _KT) -> None: + with self._lock: + return super().__delitem__(key) + + def __contains__(self, key: _KT) -> bool: + with self._lock: + return super().__contains__(key) + + @overload + def get(self, key: _KT) -> Optional[_VT]: ... + @overload + def get(self, key: _KT, default: _VT) -> _VT: ... + @overload + def get(self, key: _KT, default: _T) -> Union[_VT, _T]: ... + def get(self, key: _KT, default: object = None) -> object: + """Return the cached value for _key_ if _key_ is in the cache, else default.""" + # NOTE: self.__getitem__ is already acquiring the lock. + try: + return self[key] + except KeyError: + return default + + def keys(self) -> Iterator[_KT]: + """Return an iterator over this cache's keys.""" + with self._lock: + return super().keys() + + def values(self) -> Iterator[_VT]: + """Return an iterator over this cache's values.""" + with self._lock: + return super().values() + + def items(self) -> Iterator[tuple[_KT, _VT]]: + """Return an iterator over this cache's key/value pairs.""" + with self._lock: + return super().items() From 440990525190c50e087ce0b4d275ef1e8c3beb86 Mon Sep 17 00:00:00 2001 From: James Prior Date: Tue, 26 Aug 2025 12:35:59 +0100 Subject: [PATCH 2/4] Fix type hints for Python 3.8 --- jsonpath/lru_cache.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/jsonpath/lru_cache.py b/jsonpath/lru_cache.py index 94eabbf..86a43c0 100644 --- a/jsonpath/lru_cache.py +++ b/jsonpath/lru_cache.py @@ -5,6 +5,7 @@ from typing import Generic from typing import Iterator from typing import Optional +from typing import Tuple from typing import TypeVar from typing import Union from typing import overload @@ -71,7 +72,7 @@ def values(self) -> Iterator[_VT]: """Return an iterator over this cache's values.""" return reversed(self._cache.values()) - def items(self) -> Iterator[tuple[_KT, _VT]]: + def items(self) -> Iterator[Tuple[_KT, _VT]]: """Return an iterator over this cache's key/value pairs.""" return reversed(self._cache.items()) @@ -123,7 +124,7 @@ def values(self) -> Iterator[_VT]: with self._lock: return super().values() - def items(self) -> Iterator[tuple[_KT, _VT]]: + def items(self) -> Iterator[Tuple[_KT, _VT]]: """Return an iterator over this cache's key/value pairs.""" with self._lock: return super().items() From 1c06da571bd16da0b3b4eb3b14905a009f0b0863 Mon Sep 17 00:00:00 2001 From: James Prior Date: Tue, 26 Aug 2025 16:51:05 +0100 Subject: [PATCH 3/4] Refactor re caching in to abstrac base class --- CHANGELOG.md | 1 + jsonpath/function_extensions/_pattern.py | 81 +++++++++++++++++++++++ jsonpath/function_extensions/match.py | 79 ++-------------------- jsonpath/function_extensions/search.py | 83 ++---------------------- pyproject.toml | 16 ++++- tests/test_regex_cache.py | 57 ++++++++++++++++ 6 files changed, 162 insertions(+), 155 deletions(-) create mode 100644 tests/test_regex_cache.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d1d3fc5..41be77c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ These breaking changes apply to Python JSONPath in its default configuration. We - Added the `startswith(value, prefix)` function extension. `startswith` returns `True` if both arguments are strings and the second argument is a prefix of the first argument. See the [filter functions](https://jg-rp.github.io/python-jsonpath/functions/#startswith) documentation. - The non-standard `keys()` function extension has been reimplemented. It used to be a simple Python function, `jsonpath.function_extensions.keys`. Now it is a "well-typed" class, `jsonpath.function_extensions.Keys`. See the [filter functions](https://jg-rp.github.io/python-jsonpath/functions/#keys) documentation. +- Added `cache_capacity`, `debug` and `thread_safe` arguments to `jsonpath.function_extensions.Match` and `jsonpath.function_extensions.Search` constructors. **JSONPath features** diff --git a/jsonpath/function_extensions/_pattern.py b/jsonpath/function_extensions/_pattern.py index a42a689..18f1de8 100644 --- a/jsonpath/function_extensions/_pattern.py +++ b/jsonpath/function_extensions/_pattern.py @@ -1,7 +1,88 @@ from typing import List +from typing import Optional + +try: + import regex as re + + REGEX_AVAILABLE = True +except ImportError: + import re # type: ignore + + REGEX_AVAILABLE = False + +try: + from iregexp_check import check + + IREGEXP_AVAILABLE = True +except ImportError: + IREGEXP_AVAILABLE = False + +from jsonpath.exceptions import JSONPathError +from jsonpath.function_extensions import ExpressionType +from jsonpath.function_extensions import FilterFunction +from jsonpath.lru_cache import LRUCache +from jsonpath.lru_cache import ThreadSafeLRUCache + + +class AbstractRegexFilterFunction(FilterFunction): + """Base class for filter function that accept regular expression arguments. + + Arguments: + cache_capacity: The size of the regular expression cache. + debug: When `True`, raise an exception when regex pattern compilation + fails. The default - as required by RFC 9535 - is `False`, which + silently ignores bad patterns. + thread_safe: When `True`, use a `ThreadSafeLRUCache` instead of an + instance of `LRUCache`. + """ + + arg_types = [ExpressionType.VALUE, ExpressionType.VALUE] + return_type = ExpressionType.LOGICAL + + def __init__( + self, + *, + cache_capacity: int = 300, + debug: bool = False, + thread_safe: bool = False, + ): + self.cache: LRUCache[str, Optional[re.Pattern[str]]] = ( + ThreadSafeLRUCache(capacity=cache_capacity) + if thread_safe + else LRUCache(capacity=cache_capacity) + ) + + self.debug = debug + + def check_cache(self, pattern: str) -> Optional[re.Pattern[str]]: + """Return a compiled re pattern if `pattern` is valid, or `None` otherwise.""" + try: + _pattern = self.cache[pattern] + except KeyError: + if IREGEXP_AVAILABLE and not check(pattern): + if self.debug: + raise JSONPathError( + "search pattern is not a valid I-Regexp", token=None + ) from None + _pattern = None + else: + if REGEX_AVAILABLE: + pattern = map_re(pattern) + + try: + _pattern = re.compile(pattern) + except re.error: + if self.debug: + raise + _pattern = None + + self.cache[pattern] = _pattern + + return _pattern def map_re(pattern: str) -> str: + """Convert an I-Regexp pattern into a Python re pattern.""" escaped = False char_class = False parts: List[str] = [] diff --git a/jsonpath/function_extensions/match.py b/jsonpath/function_extensions/match.py index 9d56588..1743c3c 100644 --- a/jsonpath/function_extensions/match.py +++ b/jsonpath/function_extensions/match.py @@ -1,88 +1,17 @@ """The standard `match` function extension.""" -from typing import Optional +from ._pattern import AbstractRegexFilterFunction -try: - import regex as re - REGEX_AVAILABLE = True -except ImportError: - import re # type: ignore - - REGEX_AVAILABLE = False - -try: - from iregexp_check import check - - IREGEXP_AVAILABLE = True -except ImportError: - IREGEXP_AVAILABLE = False - -from jsonpath.exceptions import JSONPathError -from jsonpath.function_extensions import ExpressionType -from jsonpath.function_extensions import FilterFunction -from jsonpath.lru_cache import LRUCache -from jsonpath.lru_cache import ThreadSafeLRUCache - -from ._pattern import map_re - - -class Match(FilterFunction): - """The standard `match` function. - - Arguments: - cache_capacity: The size of the regular expression cache. - debug: When `True`, raise an exception when regex pattern compilation - fails. The default - as required by RFC 9535 - is `False`, which - silently ignores bad patterns. - thread_safe: When `True`, use a `ThreadSafeLRUCache` instead of an - instance of `LRUCache`. - """ - - arg_types = [ExpressionType.VALUE, ExpressionType.VALUE] - return_type = ExpressionType.LOGICAL - - def __init__( - self, - *, - cache_capacity: int = 300, - debug: bool = False, - thread_safe: bool = False, - ): - self._cache: LRUCache[str, Optional[re.Pattern[str]]] = ( - ThreadSafeLRUCache(capacity=cache_capacity) - if thread_safe - else LRUCache(capacity=cache_capacity) - ) - - self.debug = debug +class Match(AbstractRegexFilterFunction): + """The standard `match` function.""" def __call__(self, value: object, pattern: object) -> bool: """Return `True` if _value_ matches _pattern_, or `False` otherwise.""" if not isinstance(value, str) or not isinstance(pattern, str): return False - try: - _pattern = self._cache[pattern] - except KeyError: - if IREGEXP_AVAILABLE and not check(pattern): - if self.debug: - raise JSONPathError( - "search pattern is not a valid I-Regexp", token=None - ) from None - _pattern = None - else: - if REGEX_AVAILABLE: - pattern = map_re(pattern) - - try: - _pattern = re.compile(pattern) - except re.error: - if self.debug: - raise - _pattern = None - - self._cache[pattern] = _pattern + _pattern = self.check_cache(pattern) if _pattern is None: return False diff --git a/jsonpath/function_extensions/search.py b/jsonpath/function_extensions/search.py index 9ff9cd2..4ae2740 100644 --- a/jsonpath/function_extensions/search.py +++ b/jsonpath/function_extensions/search.py @@ -1,88 +1,17 @@ """The standard `search` function extension.""" -from typing import Optional +from ._pattern import AbstractRegexFilterFunction -try: - import regex as re - REGEX_AVAILABLE = True -except ImportError: # pragma: no cover - import re # type: ignore +class Search(AbstractRegexFilterFunction): + """The standard `search` function.""" - REGEX_AVAILABLE = False - -try: - from iregexp_check import check - - IREGEXP_AVAILABLE = True -except ImportError: # pragma: no cover - IREGEXP_AVAILABLE = False - -from jsonpath.exceptions import JSONPathError -from jsonpath.function_extensions import ExpressionType -from jsonpath.function_extensions import FilterFunction -from jsonpath.lru_cache import LRUCache -from jsonpath.lru_cache import ThreadSafeLRUCache - -from ._pattern import map_re - - -class Search(FilterFunction): - """The standard `search` function. - - Arguments: - cache_capacity: The size of the regular expression cache. - debug: When `True`, raise an exception when regex pattern compilation - fails. The default - as required by RFC 9535 - is `False`, which - silently ignores bad patterns. - thread_safe: When `True`, use a `ThreadSafeLRUCache` instead of an - instance of `LRUCache`. - """ - - arg_types = [ExpressionType.VALUE, ExpressionType.VALUE] - return_type = ExpressionType.LOGICAL - - def __init__( - self, - *, - cache_capacity: int = 300, - debug: bool = False, - thread_safe: bool = False, - ): - self._cache: LRUCache[str, Optional[re.Pattern[str]]] = ( - ThreadSafeLRUCache(capacity=cache_capacity) - if thread_safe - else LRUCache(capacity=cache_capacity) - ) - - self.debug = debug - - def __call__(self, value: str, pattern: object) -> bool: - """Return `True` if _value_ contains _pattern_, or `False` otherwise.""" + def __call__(self, value: object, pattern: object) -> bool: + """Return `True` if _value_ matches _pattern_, or `False` otherwise.""" if not isinstance(value, str) or not isinstance(pattern, str): return False - try: - _pattern = self._cache[pattern] - except KeyError: - if IREGEXP_AVAILABLE and not check(pattern): - if self.debug: - raise JSONPathError( - "search pattern is not a valid I-Regexp", token=None - ) from None - _pattern = None - else: - if REGEX_AVAILABLE: - pattern = map_re(pattern) - - try: - _pattern = re.compile(pattern) - except re.error: - if self.debug: - raise - _pattern = None - - self._cache[pattern] = _pattern + _pattern = self.check_cache(pattern) if _pattern is None: return False diff --git a/pyproject.toml b/pyproject.toml index ca89351..0696765 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,8 +59,14 @@ dependencies = [ ] [tool.hatch.envs.default.scripts] -cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=jsonpath --cov=tests {args}" -cov-html = "pytest --cov-report=html --cov-config=pyproject.toml --cov=jsonpath --cov=tests {args}" +cov = [ + "hatch run no-regex:cov", + "pytest --cov-append --cov-report=term-missing --cov-config=pyproject.toml --cov=jsonpath --cov=tests {args}" +] +cov-html = [ + "hatch run no-regex:cov", + "pytest --cov-append --cov-report=html --cov-config=pyproject.toml --cov=jsonpath --cov=tests {args}", +] no-cov = "cov --no-cov {args}" test = "pytest {args}" lint = "ruff check ." @@ -80,7 +86,11 @@ build = "mkdocs build --clean --strict" serve = "mkdocs serve --dev-addr localhost:8000" [tool.hatch.envs.no-regex] -dependencies = ["pytest"] +dependencies = ["pytest", "pytest-cov"] + +[tool.hatch.envs.no-regex.scripts] +cov = "pytest --cov-report=term-missing --cov-config=pyproject.toml --cov=jsonpath --cov=tests tests/test_compliance.py {args}" + [tool.coverage.run] branch = true diff --git a/tests/test_regex_cache.py b/tests/test_regex_cache.py new file mode 100644 index 0000000..17166d7 --- /dev/null +++ b/tests/test_regex_cache.py @@ -0,0 +1,57 @@ +try: + import iregexp_check # noqa: F401 + + IREGEXP_AVAILABLE = True +except ImportError: + IREGEXP_AVAILABLE = False + +import pytest + +from jsonpath import JSONPathError +from jsonpath.function_extensions import Search + + +def test_patterns_are_cached() -> None: + search_func = Search(cache_capacity=2) + assert len(search_func.cache) == 0 + assert search_func("abcdef", "bc.") + assert len(search_func.cache) == 1 + + +def test_malformed_patterns_are_cached() -> None: + search_func = Search(cache_capacity=2) + assert len(search_func.cache) == 0 + assert search_func("abcdef", "bc[") is False + assert len(search_func.cache) == 1 + assert search_func.cache["bc["] is None + + +@pytest.mark.skipif(IREGEXP_AVAILABLE is False, reason="requires iregexp_check") +def test_invalid_iregexp_patterns_are_cached() -> None: + search_func = Search(cache_capacity=2) + assert len(search_func.cache) == 0 + assert search_func("ab123cdef", "\\d+") is False + assert len(search_func.cache) == 1 + assert search_func.cache["\\d+"] is None + + +def test_debug_regex_patterns() -> None: + search_func = Search(cache_capacity=2, debug=True) + assert len(search_func.cache) == 0 + + with pytest.raises(JSONPathError): + search_func("abcdef", "bc[") + + +def test_cache_capacity() -> None: + search_func = Search(cache_capacity=2) + assert len(search_func.cache) == 0 + assert search_func("1abcdef", "ab[a-z]") + assert len(search_func.cache) == 1 + assert search_func("2abcdef", "bc[a-z]") + assert len(search_func.cache) == 2 # noqa: PLR2004 + assert search_func("3abcdef", "cd[a-z]") + assert len(search_func.cache) == 2 # noqa: PLR2004 + assert "cd[a-z]" in search_func.cache + assert "bc[a-z]" in search_func.cache + assert "ab[a-z]" not in search_func.cache From 1b317dc7e6823ad17bfb4d1b282f19d8b3db2fe0 Mon Sep 17 00:00:00 2001 From: James Prior Date: Tue, 26 Aug 2025 17:02:52 +0100 Subject: [PATCH 4/4] Fix type hints and expect re.error --- jsonpath/function_extensions/_pattern.py | 4 ++-- tests/test_regex_cache.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/jsonpath/function_extensions/_pattern.py b/jsonpath/function_extensions/_pattern.py index 18f1de8..d505f29 100644 --- a/jsonpath/function_extensions/_pattern.py +++ b/jsonpath/function_extensions/_pattern.py @@ -46,7 +46,7 @@ def __init__( debug: bool = False, thread_safe: bool = False, ): - self.cache: LRUCache[str, Optional[re.Pattern[str]]] = ( + self.cache: LRUCache[str, Optional[re.Pattern]] = ( # type: ignore ThreadSafeLRUCache(capacity=cache_capacity) if thread_safe else LRUCache(capacity=cache_capacity) @@ -54,7 +54,7 @@ def __init__( self.debug = debug - def check_cache(self, pattern: str) -> Optional[re.Pattern[str]]: + def check_cache(self, pattern: str) -> Optional[re.Pattern]: # type: ignore """Return a compiled re pattern if `pattern` is valid, or `None` otherwise.""" try: _pattern = self.cache[pattern] diff --git a/tests/test_regex_cache.py b/tests/test_regex_cache.py index 17166d7..c41eedb 100644 --- a/tests/test_regex_cache.py +++ b/tests/test_regex_cache.py @@ -1,3 +1,12 @@ +try: + import regex as re + + REGEX_AVAILABLE = True +except ImportError: + import re # type: ignore + + REGEX_AVAILABLE = False + try: import iregexp_check # noqa: F401 @@ -39,7 +48,7 @@ def test_debug_regex_patterns() -> None: search_func = Search(cache_capacity=2, debug=True) assert len(search_func.cache) == 0 - with pytest.raises(JSONPathError): + with pytest.raises((JSONPathError, re.error)): search_func("abcdef", "bc[")