Skip to content

Commit 277c89c

Browse files
authored
Merge pull request #590 from akshat62/feature/glob-wildcard-paths
Add wildcard/glob pattern support for exclude_paths and include_paths
2 parents 83e9e61 + d0ca084 commit 277c89c

10 files changed

Lines changed: 1070 additions & 39 deletions

File tree

deepdiff/deephash.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
get_id, type_is_subclass_of_type_group, type_in_type_group,
1616
number_to_string, datetime_normalize, KEY_TO_VAL_STR,
1717
get_truncate_datetime, dict_, add_root_to_paths, PydanticBaseModel,
18+
separate_wildcard_and_exact_paths,
1819
SetOrdered)
1920

2021
from deepdiff.base import Base
@@ -190,6 +191,7 @@ def __init__(self,
190191
custom_operators: Optional[List[Any]] = None,
191192
default_timezone: Union[datetime.timezone, "BaseTzInfo"] = datetime.timezone.utc,
192193
encodings: Optional[List[str]] = None,
194+
exclude_glob_paths: Optional[List[Any]] = None,
193195
exclude_obj_callback: Optional[Callable[[Any, str], bool]] = None,
194196
exclude_paths: Optional[PathType] = None,
195197
exclude_regex_paths: Optional[RegexType] = None,
@@ -206,6 +208,7 @@ def __init__(self,
206208
ignore_type_in_groups: Any = None,
207209
ignore_type_subclasses: bool = False,
208210
ignore_uuid_types: bool = False,
211+
include_glob_paths: Optional[List[Any]] = None,
209212
include_paths: Optional[PathType] = None,
210213
number_format_notation: str = "f",
211214
number_to_string_func: Optional[NumberToStringFunc] = None,
@@ -232,8 +235,14 @@ def __init__(self,
232235
exclude_types = set() if exclude_types is None else set(exclude_types)
233236
self.exclude_types_tuple = tuple(exclude_types) # we need tuple for checking isinstance
234237
self.ignore_repetition = ignore_repetition
235-
self.exclude_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(exclude_paths))
236-
self.include_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(include_paths))
238+
_exclude_set = convert_item_or_items_into_set_else_none(exclude_paths)
239+
_exclude_exact, _exclude_globs = separate_wildcard_and_exact_paths(_exclude_set)
240+
self.exclude_paths = add_root_to_paths(_exclude_exact)
241+
self.exclude_glob_paths = exclude_glob_paths or _exclude_globs
242+
_include_set = convert_item_or_items_into_set_else_none(include_paths)
243+
_include_exact, _include_globs = separate_wildcard_and_exact_paths(_include_set)
244+
self.include_paths = add_root_to_paths(_include_exact)
245+
self.include_glob_paths = include_glob_paths or _include_globs
237246
self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths)
238247
self.hasher = default_hasher if hasher is None else hasher
239248
self.hashes[UNPROCESSED_KEY] = [] # type: ignore
@@ -462,11 +471,21 @@ def _skip_this(self, obj: Any, parent: str) -> bool:
462471
skip = False
463472
if self.exclude_paths and parent in self.exclude_paths:
464473
skip = True
465-
if self.include_paths and parent != 'root':
466-
if parent not in self.include_paths:
467-
skip = True
468-
for prefix in self.include_paths:
469-
if parent.startswith(prefix):
474+
elif self.exclude_glob_paths and any(gp.match(parent) for gp in self.exclude_glob_paths):
475+
skip = True
476+
if (self.include_paths or self.include_glob_paths) and parent != 'root':
477+
skip = True
478+
if self.include_paths:
479+
if parent in self.include_paths:
480+
skip = False
481+
else:
482+
for prefix in self.include_paths:
483+
if parent.startswith(prefix):
484+
skip = False
485+
break
486+
if skip and self.include_glob_paths:
487+
for gp in self.include_glob_paths:
488+
if gp.match_or_is_ancestor(parent):
470489
skip = False
471490
break
472491
elif self.exclude_regex_paths and any(

deepdiff/diff.py

Lines changed: 86 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@
2929
TEXT_VIEW, TREE_VIEW, DELTA_VIEW, COLORED_VIEW, COLORED_COMPACT_VIEW,
3030
detailed__dict__, add_root_to_paths,
3131
np, get_truncate_datetime, dict_, CannotCompare, ENUM_INCLUDE_KEYS,
32-
PydanticBaseModel, Opcode, SetOrdered, ipranges)
32+
PydanticBaseModel, Opcode, SetOrdered, ipranges,
33+
separate_wildcard_and_exact_paths)
3334
from deepdiff.serialization import SerializationMixin
3435
from deepdiff.distance import DistanceMixin, logarithmic_similarity
3536
from deepdiff.model import (
@@ -110,7 +111,9 @@ def _report_progress(_stats: Dict[str, Any], progress_logger: Callable[[str], No
110111
DEEPHASH_PARAM_KEYS = (
111112
'exclude_types',
112113
'exclude_paths',
114+
'exclude_glob_paths',
113115
'include_paths',
116+
'include_glob_paths',
114117
'exclude_regex_paths',
115118
'hasher',
116119
'significant_digits',
@@ -209,6 +212,10 @@ def __init__(self,
209212
_shared_parameters: Optional[Dict[str, Any]]=None,
210213
**kwargs):
211214
super().__init__()
215+
# Defaults for glob path attributes — needed for non-root instances
216+
# that may receive _parameters without these keys.
217+
self.exclude_glob_paths = None
218+
self.include_glob_paths = None
212219
if kwargs:
213220
raise ValueError((
214221
"The following parameter(s) are not valid: %s\n"
@@ -257,8 +264,12 @@ def __init__(self,
257264
ignore_type_subclasses=ignore_type_subclasses,
258265
ignore_uuid_types=ignore_uuid_types)
259266
self.report_repetition = report_repetition
260-
self.exclude_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(exclude_paths))
261-
self.include_paths = add_root_to_paths(convert_item_or_items_into_set_else_none(include_paths))
267+
_exclude_set = convert_item_or_items_into_set_else_none(exclude_paths)
268+
_exclude_exact, self.exclude_glob_paths = separate_wildcard_and_exact_paths(_exclude_set)
269+
self.exclude_paths = add_root_to_paths(_exclude_exact)
270+
_include_set = convert_item_or_items_into_set_else_none(include_paths)
271+
_include_exact, self.include_glob_paths = separate_wildcard_and_exact_paths(_include_set)
272+
self.include_paths = add_root_to_paths(_include_exact)
262273
self.exclude_regex_paths = convert_item_or_items_into_compiled_regexes_else_none(exclude_regex_paths)
263274
self.exclude_types = set(exclude_types) if exclude_types else None
264275
self.exclude_types_tuple = tuple(exclude_types) if exclude_types else None # we need tuple for checking isinstance
@@ -423,7 +434,7 @@ def _group_by_sort_key(x):
423434
self.__dict__.clear()
424435

425436
def _get_deephash_params(self):
426-
result = {key: self._parameters[key] for key in DEEPHASH_PARAM_KEYS}
437+
result = {key: self._parameters.get(key) for key in DEEPHASH_PARAM_KEYS}
427438
result['ignore_repetition'] = not self.report_repetition
428439
result['number_to_string_func'] = self.number_to_string
429440
return result
@@ -442,6 +453,8 @@ def _report_result(self, report_type, change_level, local_tree=None):
442453
"""
443454

444455
if not self._skip_this(change_level):
456+
if self._skip_report_for_include_glob(change_level):
457+
return
445458
change_level.report_type = report_type
446459
tree = self.tree if local_tree is None else local_tree
447460
tree[report_type].add(change_level)
@@ -461,10 +474,33 @@ def custom_report_result(self, report_type, level, extra_info=None):
461474
"""
462475

463476
if not self._skip_this(level):
477+
if self._skip_report_for_include_glob(level):
478+
return
464479
level.report_type = report_type
465480
level.additional[CUSTOM_FIELD] = extra_info
466481
self.tree[report_type].add(level)
467482

483+
def _skip_report_for_include_glob(self, level):
484+
"""When include_glob_paths is set, _skip_this allows ancestors through for traversal.
485+
This method does a stricter check at report time: only report if the path
486+
actually matches a glob pattern or is a descendant of a matching path,
487+
or if it already matches an exact include_path."""
488+
if not self.include_glob_paths:
489+
return False
490+
level_path = level.path()
491+
# If exact include_paths already matched, don't skip
492+
if self.include_paths:
493+
if level_path in self.include_paths:
494+
return False
495+
for prefix in self.include_paths:
496+
if prefix in level_path:
497+
return False
498+
# Check glob patterns: match or descendant
499+
for gp in self.include_glob_paths:
500+
if gp.match_or_is_descendant(level_path):
501+
return False
502+
return True
503+
468504
@staticmethod
469505
def _dict_from_slots(object: Any) -> Dict[str, Any]:
470506
def unmangle(attribute: str) -> str:
@@ -552,11 +588,21 @@ def _skip_this(self, level: Any) -> bool:
552588
skip = False
553589
if self.exclude_paths and level_path in self.exclude_paths:
554590
skip = True
555-
if self.include_paths and level_path != 'root':
556-
if level_path not in self.include_paths:
557-
skip = True
558-
for prefix in self.include_paths:
559-
if prefix in level_path or level_path in prefix:
591+
elif self.exclude_glob_paths and any(gp.match(level_path) for gp in self.exclude_glob_paths):
592+
skip = True
593+
if not skip and (self.include_paths or self.include_glob_paths) and level_path != 'root':
594+
skip = True
595+
if self.include_paths:
596+
if level_path in self.include_paths:
597+
skip = False
598+
else:
599+
for prefix in self.include_paths:
600+
if prefix in level_path or level_path in prefix:
601+
skip = False
602+
break
603+
if skip and self.include_glob_paths:
604+
for gp in self.include_glob_paths:
605+
if gp.match_or_is_ancestor(level_path):
560606
skip = False
561607
break
562608
elif self.exclude_regex_paths and any(
@@ -586,28 +632,34 @@ def _skip_this(self, level: Any) -> bool:
586632

587633
def _skip_this_key(self, level: Any, key: Any) -> bool:
588634
# if include_paths is not set, then treat every path as included
589-
if self.include_paths is None:
590-
return False
591-
if "{}['{}']".format(level.path(), key) in self.include_paths:
592-
return False
593-
if level.path() in self.include_paths:
594-
# matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']"]
635+
if self.include_paths is None and self.include_glob_paths is None:
595636
return False
596-
for prefix in self.include_paths:
597-
if "{}['{}']".format(level.path(), key) in prefix:
598-
# matches as long the prefix is longer than this object key
599-
# eg.: level+key root['foo']['bar'] matches prefix root['foo']['bar'] from include paths
600-
# level+key root['foo'] matches prefix root['foo']['bar'] from include_paths
601-
# level+key root['foo']['bar'] DOES NOT match root['foo'] from include_paths This needs to be handled afterwards
637+
key_path = "{}['{}']".format(level.path(), key)
638+
if self.include_paths:
639+
if key_path in self.include_paths:
602640
return False
603-
# check if a higher level is included as a whole (=without any sublevels specified)
604-
# matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']"]
605-
# but does not match, if it is level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']['fruits']"]
606-
up = level.up
607-
while up is not None:
608-
if up.path() in self.include_paths:
641+
if level.path() in self.include_paths:
642+
# matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']"]
609643
return False
610-
up = up.up
644+
for prefix in self.include_paths:
645+
if key_path in prefix:
646+
# matches as long the prefix is longer than this object key
647+
# eg.: level+key root['foo']['bar'] matches prefix root['foo']['bar'] from include paths
648+
# level+key root['foo'] matches prefix root['foo']['bar'] from include_paths
649+
# level+key root['foo']['bar'] DOES NOT match root['foo'] from include_paths This needs to be handled afterwards
650+
return False
651+
# check if a higher level is included as a whole (=without any sublevels specified)
652+
# matches e.g. level+key root['foo']['bar']['veg'] include_paths ["root['foo']"]
653+
# but does not match, if it is level+key root['foo']['bar']['veg'] include_paths ["root['foo']['bar']['fruits']"]
654+
up = level.up
655+
while up is not None:
656+
if up.path() in self.include_paths:
657+
return False
658+
up = up.up
659+
if self.include_glob_paths:
660+
for gp in self.include_glob_paths:
661+
if gp.match_or_is_ancestor(key_path):
662+
return False
611663
return True
612664

613665
def _get_clean_to_keys_mapping(self, keys: Any, level: Any) -> Dict[Any, Any]:
@@ -701,9 +753,13 @@ def _diff_dict(
701753
t_keys_removed = t1_keys - t_keys_intersect
702754

703755
if self.threshold_to_diff_deeper:
704-
if self.exclude_paths:
756+
if self.exclude_paths or self.exclude_glob_paths:
705757
t_keys_union = {f"{level.path()}[{repr(key)}]" for key in (t2_keys | t1_keys)}
706-
t_keys_union -= self.exclude_paths
758+
if self.exclude_paths:
759+
t_keys_union -= self.exclude_paths
760+
if self.exclude_glob_paths:
761+
t_keys_union = {k for k in t_keys_union
762+
if not any(gp.match(k) for gp in self.exclude_glob_paths)}
707763
t_keys_union_len = len(t_keys_union)
708764
else:
709765
t_keys_union_len = len(t2_keys | t1_keys)

deepdiff/docstrings/deephash_doc.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,12 @@ exclude_types: list, default = None
3232

3333
exclude_paths: list, default = None
3434
List of paths to exclude from the report. If only one item, you can pass it as a string instead of a list containing only one path.
35+
Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth.
3536

3637

3738
include_paths: list, default = None
3839
List of the only paths to include in the report. If only one item, you can pass it as a string.
40+
Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth.
3941

4042

4143
exclude_regex_paths: list, default = None

deepdiff/docstrings/diff_doc.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ encodings: List, default = None
5353
exclude_paths: list, default = None
5454
:ref:`exclude_paths_label`
5555
List of paths to exclude from the report. If only one item, you can pass it as a string.
56+
Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth.
5657

5758
exclude_regex_paths: list, default = None
5859
:ref:`exclude_regex_paths_label`
@@ -74,6 +75,7 @@ exclude_obj_callback_strict: function, default = None
7475
include_paths: list, default = None
7576
:ref:`include_paths_label`
7677
List of the only paths to include in the report. If only one item is in the list, you can pass it as a string.
78+
Supports :ref:`wildcard_paths_label`: use ``[*]`` to match one segment or ``[**]`` to match any depth.
7779

7880
include_obj_callback: function, default = None
7981
:ref:`include_obj_callback_label`

deepdiff/docstrings/exclude_paths.rst

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,49 @@ Example
5959
{'values_changed': {"root['foo']['bar']": {'new_value': 'banana', 'old_value': 'potato'}}}
6060

6161

62+
.. _wildcard_paths_label:
63+
64+
Wildcard (Glob) Paths
65+
---------------------
66+
67+
Both ``exclude_paths`` and ``include_paths`` support wildcard patterns for matching multiple paths at once:
68+
69+
- ``[*]`` or ``.*`` matches exactly **one** path segment (any key, index, or attribute).
70+
- ``[**]`` or ``.**`` matches **zero or more** path segments at any depth.
71+
72+
Wildcard patterns must use the full ``root`` prefix (shorthand keys are not supported for wildcards).
73+
74+
Exclude all ``password`` fields regardless of the parent key:
75+
>>> t1 = {"users": {"alice": {"name": "Alice", "password": "s1"}, "bob": {"name": "Bob", "password": "s2"}}}
76+
>>> t2 = {"users": {"alice": {"name": "Alice", "password": "x1"}, "bob": {"name": "Bob", "password": "x2"}}}
77+
>>> DeepDiff(t1, t2, exclude_paths=["root['users'][*]['password']"])
78+
{}
79+
80+
Include only the ``name`` field of each top-level entry (``[*]`` matches exactly one segment):
81+
>>> t1 = {"a": {"name": "A", "secret": 1}, "b": {"name": "B", "secret": 2}}
82+
>>> t2 = {"a": {"name": "X", "secret": 1}, "b": {"name": "Y", "secret": 2}}
83+
>>> result = DeepDiff(t1, t2, include_paths=["root[*]['name']"])
84+
>>> set(result.get('values_changed', {}).keys()) == {"root['a']['name']", "root['b']['name']"}
85+
True
86+
87+
Use ``[**]`` to match at any depth:
88+
>>> t1 = {"config": {"db": {"password": "old"}, "cache": {"password": "old"}}}
89+
>>> t2 = {"config": {"db": {"password": "new"}, "cache": {"password": "new"}}}
90+
>>> DeepDiff(t1, t2, exclude_paths=["root[**]['password']"])
91+
{}
92+
93+
Literal keys named ``*`` or ``**`` are not treated as wildcards when quoted:
94+
>>> t1 = {"*": 1, "a": 2}
95+
>>> t2 = {"*": 10, "a": 20}
96+
>>> result = DeepDiff(t1, t2, exclude_paths=["root['*']"])
97+
>>> "root['a']" in result.get('values_changed', {})
98+
True
99+
100+
When both ``exclude_paths`` and ``include_paths`` apply to the same path, exclusion takes precedence.
101+
102+
Wildcards also work with ``DeepHash`` and ``DeepSearch`` exclude_paths.
103+
104+
62105
.. _exclude_regex_paths_label:
63106

64107
Exclude Regex Paths

deepdiff/docstrings/search_doc.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ verbose_level : int >= 0, default = 1.
1414

1515
exclude_paths: list, default = None.
1616
List of paths to exclude from the report.
17+
Supports wildcard patterns: use ``[*]`` to match one segment or ``[**]`` to match any depth.
1718

1819
exclude_types: list, default = None.
1920
List of object types to exclude from the report.

deepdiff/helper.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,30 @@ def add_root_to_paths(paths: Optional[Iterable[str]]) -> Optional[SetOrdered]:
381381
return result
382382

383383

384+
def separate_wildcard_and_exact_paths(paths):
385+
"""Separate a set of paths into exact paths and wildcard pattern paths.
386+
387+
Returns ``(exact_set_or_none, wildcard_list_or_none)``.
388+
Wildcard paths must start with ``root``; a ``ValueError`` is raised otherwise.
389+
"""
390+
if not paths:
391+
return None, None
392+
from deepdiff.path import path_has_wildcard, compile_glob_paths
393+
exact = set()
394+
wildcards = []
395+
for path in paths:
396+
if path_has_wildcard(path):
397+
if not path.startswith('root'):
398+
raise ValueError(
399+
"Wildcard paths must start with 'root'. Got: {}".format(path))
400+
wildcards.append(path)
401+
else:
402+
exact.add(path)
403+
exact_result = exact if exact else None
404+
glob_result = compile_glob_paths(wildcards) if wildcards else None
405+
return exact_result, glob_result
406+
407+
384408
RE_COMPILED_TYPE = type(re.compile(''))
385409

386410

0 commit comments

Comments
 (0)