Skip to content

Commit 57b6aa3

Browse files
michalis1michalis1
andauthored
Add is-subset and is-subset-ci operator modes for list comparison (#150)
* michalis1-adds-new-operator-check * update changes --------- Co-authored-by: michalis1 <milatis@gmail.com>
1 parent 7cc344b commit 57b6aa3

File tree

10 files changed

+290
-16
lines changed

10 files changed

+290
-16
lines changed

changes/149.added

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add is-subset and is-subset-ci operator modes for list comparison.

docs/user/lib_getting_started.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ Below are the names of checks provided by the library. These both describe the t
5353
- `tolerance`: the keys must match and the values can differ according to the 'tolerance' value provided
5454
- `parameter_match`: a reference key and value is provided and its presence (or absence) is checked in the provided object
5555
- `regex`: a reference regex pattern is provided and is used to find a match in the provided object
56-
- `operator`: similar to parameter match, but the reference includes several different possible operators: 'in', 'bool', 'string', and numerical comparison with 'int' and 'float' to check against
56+
- `operator`: similar to parameter match, but the reference can use one of several operator families: `in`, `bool`, `string`, `list`, and numerical comparisons (`int` and `float`)
5757

5858
`CheckTypes` are explained in more detail in the [architecture](architecture.md).
5959

docs/user/lib_use_cases.md

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,14 @@ The `operator` check is a collection of more specific checks divided into catego
474474
2. `not-contains`: determines if an element string value does not contain the provided test-string value.
475475
- `not-contains: "overlay"`: checks that "overlay" is not present in the given node.
476476

477+
#### `list` Operators
478+
479+
1. `is-subset`: Check if the value of a specified element is a subset of the provided reference list.
480+
- `is-subset: ["A", "B", "C"]`: checks if the extracted list contains only values from the provided reference list.
481+
482+
2. `is-subset-ci`: Check if the value of a specified element is a subset of the provided reference list using case-insensitive comparison.
483+
- `is-subset-ci: ["A", "B", "C"]`: checks if the extracted list contains only values from the provided reference list, ignoring letter case.
484+
477485
#### `int`, `float` Operators
478486

479487
1. `is-gt`: Check if the value of a specified element is greater than a given numeric value.
@@ -613,6 +621,88 @@ Can you guess what would be the outcome for an `int`, `float` operator?
613621
([], True)
614622
```
615623

624+
What about checking whether an extracted list is a subset of an allowed list?
625+
626+
```python
627+
>>> data = [
628+
... {
629+
... "id": "DOMAIN1.COMPANY.COM",
630+
... "include_trusted_domains": [
631+
... "COMPANY.COM",
632+
... "domain1.company.com",
633+
... "domain2.company.COM",
634+
... "domain3.company.com",
635+
... "test.com",
636+
... ],
637+
... }
638+
... ]
639+
>>> path = "[*].[$id$,include_trusted_domains]"
640+
>>> value = extract_data_from_json(data, path)
641+
>>> value
642+
[{'DOMAIN1.COMPANY.COM': {'include_trusted_domains': ['COMPANY.COM',
643+
'domain1.company.com',
644+
'domain2.company.COM',
645+
'domain3.company.com',
646+
'test.com']}}]
647+
```
648+
649+
Using the case-sensitive subset operator:
650+
651+
```python
652+
>>> check_args = {
653+
... "params": {
654+
... "mode": "is-subset",
655+
... "operator_data": [
656+
... "COMPANY.COM",
657+
... "domain1.company.com",
658+
... "domain2.company.com",
659+
... "domain3.company.com",
660+
... "domain4.company.com",
661+
... "domain5.company.com",
662+
... "test.com",
663+
... "test1.com",
664+
... "test2.com",
665+
... ],
666+
... }
667+
... }
668+
>>> check = CheckType.create("operator")
669+
>>> result = check.evaluate(check_args, value)
670+
>>> result
671+
([{'DOMAIN1.COMPANY.COM': {'include_trusted_domains': ['COMPANY.COM',
672+
'domain1.company.com',
673+
'domain2.company.COM',
674+
'domain3.company.com',
675+
'test.com']}}], False)
676+
```
677+
678+
The is-subset operator is case-sensitive, so "domain2.company.COM" does not match "domain2.company.com".
679+
680+
Using the case-insensitive subset operator:
681+
682+
```python
683+
>>> check_args = {
684+
... "params": {
685+
... "mode": "is-subset-ci",
686+
... "operator_data": [
687+
... "COMPANY.COM",
688+
... "domain1.company.com",
689+
... "domain2.company.com",
690+
... "domain3.company.com",
691+
... "domain4.company.com",
692+
... "domain5.company.com",
693+
... "test.com",
694+
... "test1.com",
695+
... "test2.com",
696+
... ],
697+
... }
698+
... }
699+
>>> result = check.evaluate(check_args, value)
700+
>>> result
701+
([], True)
702+
```
703+
704+
These operators are useful when the extracted value itself is a list and must be validated against an allowed reference list.
705+
616706
See `tests` folder in the repo for more examples.
617707

618708
## Putting a Result Back Together

jdiff/check_types.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,11 +180,13 @@ def _validate(params) -> None: # type: ignore[override]
180180
bool_operators = ("all-same",)
181181
number_operators = ("is-gt", "is-lt", "is-ge", "is-le")
182182
string_operators = ("contains", "not-contains")
183+
list_operators = ("is-subset", "is-subset-ci")
183184
valid_options = (
184185
in_operators,
185186
bool_operators,
186187
number_operators,
187188
string_operators,
189+
list_operators,
188190
)
189191

190192
# Validate "params" argument is not None.
@@ -245,6 +247,13 @@ def _validate(params) -> None: # type: ignore[override]
245247
raise ValueError(
246248
f"check option all-same must have value of type bool. You have: {params_value} of type {type(params_value)}"
247249
)
250+
# "is-subset" requires lists or tuples
251+
if params_key in list_operators:
252+
if not isinstance(params_value, (list, tuple)):
253+
raise ValueError(
254+
f"check options {list_operators} must have value of type list or tuple. "
255+
f"You have: {params_value} of type {type(params_value)}."
256+
)
248257

249258
def evaluate(self, params: Any, value_to_compare: Any) -> Tuple[Dict, bool]: # type: ignore[override]
250259
"""Operator evaluator implementation."""

jdiff/operator.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,3 +129,43 @@ def in_range(self) -> Tuple[List, bool]:
129129
def not_in_range(self) -> Tuple[List, bool]:
130130
"""Is not in range operator caller."""
131131
return self._loop_through_wrapper("not_in_range")
132+
133+
def is_subset(self) -> Tuple[List, bool]:
    """Check whether each extracted list is a subset of the reference list.

    The comparison is case-sensitive. Every innermost value found in
    ``self.value_to_compare`` must be a list or tuple whose elements all
    appear in ``self.reference_data``.

    Returns:
        ``([], True)`` when every extracted value passes, otherwise
        ``(failed_items, False)``. An item is listed once for each of its
        values that is not a list/tuple or is not a subset of the reference.
    """
    reference = self.reference_data
    try:
        reference_lookup = set(reference)
    except TypeError:
        # The reference holds unhashable entries (e.g. nested lists or dicts),
        # so a set cannot be built; use equality-based membership instead.
        reference_lookup = None

    def _within_reference(candidate) -> bool:
        """Return True when every element of candidate is in the reference."""
        if reference_lookup is not None:
            try:
                return set(candidate).issubset(reference_lookup)
            except TypeError:
                # Candidate holds unhashable elements; fall through to the
                # equality scan below rather than crashing the check.
                pass
        return all(element in reference for element in candidate)

    failed = []
    for item in self.value_to_compare:
        for value in item.values():
            for evaluated_value in value.values():
                # A non-list value can never be a subset; report it as failed.
                if not isinstance(evaluated_value, (list, tuple)):
                    failed.append(item)
                    continue

                if not _within_reference(evaluated_value):
                    failed.append(item)

    return (failed, not failed)
152+
153+
def is_subset_ci(self) -> Tuple[List, bool]:
    """Check whether each extracted list is a subset of the reference list, ignoring case.

    Every element, on both the reference and the extracted side, is converted
    with ``str()`` and case-folded before the comparison. ``str.casefold()`` is
    used instead of ``str.lower()`` so that caseless-equal strings such as
    "straße" and "STRASSE" match.

    Returns:
        ``([], True)`` when every extracted value passes, otherwise
        ``(failed_items, False)``. An item is listed once for each of its
        values that is not a list/tuple or is not a subset of the reference.
    """
    reference_set = {str(entry).casefold() for entry in self.reference_data}

    failed = []
    for item in self.value_to_compare:
        for value in item.values():
            for evaluated_value in value.values():
                # A non-list value can never be a subset; report it as failed.
                if not isinstance(evaluated_value, (list, tuple)):
                    failed.append(item)
                    continue

                normalized_value = {str(element).casefold() for element in evaluated_value}
                if not normalized_value.issubset(reference_set):
                    failed.append(item)

    return (failed, not failed)

jdiff/utils/jmespath_parsers.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -74,33 +74,37 @@ def jmespath_refkey_parser(path: str) -> str:
7474

7575

7676
def associate_key_of_my_value(paths: str, wanted_value: List) -> List:
    """Pair each field name taken from the last path segment with its extracted value(s).

    Args:
        paths: Value path. Only the segment after the final "." is inspected:
            a bracketed segment such as "[is_enabled,is_up]" names several
            fields, anything else (e.g. "state") names a single field.
        wanted_value: Extracted values, one entry per matched object.

    Returns:
        A list holding one ``{field_name: value}`` dict per entry of
        ``wanted_value``.

    Raises:
        ValueError: If several fields are named and an entry does not hold
            exactly one value per field.
    """
    last_segment = paths.split(".")[-1]

    if last_segment.startswith("[") and last_segment.endswith("]"):
        # Strip blanks so "[is_enabled, is_up]" yields "is_up" rather than " is_up".
        field_names = [name.strip() for name in last_segment.strip("[]").split(",")]
    else:
        field_names = [last_segment]

    if len(field_names) == 1:
        field_name = field_names[0]
        paired = []
        for item in wanted_value:
            # Unwrap a one-element list wrapped around a non-list value; plain
            # values and every other list are kept exactly as extracted.
            is_single_wrapper = isinstance(item, list) and len(item) == 1 and not isinstance(item[0], list)
            paired.append({field_name: item[0] if is_single_wrapper else item})
        return paired

    paired = []
    for items in wanted_value:
        if len(items) != len(field_names):
            raise ValueError("Key's value len != from value len")
        # Lengths are equal here, so zip pairs every field with its value.
        paired.append(dict(zip(field_names, items)))

    return paired
106110

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
[
2+
{
3+
"allocate_gids": true,
4+
"allocate_uids": true,
5+
"assume_default_domain": false,
6+
"authentication": true,
7+
"check_online_interval": 300,
8+
"controller_time": 1769518575,
9+
"create_home_directory": false,
10+
"domain_offline_alerts": false,
11+
"extra_expected_spns": [],
12+
"findable_groups": [],
13+
"findable_users": [],
14+
"forest": "domain1.company.com",
15+
"groupnet": "groupnet1",
16+
"home_directory_template": "/ifs/zone1/cluster1/adm/%U",
17+
"hostname": "cluster1.domain1.company.com",
18+
"id": "DOMAIN1.COMPANY.COM",
19+
"ignore_all_trusts": false,
20+
"ignored_trusted_domains": [],
21+
"include_trusted_domains": [
22+
"COMPANY.COM",
23+
"domain1.company.com",
24+
"domain2.company.COM",
25+
"domain3.company.com",
26+
"test.com"
27+
]
28+
}
29+
]

tests/test_get_value.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,3 +139,37 @@ def test_top_key_anchor(jmspath, expected_value):
139139
value = extract_data_from_json(data=data, path=jmspath)
140140

141141
assert value == expected_value, ASSERT_FAIL_MESSAGE.format(output=value, expected_output=expected_value)
142+
143+
144+
def test_extract_data_from_json_with_ref_key_and_list_value():
    """Ensure a ref-key path keeps a list-valued field intact under its reference key."""
    reference_key = "DOMAIN1.MYCOMPANY.COM"
    trusted_domains = (
        "MYCOMPANY.COM",
        "domain1.mycompany.com",
        "domain2.mycompany.COM",
        "domain3.mycompany.com",
        "test_domain.com",
    )

    # Input and expectation each get their own list so neither can alias the other.
    data = [{"id": reference_key, "include_trusted_domains": list(trusted_domains)}]
    expected_value = [{reference_key: {"include_trusted_domains": list(trusted_domains)}}]

    value = extract_data_from_json(data=data, path="[*].[$id$,include_trusted_domains]")

    assert value == expected_value, ASSERT_FAIL_MESSAGE.format(output=value, expected_output=expected_value)

tests/test_operators.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,65 @@
157157
False,
158158
),
159159
)
160+
# Reference (allowed) domains shared by both subset fixtures; each fixture gets its own copy.
_SUBSET_ALLOWED_DOMAINS = (
    "COMPANY.COM",
    "domain1.company.com",
    "domain2.company.com",
    "domain3.company.com",
    "domain4.company.com",
    "domain5.company.com",
    "test.com",
    "test1.com",
    "test2.com",
)
_SUBSET_PATH = "[*].[$id$,include_trusted_domains]"

# Case-sensitive mode: "domain2.company.COM" is not in the reference, so the item is reported.
operator_is_subset = (
    "trusted_domains.json",
    "operator",
    {"params": {"mode": "is-subset", "operator_data": list(_SUBSET_ALLOWED_DOMAINS)}},
    _SUBSET_PATH,
    (
        [
            {
                "DOMAIN1.COMPANY.COM": {
                    "include_trusted_domains": [
                        "COMPANY.COM",
                        "domain1.company.com",
                        "domain2.company.COM",
                        "domain3.company.com",
                        "test.com",
                    ]
                }
            }
        ],
        False,
    ),
)
# Case-insensitive mode: the same data passes because letter case is ignored.
operator_is_subset_ci = (
    "trusted_domains.json",
    "operator",
    {"params": {"mode": "is-subset-ci", "operator_data": list(_SUBSET_ALLOWED_DOMAINS)}},
    _SUBSET_PATH,
    ([], True),
)
160219

161220
operator_all_tests = [
162221
operator_all_same,
@@ -172,6 +231,8 @@
172231
operator_not_in,
173232
operator_in_range,
174233
operator_not_in_range,
234+
operator_is_subset,
235+
operator_is_subset_ci,
175236
]
176237

177238

0 commit comments

Comments
 (0)