Add is-subset and is-subset-ci operator modes for list comparison (#150)

michalis1 · michalis1 · web-flow · commit 57b6aa37d6ed · 2026-03-11T07:48:26.000-06:00
* michalis1-adds-new-operator-check

* update changes

---------

Co-authored-by: michalis1 <milatis@gmail.com>
diff --git a/changes/149.added b/changes/149.added
@@ -0,0 +1 @@
+Add is-subset and is-subset-ci operator modes for list comparison.
diff --git a/docs/user/lib_getting_started.md b/docs/user/lib_getting_started.md
@@ -53,7 +53,7 @@ Below are the names of checks provided by the library. These both describe the t
 - `tolerance`: the keys must match and the values can differ according to the 'tolerance' value provided
 - `parameter_match`: a reference key and value is provided and its presence (or absence) is checked in the provided object
 - `regex`: a reference regex pattern is provided and is used to find a match in the provided object
-- `operator`: similar to parameter match, but the reference includes several different possible operators: 'in', 'bool', 'string', and numerical comparison with 'int' and 'float' to check against
+- `operator`: similar to parameter match, but the reference includes several different possible operators: `in`, `bool`, `string`, `list`, and numerical comparison with `int` and `float` to check against
 
 `CheckTypes` are explained in more detail in the [architecture](architecture.md).
 
diff --git a/docs/user/lib_use_cases.md b/docs/user/lib_use_cases.md
@@ -474,6 +474,14 @@ The `operator` check is a collection of more specific checks divided into catego
 2. `not-contains`: determines if an element string value does not contain the provided test-string value.
     - `not-contains: "overlay"`: checks if "overlay" is present in given node or not.
 
+#### `list` Operators
+
+1. `is-subset`: Check if the value of a specified element is a subset of the provided reference list.
+    - `is-subset: ["A", "B", "C"]`: checks if the extracted list contains only values from the provided reference list.
+
+2. `is-subset-ci`: Check if the value of a specified element is a subset of the provided reference list using case-insensitive comparison.
+    - `is-subset-ci: ["A", "B", "C"]`: checks if the extracted list contains only values from the provided reference list, ignoring letter case.
+
 #### `int`, `float` Operators
 
 1. `is-gt`: Check if the value of a specified element is greater than a given numeric value.
@@ -613,6 +621,88 @@ Can you guess what would be the outcome for an `int`, `float` operator?
 ([], True)
 ```
 
+What about checking whether an extracted list is a subset of an allowed list?
+
+```python
+>>> data = [
+...     {
+...         "id": "DOMAIN1.COMPANY.COM",
+...         "include_trusted_domains": [
+...             "COMPANY.COM",
+...             "domain1.company.com",
+...             "domain2.company.COM",
+...             "domain3.company.com",
+...             "test.com",
+...         ],
+...     }
+... ]
+>>> path = "[*].[$id$,include_trusted_domains]"
+>>> value = extract_data_from_json(data, path)
+>>> value
+[{'DOMAIN1.COMPANY.COM': {'include_trusted_domains': ['COMPANY.COM',
+                                                      'domain1.company.com',
+                                                      'domain2.company.COM',
+                                                      'domain3.company.com',
+                                                      'test.com']}}]
+```
+
+Using the case-sensitive subset operator:
+
+```python
+>>> check_args = {
+...     "params": {
+...         "mode": "is-subset",
+...         "operator_data": [
+...             "COMPANY.COM",
+...             "domain1.company.com",
+...             "domain2.company.com",
+...             "domain3.company.com",
+...             "domain4.company.com",
+...             "domain5.company.com",
+...             "test.com",
+...             "test1.com",
+...             "test2.com",
+...         ],
+...     }
+... }
+>>> check = CheckType.create("operator")
+>>> result = check.evaluate(check_args, value)
+>>> result
+([{'DOMAIN1.COMPANY.COM': {'include_trusted_domains': ['COMPANY.COM',
+                                                       'domain1.company.com',
+                                                       'domain2.company.COM',
+                                                       'domain3.company.com',
+                                                       'test.com']}}], False)
+```
+
+The `is-subset` operator is case-sensitive, so the extracted `"domain2.company.COM"` does not match the reference `"domain2.company.com"` and the check returns `False`.
+
+Using the case-insensitive subset operator:
+
+```python
+>>> check_args = {
+...     "params": {
+...         "mode": "is-subset-ci",
+...         "operator_data": [
+...             "COMPANY.COM",
+...             "domain1.company.com",
+...             "domain2.company.com",
+...             "domain3.company.com",
+...             "domain4.company.com",
+...             "domain5.company.com",
+...             "test.com",
+...             "test1.com",
+...             "test2.com",
+...         ],
+...     }
+... }
+>>> result = check.evaluate(check_args, value)
+>>> result
+([], True)
+```
+
+These operators are useful when the extracted value itself is a list and must be validated against an allowed reference list.
+
 See `tests` folder in the repo for more examples.
 
 ## Putting a Result Back Together
diff --git a/jdiff/check_types.py b/jdiff/check_types.py
@@ -180,11 +180,13 @@ def _validate(params) -> None:  # type: ignore[override]
         bool_operators = ("all-same",)
         number_operators = ("is-gt", "is-lt", "is-ge", "is-le")
         string_operators = ("contains", "not-contains")
+        list_operators = ("is-subset", "is-subset-ci")
         valid_options = (
             in_operators,
             bool_operators,
             number_operators,
             string_operators,
+            list_operators,
         )
 
         # Validate "params" argument is not None.
@@ -245,6 +247,13 @@ def _validate(params) -> None:  # type: ignore[override]
             raise ValueError(
                 f"check option all-same must have value of type bool. You have: {params_value} of type {type(params_value)}"
             )
+        # "is-subset" requires lists or tuples
+        if params_key in list_operators:
+            if not isinstance(params_value, (list, tuple)):
+                raise ValueError(
+                    f"check options {list_operators} must have value of type list or tuple. "
+                    f"You have: {params_value} of type {type(params_value)}."
+                )
 
     def evaluate(self, params: Any, value_to_compare: Any) -> Tuple[Dict, bool]:  # type: ignore[override]
         """Operator evaluator implementation."""
diff --git a/jdiff/operator.py b/jdiff/operator.py
@@ -129,3 +129,43 @@ def in_range(self) -> Tuple[List, bool]:
     def not_in_range(self) -> Tuple[List, bool]:
         """Is not in range operator caller."""
         return self._loop_through_wrapper("not_in_range")
+
+    def _subset_failures(self, normalize=None) -> List:
+        """Collect the extracted items whose list value is not a subset of the reference data.
+
+        Args:
+            normalize: Optional callable applied to every reference and extracted element before
+                membership is tested (used for case-insensitive matching). ``None`` compares raw values.
+
+        Returns:
+            The items of ``self.value_to_compare`` that failed the check, each listed at most once.
+        """
+        reference = list(self.reference_data)
+        if normalize is not None:
+            reference = [normalize(element) for element in reference]
+        try:
+            # A set gives constant-time membership tests, but requires hashable elements.
+            lookup = set(reference)
+        except TypeError:
+            # Reference data holds unhashable values (e.g. nested lists/dicts): compare by equality.
+            lookup = reference
+
+        def is_allowed(element) -> bool:
+            if normalize is not None:
+                element = normalize(element)
+            try:
+                return element in lookup
+            except TypeError:
+                # An unhashable element cannot be a member of a set of hashable reference values.
+                return False
+
+        failures = []
+        for item in self.value_to_compare:
+            for value in item.values():
+                # A value that is not a list/tuple can never be a subset, so it is recorded as a failure.
+                if any(
+                    not isinstance(evaluated_value, (list, tuple)) or not all(map(is_allowed, evaluated_value))
+                    for evaluated_value in value.values()
+                ):
+                    failures.append(item)
+                    break  # Report each item once, even when several of its values fail.
+        return failures
+
+    def is_subset(self) -> Tuple[List, bool]:
+        """Check whether each extracted list is a subset of the reference list (case-sensitive).
+
+        Returns:
+            ``(failed_items, False)`` when at least one extracted value is not a list/tuple or holds an
+            element missing from the reference data; ``([], True)`` otherwise.
+        """
+        failures = self._subset_failures()
+        return (failures, not failures)
+
+    def is_subset_ci(self) -> Tuple[List, bool]:
+        """Check whether each extracted list is a subset of the reference list (case-insensitive).
+
+        Elements on both sides are converted with ``str()`` and case-folded before comparison.
+
+        Returns:
+            ``(failed_items, False)`` when at least one extracted value is not a list/tuple or holds an
+            element missing from the reference data; ``([], True)`` otherwise.
+        """
+        # casefold() is the caseless-matching form of lower() and also handles non-ASCII case pairs.
+        failures = self._subset_failures(normalize=lambda element: str(element).casefold())
+        return (failures, not failures)
diff --git a/jdiff/utils/jmespath_parsers.py b/jdiff/utils/jmespath_parsers.py
@@ -74,33 +74,37 @@ def jmespath_refkey_parser(path: str) -> str:
 
 
 def associate_key_of_my_value(paths: str, wanted_value: List) -> List:
-    """Associate each reference key (from: jmespath_refkey_parser) to every value found in output (from: jmespath_value_parser)."""
-    # global.peers.*.[is_enabled,is_up] / result.[*].state
+    """Associate extracted field names with the values returned by the parsed JMESPath value path."""
     find_the_key_of_my_values = paths.split(".")[-1]
 
-    # [is_enabled,is_up]
     if find_the_key_of_my_values.startswith("[") and find_the_key_of_my_values.endswith("]"):
-        # ['is_enabled', 'is_up']
         my_key_value_list = find_the_key_of_my_values.strip("[]").split(",")
-    # state
     else:
         my_key_value_list = [find_the_key_of_my_values]
 
     final_list = []
 
-    if not all(isinstance(item, list) for item in wanted_value) and len(my_key_value_list) == 1:
+    if len(my_key_value_list) == 1:
+        key_name = my_key_value_list[0]
+
         for item in wanted_value:
-            temp_dict = {my_key_value_list[0]: item}
-            final_list.append(temp_dict)
+            if not isinstance(item, list):
+                value = item
+            elif len(item) == 1 and not isinstance(item[0], list):
+                value = item[0]
+            else:
+                value = item
 
-    else:
-        for items in wanted_value:
-            if len(items) != len(my_key_value_list):
-                raise ValueError("Key's value len != from value len")
+            final_list.append({key_name: value})
+
+        return final_list
 
-            temp_dict = {my_key_value_list[my_index]: my_value for my_index, my_value in enumerate(items)}
+    for items in wanted_value:
+        if len(items) != len(my_key_value_list):
+            raise ValueError("Key's value len != from value len")
 
-            final_list.append(temp_dict)
+        temp_dict = {my_key_value_list[my_index]: my_value for my_index, my_value in enumerate(items)}
+        final_list.append(temp_dict)
 
     return final_list
 
diff --git a/tests/mock/api/trusted_domains.json b/tests/mock/api/trusted_domains.json
@@ -0,0 +1,29 @@
+[
+    {
+      "allocate_gids": true,
+      "allocate_uids": true,
+      "assume_default_domain": false,
+      "authentication": true,
+      "check_online_interval": 300,
+      "controller_time": 1769518575,
+      "create_home_directory": false,
+      "domain_offline_alerts": false,
+      "extra_expected_spns": [],
+      "findable_groups": [],
+      "findable_users": [],
+      "forest": "domain1.company.com",
+      "groupnet": "groupnet1",
+      "home_directory_template": "/ifs/zone1/cluster1/adm/%U",
+      "hostname": "cluster1.domain1.company.com",
+      "id": "DOMAIN1.COMPANY.COM",
+      "ignore_all_trusts": false,
+      "ignored_trusted_domains": [],
+      "include_trusted_domains": [
+        "COMPANY.COM",
+        "domain1.company.com",
+        "domain2.company.COM",
+        "domain3.company.com",
+        "test.com"
+      ]
+    }
+  ]
diff --git a/tests/test_get_value.py b/tests/test_get_value.py
@@ -139,3 +139,37 @@ def test_top_key_anchor(jmspath, expected_value):
     value = extract_data_from_json(data=data, path=jmspath)
 
     assert value == expected_value, ASSERT_FAIL_MESSAGE.format(output=value, expected_output=expected_value)
+
+
+def test_extract_data_from_json_with_ref_key_and_list_value():
+    """Verify that extract_data_from_json correctly handles ref-key paths when the extracted field value is a list."""
+    data = [
+        {
+            "id": "DOMAIN1.MYCOMPANY.COM",
+            "include_trusted_domains": [
+                "MYCOMPANY.COM",
+                "domain1.mycompany.com",
+                "domain2.mycompany.COM",
+                "domain3.mycompany.com",
+                "test_domain.com",
+            ],
+        }
+    ]
+
+    expected_value = [
+        {
+            "DOMAIN1.MYCOMPANY.COM": {
+                "include_trusted_domains": [
+                    "MYCOMPANY.COM",
+                    "domain1.mycompany.com",
+                    "domain2.mycompany.COM",
+                    "domain3.mycompany.com",
+                    "test_domain.com",
+                ]
+            }
+        }
+    ]
+
+    value = extract_data_from_json(data=data, path="[*].[$id$,include_trusted_domains]")
+
+    assert value == expected_value, ASSERT_FAIL_MESSAGE.format(output=value, expected_output=expected_value)
diff --git a/tests/test_operators.py b/tests/test_operators.py
@@ -157,6 +157,65 @@
         False,
     ),
 )
+operator_is_subset = (
+    "trusted_domains.json",
+    "operator",
+    {
+        "params": {
+            "mode": "is-subset",
+            "operator_data": [
+                "COMPANY.COM",
+                "domain1.company.com",
+                "domain2.company.com",
+                "domain3.company.com",
+                "domain4.company.com",
+                "domain5.company.com",
+                "test.com",
+                "test1.com",
+                "test2.com",
+            ],
+        }
+    },
+    "[*].[$id$,include_trusted_domains]",
+    (
+        [
+            {
+                "DOMAIN1.COMPANY.COM": {
+                    "include_trusted_domains": [
+                        "COMPANY.COM",
+                        "domain1.company.com",
+                        "domain2.company.COM",
+                        "domain3.company.com",
+                        "test.com",
+                    ]
+                }
+            }
+        ],
+        False,
+    ),
+)
+operator_is_subset_ci = (
+    "trusted_domains.json",
+    "operator",
+    {
+        "params": {
+            "mode": "is-subset-ci",
+            "operator_data": [
+                "COMPANY.COM",
+                "domain1.company.com",
+                "domain2.company.com",
+                "domain3.company.com",
+                "domain4.company.com",
+                "domain5.company.com",
+                "test.com",
+                "test1.com",
+                "test2.com",
+            ],
+        }
+    },
+    "[*].[$id$,include_trusted_domains]",
+    ([], True),
+)
 
 operator_all_tests = [
     operator_all_same,
@@ -172,6 +231,8 @@
     operator_not_in,
     operator_in_range,
     operator_not_in_range,
+    operator_is_subset,
+    operator_is_subset_ci,
 ]
 
 
diff --git a/tests/test_validates.py b/tests/test_validates.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+Add is-subset and is-subset-ci operator modes for list comparison.`