77from haystack .errors import FilterError
88
99
def normalize_filters(filters: dict[str, Any], nested_fields: set[str] | None = None) -> dict[str, Any]:
    """
    Converts Haystack filters in OpenSearch compatible filters.

    :param filters: Haystack filter dictionary.
    :param nested_fields: Set of metadata field paths that are mapped as `nested` type in OpenSearch.
        When provided, conditions targeting sub-fields of these paths are wrapped in `nested` queries.
    :returns: The OpenSearch query dictionary built from `filters`.
    :raises FilterError: If `filters` is not a dictionary.
    """
    if not isinstance(filters, dict):
        msg = "Filters must be a dictionary"
        raise FilterError(msg)

    # Only comparison dictionaries carry a "field" key; anything else is
    # handed to the logical-condition parser.
    if "field" not in filters:
        return _parse_logical_condition(filters, nested_fields)

    query = _parse_comparison_condition(filters, nested_fields)
    # The nested lookup is only attempted when nested fields were supplied.
    nested_path = _get_nested_path(filters, nested_fields) if nested_fields else None
    if nested_path:
        query = {"nested": {"path": nested_path, "query": query}}
    return {"bool": {"must": query}}
30+
31+
32+ def _get_nested_path (condition : dict [str , Any ], nested_fields : set [str ]) -> str | None :
33+ """Returns the nested path for a comparison condition, or None."""
34+ if not (field := condition .get ("field" )):
35+ return None
36+ if field .startswith ("meta." ):
37+ field = field [5 :]
38+ parts = field .split ("." )
39+ for i in range (1 , len (parts )):
40+ prefix = "." .join (parts [:i ])
41+ if prefix in nested_fields :
42+ return prefix
43+ return None
44+
45+
46+ def _get_logical_condition_nested_path (condition : dict [str , Any ], nested_fields : set [str ]) -> str | None :
47+ """Returns the common nested path if all leaf comparisons in a logical sub-group share one, else None."""
48+ paths : set [str | None ] = set ()
49+ for c in condition .get ("conditions" , []):
50+ if "field" in c :
51+ paths .add (_get_nested_path (c , nested_fields ))
52+ elif "operator" in c and "conditions" in c :
53+ paths .add (_get_logical_condition_nested_path (c , nested_fields ))
54+ else :
55+ return None
56+ if len (paths ) == 1 :
57+ return next (iter (paths ))
58+ return None
59+
60+
61+ def _group_nested_conditions (
62+ raw_conditions : list [dict [str , Any ]],
63+ nested_fields : set [str ],
64+ operator : str ,
65+ ) -> list [dict [str , Any ]]:
66+ """
67+ Group conditions by nested path and wrap each group in a `nested` query.
2168
69+ Both direct comparisons and logical sub-groups whose leaves all target the
70+ same nested path are absorbed into the group so that they match within the
71+ same array element.
72+ """
73+ nested_groups : dict [str , list [dict [str , Any ]]] = {}
74+ flat_raw : list [dict [str , Any ]] = []
75+
76+ for c in raw_conditions :
77+ nested_path = _get_nested_path (c , nested_fields )
78+ if nested_path is None and "operator" in c and "conditions" in c :
79+ nested_path = _get_logical_condition_nested_path (c , nested_fields )
80+ if nested_path :
81+ nested_groups .setdefault (nested_path , []).append (c )
82+ else :
83+ flat_raw .append (c )
84+
85+ conditions = [_parse_comparison_condition (c , nested_fields ) for c in flat_raw ]
86+
87+ for path , group in nested_groups .items ():
88+ inner = []
89+ for c in group :
90+ if "operator" in c and "conditions" in c :
91+ # Logical sub-group: parse without nested awareness to avoid
92+ # redundant nested wrapping — the outer code handles that.
93+ inner .append (_parse_logical_condition (c , nested_fields = None ))
94+ else :
95+ inner .append (_parse_comparison_condition (c , nested_fields ))
96+ if len (inner ) > 1 :
97+ inner = _normalize_ranges (inner )
98+ if len (inner ) == 1 :
99+ conditions .append ({"nested" : {"path" : path , "query" : inner [0 ]}})
100+ elif operator == "OR" :
101+ conditions .append ({"nested" : {"path" : path , "query" : {"bool" : {"should" : inner }}}})
102+ else :
103+ conditions .append ({"nested" : {"path" : path , "query" : {"bool" : {"must" : inner }}}})
22104
23- def _parse_logical_condition (condition : dict [str , Any ]) -> dict [str , Any ]:
105+ return conditions
106+
107+
108+ def _parse_logical_condition (condition : dict [str , Any ], nested_fields : set [str ] | None = None ) -> dict [str , Any ]:
24109 if "operator" not in condition :
25110 msg = f"'operator' key missing in { condition } "
26111 raise FilterError (msg )
@@ -29,7 +114,12 @@ def _parse_logical_condition(condition: dict[str, Any]) -> dict[str, Any]:
29114 raise FilterError (msg )
30115
31116 operator = condition ["operator" ]
32- conditions = [_parse_comparison_condition (c ) for c in condition ["conditions" ]]
117+
118+ if nested_fields :
119+ conditions = _group_nested_conditions (condition ["conditions" ], nested_fields , operator )
120+ else :
121+ conditions = [_parse_comparison_condition (c , nested_fields ) for c in condition ["conditions" ]]
122+
33123 if len (conditions ) > 1 :
34124 conditions = _normalize_ranges (conditions )
35125 if operator == "AND" :
@@ -189,11 +279,11 @@ def _not_in(field: str, value: Any) -> dict[str, Any]:
189279}
190280
191281
192- def _parse_comparison_condition (condition : dict [str , Any ]) -> dict [str , Any ]:
282+ def _parse_comparison_condition (condition : dict [str , Any ], nested_fields : set [ str ] | None = None ) -> dict [str , Any ]:
193283 if "field" not in condition :
194284 # 'field' key is only found in comparison dictionaries.
195285 # We assume this is a logic dictionary since it's not present.
196- return _parse_logical_condition (condition )
286+ return _parse_logical_condition (condition , nested_fields )
197287 field : str = condition ["field" ]
198288
199289 if field .startswith ("meta." ):
0 commit comments