77from haystack .errors import FilterError
88
99
def normalize_filters(filters: dict[str, Any], nested_fields: set[str] | None = None) -> dict[str, Any]:
    """
    Convert Haystack filters into OpenSearch-compatible filters.

    :param filters: Haystack filter dictionary. A dictionary containing a ``field`` key is handled as a
        single comparison; any other dictionary is handed to the logical-condition parser.
    :param nested_fields: Set of metadata field paths that are mapped as ``nested`` type in OpenSearch.
        When provided, conditions targeting sub-fields of these paths are wrapped in ``nested`` queries.
    :returns: The OpenSearch query dictionary. A single comparison is always placed under ``bool.must``.
    :raises FilterError: If ``filters`` is not a dictionary.
    """
    if not isinstance(filters, dict):
        msg = "Filters must be a dictionary"
        raise FilterError(msg)

    # No "field" key: this is a logical group, which does its own nested handling.
    if "field" not in filters:
        return _parse_logical_condition(filters, nested_fields)

    query = _parse_comparison_condition(filters, nested_fields)
    # Only look for a nested path when the caller actually declared nested fields.
    path = _get_nested_path(filters, nested_fields) if nested_fields else None
    if path:
        query = {"nested": {"path": path, "query": query}}
    return {"bool": {"must": query}}
30+
31+
32+ def _get_nested_path (condition : dict [str , Any ], nested_fields : set [str ]) -> str | None :
33+ """Returns the nested path for a comparison condition, or None."""
34+ if not (field := condition .get ("field" )):
35+ return None
36+ if field .startswith ("meta." ):
37+ field = field [5 :]
38+ parts = field .split ("." )
39+ for i in range (1 , len (parts )):
40+ prefix = "." .join (parts [:i ])
41+ if prefix in nested_fields :
42+ return prefix
43+ return None
44+
45+
46+ def _get_logical_condition_nested_path (condition : dict [str , Any ], nested_fields : set [str ]) -> str | None :
47+ """Returns the common nested path if all leaf comparisons in a logical sub-group share one, else None."""
48+ paths : set [str | None ] = set ()
49+ for c in condition .get ("conditions" , []):
50+ if "field" in c :
51+ paths .add (_get_nested_path (c , nested_fields ))
52+ elif "operator" in c and "conditions" in c :
53+ paths .add (_get_logical_condition_nested_path (c , nested_fields ))
54+ else :
55+ return None
56+ if len (paths ) == 1 :
57+ return next (iter (paths ))
58+ return None
59+
60+
61+ def _group_nested_conditions (
62+ raw_conditions : list [dict [str , Any ]],
63+ nested_fields : set [str ],
64+ operator : str ,
65+ ) -> list [dict [str , Any ]]:
66+ """Group conditions by nested path and wrap each group in a ``nested`` query.
67+
68+ Both direct comparisons and logical sub-groups whose leaves all target the
69+ same nested path are absorbed into the group so that they match within the
70+ same array element.
71+ """
72+ nested_groups : dict [str , list [dict [str , Any ]]] = {}
73+ flat_raw : list [dict [str , Any ]] = []
74+
75+ for c in raw_conditions :
76+ nested_path = _get_nested_path (c , nested_fields )
77+ if nested_path is None and "operator" in c and "conditions" in c :
78+ nested_path = _get_logical_condition_nested_path (c , nested_fields )
79+ if nested_path :
80+ nested_groups .setdefault (nested_path , []).append (c )
81+ else :
82+ flat_raw .append (c )
83+
84+ conditions = [_parse_comparison_condition (c , nested_fields ) for c in flat_raw ]
85+
86+ for path , group in nested_groups .items ():
87+ inner = []
88+ for c in group :
89+ if "operator" in c and "conditions" in c :
90+ # Logical sub-group: parse without nested awareness to avoid
91+ # redundant nested wrapping — the outer code handles that.
92+ inner .append (_parse_logical_condition (c , nested_fields = None ))
93+ else :
94+ inner .append (_parse_comparison_condition (c , nested_fields ))
95+ if len (inner ) > 1 :
96+ inner = _normalize_ranges (inner )
97+ if len (inner ) == 1 :
98+ conditions .append ({"nested" : {"path" : path , "query" : inner [0 ]}})
99+ elif operator == "OR" :
100+ conditions .append ({"nested" : {"path" : path , "query" : {"bool" : {"should" : inner }}}})
101+ else :
102+ conditions .append ({"nested" : {"path" : path , "query" : {"bool" : {"must" : inner }}}})
21103
104+ return conditions
22105
23- def _parse_logical_condition (condition : dict [str , Any ]) -> dict [str , Any ]:
106+
107+ def _parse_logical_condition (condition : dict [str , Any ], nested_fields : set [str ] | None = None ) -> dict [str , Any ]:
24108 if "operator" not in condition :
25109 msg = f"'operator' key missing in { condition } "
26110 raise FilterError (msg )
@@ -29,7 +113,12 @@ def _parse_logical_condition(condition: dict[str, Any]) -> dict[str, Any]:
29113 raise FilterError (msg )
30114
31115 operator = condition ["operator" ]
32- conditions = [_parse_comparison_condition (c ) for c in condition ["conditions" ]]
116+
117+ if nested_fields :
118+ conditions = _group_nested_conditions (condition ["conditions" ], nested_fields , operator )
119+ else :
120+ conditions = [_parse_comparison_condition (c , nested_fields ) for c in condition ["conditions" ]]
121+
33122 if len (conditions ) > 1 :
34123 conditions = _normalize_ranges (conditions )
35124 if operator == "AND" :
@@ -189,11 +278,11 @@ def _not_in(field: str, value: Any) -> dict[str, Any]:
189278}
190279
191280
192- def _parse_comparison_condition (condition : dict [str , Any ]) -> dict [str , Any ]:
281+ def _parse_comparison_condition (condition : dict [str , Any ], nested_fields : set [ str ] | None = None ) -> dict [str , Any ]:
193282 if "field" not in condition :
194283 # 'field' key is only found in comparison dictionaries.
195284 # We assume this is a logic dictionary since it's not present.
196- return _parse_logical_condition (condition )
285+ return _parse_logical_condition (condition , nested_fields )
197286 field : str = condition ["field" ]
198287
199288 if field .startswith ("meta." ):
0 commit comments