Skip to content

Commit fd4acfa

Browse files
authored
Merge pull request lightspeed-core#1581 from anik120/vector-io-filters
LCORE-1446: Add support for metadata filters in Solr vector search
2 parents 5b72b08 + f0732e2 commit fd4acfa

5 files changed

Lines changed: 286 additions & 15 deletions

File tree

docs/okp_guide.md

Lines changed: 118 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,7 @@ okp:
148148
* **`rag.inline`** and **`rag.tool`**: Enable OKP as the RAG source for inline context injection and for the RAG tool. With tool RAG, the LLM is given a search tool that it can choose to invoke to find relevant content and augment the user prompt; the tool may or may not be invoked. With inline RAG, a RAG search and prompt augmentation always occur.
149149
* **`okp.offline`**: When `true`, source URLs use `parent_id` (offline/Mimir-style). When `false`, use `reference_url` (online).
150150

151-
If you want to filter the docs to a specific product, you can include a query
152-
filter such as:
151+
If you want to filter the docs to a specific product, you can include a static query filter such as:
153152

154153
```yaml
155154
okp:
@@ -160,6 +159,123 @@ okp:
160159
When you launch Lightspeed Stack, it will augment the Llamastack `run.yaml` with
161160
configuration for OKP.
162161

162+
### Dynamic Metadata Filtering
163+
164+
In addition to static filters configured in `lightspeed-stack.yaml`, you can apply **dynamic filters** per query using structured filter objects in the request. Dynamic filters are combined with static filters using AND logic.
165+
166+
#### Supported Filter Operations
167+
168+
**Comparison Filters:**
169+
- `eq` - Equal to (exact match)
170+
- `ne` - Not equal to
171+
- `in` - Value in list
172+
- `nin` - Value not in list
173+
174+
**Compound Filters:**
175+
- `and` - All filters must match
176+
- `or` - At least one filter must match
177+
178+
> **Note:** Range operators (`gt`, `gte`, `lt`, `lte`) are not supported because they use lexicographic comparison on string fields, which can produce unexpected results (for example, `"4.9"` compares greater than `"4.10"`).
179+
180+
#### Dynamic Filter Examples
181+
182+
**Simple equality filter:**
183+
184+
```bash
185+
curl -sX POST http://localhost:8080/v1/query \
186+
-H "Content-Type: application/json" \
187+
-d '{
188+
"query": "How to install ansible?",
189+
"solr": {
190+
"mode": "hybrid",
191+
"filters": {
192+
"filters": {
193+
"type": "eq",
194+
"key": "product",
195+
"value": "ansible_automation_platform"
196+
}
197+
}
198+
}
199+
}'
200+
```
201+
202+
**Multiple values with 'in' filter:**
203+
204+
```bash
205+
curl -sX POST http://localhost:8080/v1/query \
206+
-H "Content-Type: application/json" \
207+
-d '{
208+
"query": "Security best practices",
209+
"solr": {
210+
"mode": "semantic",
211+
"filters": {
212+
"filters": {
213+
"type": "in",
214+
"key": "product",
215+
"value": ["openshift_container_platform", "ansible_automation_platform", "red_hat_enterprise_linux"]
216+
}
217+
}
218+
}
219+
}'
220+
```
221+
222+
**Compound filters (AND/OR):**
223+
224+
```bash
225+
curl -sX POST http://localhost:8080/v1/query \
226+
-H "Content-Type: application/json" \
227+
-d '{
228+
"query": "Advanced configuration",
229+
"solr": {
230+
"mode": "hybrid",
231+
"filters": {
232+
"filters": {
233+
"type": "and",
234+
"filters": [
235+
{"type": "eq", "key": "product", "value": "openshift_container_platform"},
236+
{"type": "eq", "key": "version", "value": "4.21"}
237+
]
238+
}
239+
}
240+
}
241+
}'
242+
```
243+
244+
**Nested compound filters:**
245+
246+
```bash
247+
curl -sX POST http://localhost:8080/v1/query \
248+
-H "Content-Type: application/json" \
249+
-d '{
250+
"query": "Troubleshooting guide",
251+
"solr": {
252+
"mode": "hybrid",
253+
"filters": {
254+
"filters": {
255+
"type": "and",
256+
"filters": [
257+
{"type": "eq", "key": "doc_type", "value": "guide"},
258+
{
259+
"type": "or",
260+
"filters": [
261+
{"type": "eq", "key": "product", "value": "openshift_container_platform"},
262+
{"type": "eq", "key": "product", "value": "ansible_automation_platform"}
263+
]
264+
}
265+
]
266+
}
267+
}
268+
}
269+
}'
270+
```
271+
272+
#### Filter Behavior
273+
274+
- **Static filters preserved:** The configured `chunk_filter_query` (e.g., `"product:*openshift*"`) is always applied
275+
- **Dynamic filters added:** Request filters are combined with static filters using AND logic
276+
- **String escaping:** Special Solr characters in filter values are automatically escaped
277+
- **Works with all search modes:** Filters apply to `semantic`, `hybrid`, and `lexical` search modes
278+
163279
### Configure Lightspeed Stack for library mode
164280

165281
For the simplest local development, configure `lightspeed-stack.yaml` to

docs/openapi.json

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19250,12 +19250,39 @@
1925019250
}
1925119251
],
1925219252
"title": "Filters",
19253-
"description": "Solr provider filter payload passed through as params['solr'].",
19253+
"description": "Solr provider filter payload passed through as params['solr']. Supports structured metadata filters (eq, ne, in, nin comparison operators). Legacy filter-only objects (e.g. fq) are still accepted.",
1925419254
"examples": [
19255+
{
19256+
"filters": {
19257+
"key": "product",
19258+
"type": "eq",
19259+
"value": "openshift_container_platform"
19260+
}
19261+
},
19262+
{
19263+
"filters": {
19264+
"filters": [
19265+
{
19266+
"key": "product",
19267+
"type": "eq",
19268+
"value": "openshift_container_platform"
19269+
},
19270+
{
19271+
"key": "version",
19272+
"type": "in",
19273+
"value": [
19274+
"4.14",
19275+
"4.15",
19276+
"4.16"
19277+
]
19278+
}
19279+
],
19280+
"type": "and"
19281+
}
19282+
},
1925519283
{
1925619284
"fq": [
19257-
"product:*openshift*",
19258-
"product_version:*4.16*"
19285+
"product:*openshift*"
1925919286
]
1926019287
}
1926119288
]

src/models/common/query.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,38 @@ class SolrVectorSearchRequest(BaseModel):
8282
)
8383
filters: Optional[dict[str, Any]] = Field(
8484
None,
85-
description="Solr provider filter payload passed through as params['solr'].",
86-
examples=[{"fq": ["product:*openshift*", "product_version:*4.16*"]}],
85+
description=(
86+
"Solr provider filter payload passed through as params['solr']. "
87+
"Supports structured metadata filters (eq, ne, in, nin comparison operators). "
88+
"Legacy filter-only objects (e.g. fq) are still accepted."
89+
),
90+
examples=[
91+
{
92+
"filters": {
93+
"type": "eq",
94+
"key": "product",
95+
"value": "openshift_container_platform",
96+
}
97+
},
98+
{
99+
"filters": {
100+
"type": "and",
101+
"filters": [
102+
{
103+
"type": "eq",
104+
"key": "product",
105+
"value": "openshift_container_platform",
106+
},
107+
{
108+
"type": "in",
109+
"key": "version",
110+
"value": ["4.14", "4.15", "4.16"],
111+
},
112+
],
113+
}
114+
},
115+
{"fq": ["product:*openshift*"]},
116+
],
87117
)
88118

89119
@model_validator(mode="before")

src/utils/vector_search.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,11 @@ def _build_query_params(
112112
113113
Returns:
114114
Query parameters dict for vector_io.query.
115+
- mode: Solr search mode (semantic, hybrid, lexical)
116+
- filters: Solr filter payload, may contain structured metadata filters
117+
118+
Note:
119+
Structured metadata filters found under ``solr.filters["filters"]`` are moved to the top-level ``filters`` key; any remaining keys (e.g. legacy ``fq``) stay under ``solr``.
115120
"""
116121
resolved_mode = (
117122
solr.mode
@@ -127,8 +132,23 @@ def _build_query_params(
127132
logger.debug("query_request.solr: %s", solr)
128133

129134
if solr is not None and solr.filters is not None:
130-
params["solr"] = solr.filters
131-
logger.debug("Final params with solr filters: %s", params)
135+
# Extract structured metadata filters if present in solr.filters dict
136+
# Filters need to be at top-level params for vector_io.query
137+
if isinstance(solr.filters, dict) and "filters" in solr.filters:
138+
params["filters"] = solr.filters["filters"]
139+
logger.debug("Extracted filters from solr.filters: %s", params["filters"])
140+
141+
# Pass remaining solr.filters content (legacy fq, etc.) to params["solr"]
142+
remaining_filters = {
143+
k: v for k, v in solr.filters.items() if k != "filters"
144+
}
145+
if remaining_filters:
146+
params["solr"] = remaining_filters
147+
logger.debug("Remaining solr.filters: %s", remaining_filters)
148+
else:
149+
# Legacy format: entire solr.filters dict is passed as params["solr"]
150+
params["solr"] = solr.filters
151+
logger.debug("Legacy solr.filters format: %s", params["solr"])
132152
else:
133153
logger.debug("No solr filters provided")
134154

tests/unit/utils/test_vector_search.py

Lines changed: 84 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,14 +74,89 @@ def test_default_params(self) -> None:
7474
assert params["mode"] == constants.SOLR_VECTOR_SEARCH_DEFAULT_MODE
7575
assert "solr" not in params
7676

77-
def test_with_solr_filters(self) -> None:
78-
"""Test parameters when solr filters are provided."""
79-
solr = SolrVectorSearchRequest.model_validate({"filter": "value"})
77+
def test_with_legacy_solr_filters(self) -> None:
78+
"""Test parameters when legacy solr filters are provided."""
79+
solr = SolrVectorSearchRequest.model_validate(
80+
{
81+
"filters": {
82+
"fq": ["platform:openshift"],
83+
},
84+
},
85+
)
86+
params = _build_query_params(solr=solr)
87+
88+
assert params["solr"] == {"fq": ["platform:openshift"]}
89+
assert params["k"] == constants.SOLR_VECTOR_SEARCH_DEFAULT_K
90+
assert "filters" not in params
91+
92+
def test_with_structured_metadata_filters(self) -> None:
93+
"""Test parameters with structured metadata filter format."""
94+
solr = SolrVectorSearchRequest.model_validate(
95+
{
96+
"filters": {
97+
"filters": {
98+
"type": "eq",
99+
"key": "platform",
100+
"value": "openshift",
101+
},
102+
},
103+
},
104+
)
105+
params = _build_query_params(solr=solr)
106+
107+
# Filters should be extracted to top-level
108+
assert "filters" in params
109+
assert params["filters"]["type"] == "eq"
110+
assert params["filters"]["key"] == "platform"
111+
assert params["filters"]["value"] == "openshift"
112+
assert params["k"] == constants.SOLR_VECTOR_SEARCH_DEFAULT_K
113+
# No remaining solr params
114+
assert "solr" not in params
115+
116+
def test_with_filters_and_other_solr_params(self) -> None:
117+
"""Test parameters with both filters and other solr-specific params."""
118+
solr = SolrVectorSearchRequest.model_validate(
119+
{
120+
"filters": {
121+
"filters": {
122+
"type": "in",
123+
"key": "version",
124+
"value": ["4.14", "4.15"],
125+
},
126+
"custom_param": "value",
127+
},
128+
},
129+
)
80130
params = _build_query_params(solr=solr)
81131

82-
assert params["solr"] == {"filter": "value"}
132+
# Filters extracted to top-level
133+
assert params["filters"]["type"] == "in"
134+
assert params["filters"]["key"] == "version"
135+
# Other params remain under solr key
136+
assert params["solr"] == {"custom_param": "value"}
83137
assert params["k"] == constants.SOLR_VECTOR_SEARCH_DEFAULT_K
84138

139+
def test_with_compound_filter(self) -> None:
140+
"""Test parameters with compound AND filter."""
141+
solr = SolrVectorSearchRequest.model_validate(
142+
{
143+
"filters": {
144+
"filters": {
145+
"type": "and",
146+
"filters": [
147+
{"type": "eq", "key": "platform", "value": "openshift"},
148+
{"type": "ne", "key": "status", "value": "archived"},
149+
],
150+
},
151+
},
152+
},
153+
)
154+
params = _build_query_params(solr=solr)
155+
156+
assert params["filters"]["type"] == "and"
157+
assert len(params["filters"]["filters"]) == 2
158+
assert "solr" not in params
159+
85160
def test_custom_mode(self) -> None:
86161
"""Request mode overrides the default Solr vector_io mode."""
87162
solr = SolrVectorSearchRequest(mode="lexical")
@@ -93,7 +168,8 @@ def test_custom_mode(self) -> None:
93168
def test_mode_with_solr_filters(self) -> None:
94169
"""Custom mode is combined with solr filter payload."""
95170
solr = SolrVectorSearchRequest(
96-
mode="semantic", filters={"fq": ["product:*openshift*"]}
171+
mode="semantic",
172+
filters={"fq": ["product:*openshift*"]},
97173
)
98174
params = _build_query_params(solr=solr)
99175

@@ -102,7 +178,9 @@ def test_mode_with_solr_filters(self) -> None:
102178

103179
def test_mode_with_only_filters(self) -> None:
104180
"""Mode is set to default value when only filters are provided."""
105-
solr = SolrVectorSearchRequest(filters={"fq": ["product:*openshift*"]})
181+
solr = SolrVectorSearchRequest(
182+
filters={"fq": ["product:*openshift*"]},
183+
)
106184
params = _build_query_params(solr=solr)
107185

108186
assert params["mode"] == constants.SOLR_VECTOR_SEARCH_DEFAULT_MODE

0 commit comments

Comments
 (0)