Skip to content

Commit dfc021f

Browse files
mengweiericclaude
andcommitted
WIP: fix pushDownFilter to preserve knn scores with WHERE clause
Introduce VectorSearchQueryBuilder that keeps the knn query in a bool.must (scoring) context while placing WHERE filters in bool.filter (non-scoring) context. Without this fix, both knn and WHERE queries were wrapped in bool.filter, which destroyed knn relevance scores. Verified: knn + WHERE now returns correct scores matching DSL baseline. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent c539d3c commit dfc021f

4 files changed

Lines changed: 110 additions & 10 deletions

File tree

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/VectorSearchIndex.java

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder;
1616
import org.opensearch.sql.opensearch.storage.scan.OpenSearchIndexScan;
1717
import org.opensearch.sql.opensearch.storage.scan.OpenSearchIndexScanBuilder;
18+
import org.opensearch.sql.opensearch.storage.scan.VectorSearchQueryBuilder;
1819
import org.opensearch.sql.storage.read.TableScanBuilder;
1920

2021
/**
@@ -45,20 +46,21 @@ public VectorSearchIndex(
4546
public TableScanBuilder createScanBuilder() {
4647
final TimeValue cursorKeepAlive =
4748
getSettings().getSettingValue(Settings.Key.SQL_CURSOR_KEEP_ALIVE);
48-
var builder = createRequestBuilder();
49+
var requestBuilder = createRequestBuilder();
4950

50-
// Inject knn query
51-
builder.pushDownFilter(buildKnnQuery());
52-
builder.pushDownTrackedScore(true);
51+
// Use VectorSearchQueryBuilder to keep knn in must (scoring) context.
52+
// WHERE filters will be placed in filter (non-scoring) context.
53+
var queryBuilder = new VectorSearchQueryBuilder(requestBuilder, buildKnnQuery());
54+
requestBuilder.pushDownTrackedScore(true);
5355

5456
Function<OpenSearchRequestBuilder, OpenSearchIndexScan> createScanOperator =
55-
requestBuilder ->
57+
rb ->
5658
new OpenSearchIndexScan(
5759
getClient(),
58-
requestBuilder.getMaxResponseSize(),
59-
requestBuilder.build(
60+
rb.getMaxResponseSize(),
61+
rb.build(
6062
getIndexName(), cursorKeepAlive, getClient(), getFieldTypes().isEmpty()));
61-
return new OpenSearchIndexScanBuilder(builder, createScanOperator);
63+
return new OpenSearchIndexScanBuilder(queryBuilder, createScanOperator);
6264
}
6365

6466
private QueryBuilder buildKnnQuery() {

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/OpenSearchIndexScanBuilder.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ public OpenSearchIndexScanBuilder(
4545
this.scanFactory = scanFactory;
4646
}
4747

48-
/** Constructor used for unit tests. */
49-
protected OpenSearchIndexScanBuilder(
48+
/** Constructor that accepts a custom PushDownQueryBuilder delegate. */
49+
public OpenSearchIndexScanBuilder(
5050
PushDownQueryBuilder translator,
5151
Function<OpenSearchRequestBuilder, OpenSearchIndexScan> scanFactory) {
5252
this.delegate = translator;
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.opensearch.storage.scan;
7+
8+
import org.opensearch.index.query.BoolQueryBuilder;
9+
import org.opensearch.index.query.QueryBuilder;
10+
import org.opensearch.index.query.QueryBuilders;
11+
import org.opensearch.sql.expression.Expression;
12+
import org.opensearch.sql.expression.FunctionExpression;
13+
import org.opensearch.sql.expression.function.OpenSearchFunctions;
14+
import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder;
15+
import org.opensearch.sql.opensearch.storage.script.filter.FilterQueryBuilder;
16+
import org.opensearch.sql.opensearch.storage.serde.DefaultExpressionSerializer;
17+
import org.opensearch.sql.planner.logical.LogicalFilter;
18+
19+
/**
20+
* Query builder for vector search that keeps the knn query in a scoring (must) context and puts
21+
* WHERE filters in a non-scoring (filter) context. This prevents the knn relevance scores from
22+
* being destroyed when a WHERE clause is pushed down.
23+
*
24+
* <p>Without this, the default pushDownFilter wraps both queries into bool.filter, which is a
25+
* non-scoring context.
26+
*/
27+
public class VectorSearchQueryBuilder extends OpenSearchIndexScanQueryBuilder {
28+
29+
private final QueryBuilder knnQuery;
30+
31+
public VectorSearchQueryBuilder(OpenSearchRequestBuilder requestBuilder, QueryBuilder knnQuery) {
32+
super(requestBuilder);
33+
// Set knn as the initial query (scoring context)
34+
requestBuilder.getSourceBuilder().query(knnQuery);
35+
this.knnQuery = knnQuery;
36+
}
37+
38+
@Override
39+
public boolean pushDownFilter(LogicalFilter filter) {
40+
FilterQueryBuilder queryBuilder = new FilterQueryBuilder(new DefaultExpressionSerializer());
41+
Expression queryCondition = filter.getCondition();
42+
QueryBuilder whereQuery = queryBuilder.build(queryCondition);
43+
44+
// Combine: knn in must (scores), WHERE in filter (no scoring impact)
45+
BoolQueryBuilder combined =
46+
QueryBuilders.boolQuery().must(knnQuery).filter(whereQuery);
47+
requestBuilder.getSourceBuilder().query(combined);
48+
return true;
49+
}
50+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.opensearch.storage.scan;
7+
8+
import static org.junit.jupiter.api.Assertions.assertTrue;
9+
import static org.mockito.Mockito.mock;
10+
11+
import org.junit.jupiter.api.Test;
12+
import org.opensearch.index.query.QueryBuilder;
13+
import org.opensearch.index.query.WrapperQueryBuilder;
14+
import org.opensearch.sql.common.setting.Settings;
15+
import org.opensearch.sql.opensearch.data.value.OpenSearchExprValueFactory;
16+
import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder;
17+
18+
class VectorSearchQueryBuilderTest {
19+
20+
@Test
21+
void knnQuerySetAsScoringQuery() {
22+
var requestBuilder = createRequestBuilder();
23+
var knnQuery = new WrapperQueryBuilder("eyJrbm4iOnt9fQ==");
24+
25+
new VectorSearchQueryBuilder(requestBuilder, knnQuery);
26+
27+
QueryBuilder query = requestBuilder.getSourceBuilder().query();
28+
assertTrue(query instanceof WrapperQueryBuilder,
29+
"knn query should be set directly as top-level query (scoring context)");
30+
}
31+
32+
@Test
33+
void knnQueryNotWrappedInFilterWhenNoWhere() {
34+
var requestBuilder = createRequestBuilder();
35+
var knnQuery = new WrapperQueryBuilder("eyJrbm4iOnt9fQ==");
36+
37+
new VectorSearchQueryBuilder(requestBuilder, knnQuery);
38+
39+
QueryBuilder query = requestBuilder.getSourceBuilder().query();
40+
assertTrue(query instanceof WrapperQueryBuilder,
41+
"Without WHERE clause, knn query should NOT be wrapped in bool.filter");
42+
}
43+
44+
private OpenSearchRequestBuilder createRequestBuilder() {
45+
return new OpenSearchRequestBuilder(
46+
mock(OpenSearchExprValueFactory.class), 10000, mock(Settings.class));
47+
}
48+
}

0 commit comments

Comments
 (0)