Skip to content

Commit 5f8151d

Browse files
committed
Reject OFFSET, WHERE on _score, and script subtrees under filter_type=efficient
Three bug fixes in VectorSearchQueryBuilder that close silent-data-loss paths on vectorSearch() relations: 1. OFFSET on vectorSearch(): pushDownLimit now rejects any LogicalLimit with a non-zero offset. The parent path would push `from: <offset>` into the OpenSearch request, silently shifting the top-k window and dropping the highest-scoring results. LogicalSort carries only a count (no offset), so no change is needed on the sort path. 2. WHERE on _score: pushDownFilter walks the condition with an ExpressionNodeVisitor before compiling to DSL and rejects any ReferenceExpression whose attr is "_score". Previously the synthetic _score column compiled to a range query on a non-existent stored field, which returned zero rows at the cluster with no error. Users who want a score floor should use option='min_score=...'. 3. Script subtrees under filter_type=efficient: after building the WHERE QueryBuilder, the EFFICIENT branch recursively scans for any ScriptQueryBuilder (traversing BoolQueryBuilder's must/filter/should/ mustNot lists and ConstantScoreQueryBuilder wrappers). If found, refuse to embed it under knn.filter. AOSS and serverless vector collections reject script queries inside knn.filter; without this guard the user saw an opaque cluster-side failure instead of a clear plan-time error. Unit tests cover each rejection (including the compound predicate path for _score and the POST-mode negative case for script predicates). Integration tests assert 400 status and the user-facing message from the SQL endpoint. Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
1 parent fa444fe commit 5f8151d

3 files changed

Lines changed: 295 additions & 0 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/sql/VectorSearchIT.java

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,65 @@ public void testBareAggregateRejects() throws IOException {
263263
containsString("Aggregations are not supported on vectorSearch() relations"));
264264
}
265265

266+
// ── OFFSET / WHERE _score / filter_type=efficient script rejection ───
267+
268+
@Test
269+
public void testOffsetRejected() throws IOException {
270+
ResponseException ex =
271+
expectThrows(
272+
ResponseException.class,
273+
() ->
274+
executeQuery(
275+
"SELECT v._id FROM vectorSearch(table='"
276+
+ TEST_INDEX
277+
+ "', field='embedding', "
278+
+ "vector='[1.0, 2.0]', option='k=5') AS v "
279+
+ "LIMIT 5 OFFSET 2"));
280+
281+
assertThat(ex.getMessage(), containsString("OFFSET is not supported on vectorSearch()"));
282+
assertThat(ex.getMessage(), containsString("LIMIT only"));
283+
}
284+
285+
@Test
286+
public void testScoreInWhereRejected() throws IOException {
287+
ResponseException ex =
288+
expectThrows(
289+
ResponseException.class,
290+
() ->
291+
executeQuery(
292+
"SELECT v._id FROM vectorSearch(table='"
293+
+ TEST_INDEX
294+
+ "', field='embedding', "
295+
+ "vector='[1.0, 2.0]', option='k=5') AS v "
296+
+ "WHERE v._score > 0.5 "
297+
+ "LIMIT 5"));
298+
299+
assertThat(ex.getMessage(), containsString("WHERE on _score is not supported"));
300+
assertThat(ex.getMessage(), containsString("min_score"));
301+
}
302+
303+
@Test
304+
public void testEfficientModeRejectsScriptPredicate() throws IOException {
305+
// WHERE age + 1 > 30 compiles to a ScriptQueryBuilder under the hood because the outer >
306+
// is applied to an arithmetic expression, not a direct field reference. Efficient mode
307+
// cannot embed script queries under knn.filter, so this must be rejected up front with a
308+
// clear remediation hint instead of a cluster-side failure.
309+
ResponseException ex =
310+
expectThrows(
311+
ResponseException.class,
312+
() ->
313+
executeQuery(
314+
"SELECT v._id FROM vectorSearch(table='"
315+
+ TEST_INDEX
316+
+ "', field='embedding', "
317+
+ "vector='[1.0, 2.0]', option='k=5,filter_type=efficient') AS v "
318+
+ "WHERE v.age + 1 > 30 "
319+
+ "LIMIT 5"));
320+
321+
assertThat(ex.getMessage(), containsString("filter_type=efficient does not support"));
322+
assertThat(ex.getMessage(), containsString("script queries"));
323+
}
324+
266325
// ── k-NN plugin capability check ──────────────────────────────────────
267326
// The default integ-test cluster does not have the k-NN plugin installed. Execution-path
268327
// queries against vectorSearch() should therefore fail with the clear "k-NN plugin missing"

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilder.java

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,19 @@
66
package org.opensearch.sql.opensearch.storage.scan;
77

88
import java.util.Map;
9+
import java.util.concurrent.atomic.AtomicBoolean;
910
import java.util.function.Function;
1011
import org.apache.commons.lang3.tuple.Pair;
1112
import org.opensearch.index.query.BoolQueryBuilder;
13+
import org.opensearch.index.query.ConstantScoreQueryBuilder;
1214
import org.opensearch.index.query.QueryBuilder;
1315
import org.opensearch.index.query.QueryBuilders;
16+
import org.opensearch.index.query.ScriptQueryBuilder;
1417
import org.opensearch.sql.ast.tree.Sort;
1518
import org.opensearch.sql.ast.tree.Sort.SortOption;
1619
import org.opensearch.sql.exception.ExpressionEvaluationException;
1720
import org.opensearch.sql.expression.Expression;
21+
import org.opensearch.sql.expression.ExpressionNodeVisitor;
1822
import org.opensearch.sql.expression.ReferenceExpression;
1923
import org.opensearch.sql.opensearch.request.OpenSearchRequestBuilder;
2024
import org.opensearch.sql.opensearch.storage.FilterType;
@@ -78,6 +82,18 @@ public VectorSearchQueryBuilder(
7882
public boolean pushDownFilter(LogicalFilter filter) {
7983
FilterQueryBuilder queryBuilder = new FilterQueryBuilder(new DefaultExpressionSerializer());
8084
Expression queryCondition = filter.getCondition();
85+
86+
// Reject WHERE predicates that reference the synthetic _score column. The planner surfaces
87+
// _score as a projectable/filterable field, but it is not a stored document field in
88+
// OpenSearch. If we let this pass through, FilterQueryBuilder produces a range query on a
89+
// non-existent field and the cluster silently returns 0 rows. Users who want a score floor
90+
// should use option='min_score=...' instead.
91+
if (containsScoreReference(queryCondition)) {
92+
throw new ExpressionEvaluationException(
93+
"WHERE on _score is not supported on vectorSearch()."
94+
+ " Use option='min_score=...' for score-floor filtering.");
95+
}
96+
8197
QueryBuilder whereQuery;
8298
try {
8399
whereQuery = queryBuilder.build(queryCondition);
@@ -93,6 +109,16 @@ public boolean pushDownFilter(LogicalFilter filter) {
93109
filterPushed = true;
94110

95111
if (filterType == FilterType.EFFICIENT) {
112+
// knn.filter on AOSS/serverless vector collections rejects script queries. If any WHERE
113+
// subtree compiled to a ScriptQueryBuilder (arithmetic, function calls, CASE, date math),
114+
// refuse to embed it under knn.filter instead of shipping a request that will fail at the
115+
// cluster with an opaque error.
116+
if (containsScriptQuery(whereQuery)) {
117+
throw new ExpressionEvaluationException(
118+
"filter_type=efficient does not support predicates that compile to script queries"
119+
+ " (arithmetic, function calls, CASE, date math). Rewrite the WHERE clause to"
120+
+ " use comparable/term/range predicates, or omit filter_type.");
121+
}
96122
QueryBuilder rebuiltKnn = rebuildKnnWithFilter.apply(whereQuery);
97123
requestBuilder.getSourceBuilder().query(rebuiltKnn);
98124
} else {
@@ -105,6 +131,14 @@ public boolean pushDownFilter(LogicalFilter filter) {
105131

106132
@Override
107133
public boolean pushDownLimit(LogicalLimit limit) {
134+
// OFFSET on vectorSearch() silently rewrites the search window and drops top results, which
135+
// defeats the entire point of a relevance-ranked top-k query. The parent path would push
136+
// `from: <offset>` into the OpenSearch request; reject it explicitly so users get a clear
137+
// error instead of surprising result shifts.
138+
if (limit.getOffset() != null && limit.getOffset() != 0) {
139+
throw new ExpressionEvaluationException(
140+
"OFFSET is not supported on vectorSearch(). Remove OFFSET and use LIMIT only.");
141+
}
108142
validateLimitWithinK(limit.getLimit());
109143
limitPushed = true;
110144
return super.pushDownLimit(limit);
@@ -151,6 +185,71 @@ private void validateLimitWithinK(int limit) {
151185
}
152186
}
153187

188+
/**
189+
* Returns true if any subexpression is a ReferenceExpression whose attr is "_score". Uses the
190+
* standard ExpressionNodeVisitor so compound predicates (AND/OR/NOT, function calls, CASE) are
191+
* walked uniformly.
192+
*/
193+
private static boolean containsScoreReference(Expression expr) {
194+
AtomicBoolean found = new AtomicBoolean(false);
195+
expr.accept(
196+
new ExpressionNodeVisitor<Void, Void>() {
197+
@Override
198+
public Void visitReference(ReferenceExpression node, Void context) {
199+
if ("_score".equals(node.getAttr())) {
200+
found.set(true);
201+
}
202+
return null;
203+
}
204+
},
205+
null);
206+
return found.get();
207+
}
208+
209+
/**
210+
* Recursively scans a QueryBuilder tree for any ScriptQueryBuilder. Handles the common wrappers
211+
* that FilterQueryBuilder produces: BoolQueryBuilder (must/should/mustNot/filter) and
212+
* ConstantScoreQueryBuilder. Other QueryBuilder subtypes are leaves for our purposes: if the
213+
* top-level builder itself is a ScriptQueryBuilder we catch it, otherwise we treat it as
214+
* script-free.
215+
*/
216+
private static boolean containsScriptQuery(QueryBuilder qb) {
217+
if (qb == null) {
218+
return false;
219+
}
220+
if (qb instanceof ScriptQueryBuilder) {
221+
return true;
222+
}
223+
if (qb instanceof BoolQueryBuilder) {
224+
BoolQueryBuilder bool = (BoolQueryBuilder) qb;
225+
for (QueryBuilder child : bool.must()) {
226+
if (containsScriptQuery(child)) {
227+
return true;
228+
}
229+
}
230+
for (QueryBuilder child : bool.filter()) {
231+
if (containsScriptQuery(child)) {
232+
return true;
233+
}
234+
}
235+
for (QueryBuilder child : bool.should()) {
236+
if (containsScriptQuery(child)) {
237+
return true;
238+
}
239+
}
240+
for (QueryBuilder child : bool.mustNot()) {
241+
if (containsScriptQuery(child)) {
242+
return true;
243+
}
244+
}
245+
return false;
246+
}
247+
if (qb instanceof ConstantScoreQueryBuilder) {
248+
return containsScriptQuery(((ConstantScoreQueryBuilder) qb).innerQuery());
249+
}
250+
return false;
251+
}
252+
154253
@Override
155254
public OpenSearchRequestBuilder build() {
156255
if (filterTypeExplicit && !filterPushed) {

opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilderTest.java

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,143 @@ void pushDownFilterNonPushdownableWithoutExplicitFilterTypeFallsBack() {
594594
assertFalse(pushed, "Non-pushdownable filter should return false for in-memory fallback");
595595
}
596596

597+
// ── OFFSET rejection ────────────────────────────────────────────────
598+
599+
@Test
600+
void pushDownLimit_rejectsNonZeroOffset() {
601+
var requestBuilder = createRequestBuilder();
602+
var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}");
603+
var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5"));
604+
605+
var dummyChild = new LogicalValues(Collections.emptyList());
606+
// LIMIT 3 OFFSET 2: the planner passes both through LogicalLimit
607+
var limit = new LogicalLimit(dummyChild, 3, 2);
608+
609+
ExpressionEvaluationException ex =
610+
assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownLimit(limit));
611+
assertTrue(
612+
ex.getMessage().contains("OFFSET is not supported on vectorSearch()"),
613+
"Expected OFFSET rejection message, got: " + ex.getMessage());
614+
assertTrue(
615+
ex.getMessage().contains("LIMIT only"),
616+
"Expected remediation guidance in message, got: " + ex.getMessage());
617+
}
618+
619+
@Test
620+
void pushDownLimit_acceptsZeroOffset() {
621+
var requestBuilder = createRequestBuilder();
622+
var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}");
623+
var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5"));
624+
625+
var dummyChild = new LogicalValues(Collections.emptyList());
626+
var limit = new LogicalLimit(dummyChild, 3, 0);
627+
628+
// Zero offset is the normal case; must continue to succeed.
629+
assertTrue(builder.pushDownLimit(limit));
630+
}
631+
632+
// ── WHERE on _score rejection ────────────────────────────────────────
633+
634+
@Test
635+
void pushDownFilter_rejectsScoreReferenceInWhere() {
636+
var requestBuilder = createRequestBuilder();
637+
var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}");
638+
var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5"));
639+
640+
// WHERE _score > 0.5 (note: _score is a synthetic column, not a stored field)
641+
var condition =
642+
DSL.greater(new ReferenceExpression("_score", ExprCoreType.FLOAT), DSL.literal(0.5));
643+
var dummyChild = new LogicalValues(Collections.emptyList());
644+
var filter = new LogicalFilter(dummyChild, condition);
645+
646+
ExpressionEvaluationException ex =
647+
assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownFilter(filter));
648+
assertTrue(
649+
ex.getMessage().contains("WHERE on _score is not supported"),
650+
"Expected _score rejection message, got: " + ex.getMessage());
651+
assertTrue(
652+
ex.getMessage().contains("min_score"),
653+
"Expected remediation guidance pointing at option='min_score=...', got: "
654+
+ ex.getMessage());
655+
}
656+
657+
@Test
658+
void pushDownFilter_rejectsScoreReferenceInsideCompound() {
659+
var requestBuilder = createRequestBuilder();
660+
var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}");
661+
var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5"));
662+
663+
// WHERE state = 'TX' AND _score > 0.5: rejection must walk compound predicates
664+
var condition =
665+
DSL.and(
666+
DSL.equal(new ReferenceExpression("state", STRING), DSL.literal("TX")),
667+
DSL.greater(new ReferenceExpression("_score", ExprCoreType.FLOAT), DSL.literal(0.5)));
668+
var dummyChild = new LogicalValues(Collections.emptyList());
669+
var filter = new LogicalFilter(dummyChild, condition);
670+
671+
ExpressionEvaluationException ex =
672+
assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownFilter(filter));
673+
assertTrue(
674+
ex.getMessage().contains("WHERE on _score is not supported"),
675+
"Expected _score rejection message, got: " + ex.getMessage());
676+
}
677+
678+
// ── filter_type=efficient rejects script subtrees ───────────────────
679+
680+
@Test
681+
void pushDownFilter_efficient_rejectsScriptSubtree() {
682+
var requestBuilder = createRequestBuilder();
683+
var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}");
684+
Function<QueryBuilder, QueryBuilder> rebuildWithFilter =
685+
whereQuery -> new WrapperQueryBuilder("{\"knn\":{\"filter\":\"embedded\"}}");
686+
var builder =
687+
new VectorSearchQueryBuilder(
688+
requestBuilder,
689+
knnQuery,
690+
Map.of("k", "5"),
691+
FilterType.EFFICIENT,
692+
true,
693+
rebuildWithFilter);
694+
695+
// WHERE price + 1 > 100: the outer > is not a direct field ref, so FilterQueryBuilder
696+
// falls through to buildScriptQuery() and produces a ScriptQueryBuilder. Under filter_type=
697+
// efficient that would be embedded under knn.filter, which AOSS rejects.
698+
var condition =
699+
DSL.greater(
700+
DSL.add(new ReferenceExpression("price", ExprCoreType.INTEGER), DSL.literal(1)),
701+
DSL.literal(100));
702+
var dummyChild = new LogicalValues(Collections.emptyList());
703+
var filter = new LogicalFilter(dummyChild, condition);
704+
705+
ExpressionEvaluationException ex =
706+
assertThrows(ExpressionEvaluationException.class, () -> builder.pushDownFilter(filter));
707+
assertTrue(
708+
ex.getMessage().contains("filter_type=efficient does not support"),
709+
"Expected script rejection message, got: " + ex.getMessage());
710+
assertTrue(
711+
ex.getMessage().contains("script queries"),
712+
"Expected script queries guidance in message, got: " + ex.getMessage());
713+
}
714+
715+
@Test
716+
void pushDownFilter_post_allowsScriptSubtree() {
717+
// Under POST mode, script queries are fine: the WHERE lives in an outer bool.filter
718+
// that OpenSearch evaluates against hits, not inside knn.filter. Verify we do not
719+
// false-positive on the POST branch.
720+
var requestBuilder = createRequestBuilder();
721+
var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}");
722+
var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5"));
723+
724+
var condition =
725+
DSL.greater(
726+
DSL.add(new ReferenceExpression("price", ExprCoreType.INTEGER), DSL.literal(1)),
727+
DSL.literal(100));
728+
var dummyChild = new LogicalValues(Collections.emptyList());
729+
var filter = new LogicalFilter(dummyChild, condition);
730+
731+
assertTrue(builder.pushDownFilter(filter), "POST mode must still accept script predicates");
732+
}
733+
597734
@Test
598735
void buildSucceedsRadialWithSortEmbeddedLimit() {
599736
var requestBuilder = createRequestBuilder();

0 commit comments

Comments
 (0)