Skip to content

Commit e328337

Browse files
committed
Replace script-query blacklist with fail-closed allow-list for efficient filter
The prior containsScriptQuery walker missed any wrapper type it didn't know about, which risked silently embedding unsupported queries (e.g., NestedQueryBuilder) under knn.filter. Switch to an allow-list validator over FilterQueryBuilder's actual leaf output (term/range/wildcard/match*/query_string/simple_query_string/match_bool_prefix/exists), recurse bool and constant_score, reject script and nested with targeted messages, and fail closed on unknown shapes. Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
1 parent 154df52 commit e328337

2 files changed

Lines changed: 146 additions & 69 deletions

File tree

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilder.java

Lines changed: 69 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,27 @@
66
package org.opensearch.sql.opensearch.storage.scan;
77

88
import java.util.Map;
9+
import java.util.Set;
910
import java.util.concurrent.atomic.AtomicBoolean;
1011
import java.util.function.Function;
1112
import org.apache.commons.lang3.tuple.Pair;
1213
import org.opensearch.index.query.BoolQueryBuilder;
1314
import org.opensearch.index.query.ConstantScoreQueryBuilder;
15+
import org.opensearch.index.query.ExistsQueryBuilder;
16+
import org.opensearch.index.query.MatchBoolPrefixQueryBuilder;
17+
import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder;
18+
import org.opensearch.index.query.MatchPhraseQueryBuilder;
19+
import org.opensearch.index.query.MatchQueryBuilder;
20+
import org.opensearch.index.query.MultiMatchQueryBuilder;
21+
import org.opensearch.index.query.NestedQueryBuilder;
1422
import org.opensearch.index.query.QueryBuilder;
1523
import org.opensearch.index.query.QueryBuilders;
24+
import org.opensearch.index.query.QueryStringQueryBuilder;
25+
import org.opensearch.index.query.RangeQueryBuilder;
1626
import org.opensearch.index.query.ScriptQueryBuilder;
27+
import org.opensearch.index.query.SimpleQueryStringBuilder;
28+
import org.opensearch.index.query.TermQueryBuilder;
29+
import org.opensearch.index.query.WildcardQueryBuilder;
1730
import org.opensearch.sql.ast.tree.Sort;
1831
import org.opensearch.sql.ast.tree.Sort.SortOption;
1932
import org.opensearch.sql.exception.ExpressionEvaluationException;
@@ -83,11 +96,8 @@ public boolean pushDownFilter(LogicalFilter filter) {
8396
FilterQueryBuilder queryBuilder = new FilterQueryBuilder(new DefaultExpressionSerializer());
8497
Expression queryCondition = filter.getCondition();
8598

86-
// Reject WHERE predicates that reference the synthetic _score column. The planner surfaces
87-
// _score as a projectable/filterable field, but it is not a stored document field in
88-
// OpenSearch. If we let this pass through, FilterQueryBuilder produces a range query on a
89-
// non-existent field and the cluster silently returns 0 rows. Users who want a score floor
90-
// should use option='min_score=...' instead.
99+
// _score is synthetic, not a stored field; a range query on it silently returns 0 rows.
100+
// Users who want a score floor should use option='min_score=...'.
91101
if (containsScoreReference(queryCondition)) {
92102
throw new ExpressionEvaluationException(
93103
"WHERE on _score is not supported on vectorSearch()."
@@ -109,16 +119,9 @@ public boolean pushDownFilter(LogicalFilter filter) {
109119
filterPushed = true;
110120

111121
if (filterType == FilterType.EFFICIENT) {
112-
// knn.filter on AOSS/serverless vector collections rejects script queries. If any WHERE
113-
// subtree compiled to a ScriptQueryBuilder (arithmetic, function calls, CASE, date math),
114-
// refuse to embed it under knn.filter instead of shipping a request that will fail at the
115-
// cluster with an opaque error.
116-
if (containsScriptQuery(whereQuery)) {
117-
throw new ExpressionEvaluationException(
118-
"filter_type=efficient does not support predicates that compile to script queries"
119-
+ " (arithmetic, function calls, CASE, date math). Rewrite the WHERE clause to"
120-
+ " use comparable/term/range predicates, or omit filter_type.");
121-
}
122+
// Fail closed: knn.filter on AOSS rejects script queries and nested predicates expand the
123+
// preview contract. Allow-list validator beats a blacklist walker.
124+
validateEfficientFilterSafe(whereQuery);
122125
QueryBuilder rebuiltKnn = rebuildKnnWithFilter.apply(whereQuery);
123126
requestBuilder.getSourceBuilder().query(rebuiltKnn);
124127
} else {
@@ -131,10 +134,8 @@ public boolean pushDownFilter(LogicalFilter filter) {
131134

132135
@Override
133136
public boolean pushDownLimit(LogicalLimit limit) {
134-
// OFFSET on vectorSearch() silently rewrites the search window and drops top results, which
135-
// defeats the entire point of a relevance-ranked top-k query. The parent path would push
136-
// `from: <offset>` into the OpenSearch request; reject it explicitly so users get a clear
137-
// error instead of surprising result shifts.
137+
// OFFSET would shift the search window and silently drop top results; reject with a clear
138+
// error rather than have the parent path push `from: <offset>` into the request.
138139
if (limit.getOffset() != null && limit.getOffset() != 0) {
139140
throw new ExpressionEvaluationException(
140141
"OFFSET is not supported on vectorSearch(). Remove OFFSET and use LIMIT only.");
@@ -163,9 +164,8 @@ public boolean pushDownSort(LogicalSort sort) {
163164
"vectorSearch only supports ORDER BY _score DESC; _score ASC is not supported");
164165
}
165166
}
166-
// _score DESC is the natural knn order — no need to push the sort itself to OpenSearch.
167-
// Preserve the parent's sort.getCount() → limit pushdown contract: SQL always sets count=0,
168-
// but PPL or future callers may set a non-zero count to combine sort+limit in one node.
167+
// _score DESC is knn's natural order, so the sort itself is not pushed. Preserve the
168+
// parent's sort.getCount() → limit contract; SQL sends 0, PPL may combine sort+limit.
169169
if (sort.getCount() != 0) {
170170
validateLimitWithinK(sort.getCount());
171171
limitPushed = true;
@@ -185,20 +185,14 @@ private void validateLimitWithinK(int limit) {
185185
}
186186
}
187187

188-
/**
189-
* Returns true if any subexpression is a ReferenceExpression whose attr is "_score". Uses the
190-
* standard ExpressionNodeVisitor so compound predicates (AND/OR/NOT, function calls, CASE) are
191-
* walked uniformly.
192-
*/
188+
// True if any ReferenceExpression in the tree names _score (case-insensitive, so quoted/
189+
// backticked variants cannot bypass the guard).
193190
private static boolean containsScoreReference(Expression expr) {
194191
AtomicBoolean found = new AtomicBoolean(false);
195192
expr.accept(
196193
new ExpressionNodeVisitor<Void, Void>() {
197194
@Override
198195
public Void visitReference(ReferenceExpression node, Void context) {
199-
// Case-insensitive match so _SCORE, _Score, and any quoted/backticked variant that
200-
// preserves original casing cannot bypass the guard and reach the cluster as a range
201-
// query on a non-existent field.
202196
if (node.getAttr() != null && "_score".equalsIgnoreCase(node.getAttr())) {
203197
found.set(true);
204198
}
@@ -209,48 +203,60 @@ public Void visitReference(ReferenceExpression node, Void context) {
209203
return found.get();
210204
}
211205

212-
/**
213-
* Recursively scans a QueryBuilder tree for any ScriptQueryBuilder. Handles the common wrappers
214-
* that FilterQueryBuilder produces: BoolQueryBuilder (must/should/mustNot/filter) and
215-
* ConstantScoreQueryBuilder. Other QueryBuilder subtypes are leaves for our purposes: if the
216-
* top-level builder itself is a ScriptQueryBuilder we catch it, otherwise we treat it as
217-
* script-free.
218-
*/
219-
private static boolean containsScriptQuery(QueryBuilder qb) {
206+
// Allow-list of leaf query types FilterQueryBuilder emits today. Any new wrapper or container
207+
// appearing here must fail closed rather than silently embed under knn.filter.
208+
private static final Set<Class<? extends QueryBuilder>> SAFE_EFFICIENT_FILTER_LEAVES =
209+
Set.of(
210+
TermQueryBuilder.class,
211+
RangeQueryBuilder.class,
212+
WildcardQueryBuilder.class,
213+
MatchQueryBuilder.class,
214+
MatchPhraseQueryBuilder.class,
215+
MatchPhrasePrefixQueryBuilder.class,
216+
MultiMatchQueryBuilder.class,
217+
QueryStringQueryBuilder.class,
218+
SimpleQueryStringBuilder.class,
219+
MatchBoolPrefixQueryBuilder.class,
220+
ExistsQueryBuilder.class);
221+
222+
// Package-private for direct branch coverage in unit tests. Fail-closed: recurse known
223+
// containers, reject ScriptQueryBuilder/NestedQueryBuilder with targeted messages, allow
224+
// listed leaves, reject everything else as unsupported shape.
225+
static void validateEfficientFilterSafe(QueryBuilder qb) {
220226
if (qb == null) {
221-
return false;
227+
return;
222228
}
223229
if (qb instanceof ScriptQueryBuilder) {
224-
return true;
230+
throw new ExpressionEvaluationException(
231+
"filter_type=efficient does not support predicates that compile to script queries"
232+
+ " (arithmetic, function calls, CASE, date math). Rewrite the WHERE clause to"
233+
+ " use comparable/term/range predicates, or omit filter_type.");
225234
}
226235
if (qb instanceof BoolQueryBuilder) {
227236
BoolQueryBuilder bool = (BoolQueryBuilder) qb;
228-
for (QueryBuilder child : bool.must()) {
229-
if (containsScriptQuery(child)) {
230-
return true;
231-
}
232-
}
233-
for (QueryBuilder child : bool.filter()) {
234-
if (containsScriptQuery(child)) {
235-
return true;
236-
}
237-
}
238-
for (QueryBuilder child : bool.should()) {
239-
if (containsScriptQuery(child)) {
240-
return true;
241-
}
242-
}
243-
for (QueryBuilder child : bool.mustNot()) {
244-
if (containsScriptQuery(child)) {
245-
return true;
246-
}
247-
}
248-
return false;
237+
bool.must().forEach(VectorSearchQueryBuilder::validateEfficientFilterSafe);
238+
bool.filter().forEach(VectorSearchQueryBuilder::validateEfficientFilterSafe);
239+
bool.should().forEach(VectorSearchQueryBuilder::validateEfficientFilterSafe);
240+
bool.mustNot().forEach(VectorSearchQueryBuilder::validateEfficientFilterSafe);
241+
return;
249242
}
250243
if (qb instanceof ConstantScoreQueryBuilder) {
251-
return containsScriptQuery(((ConstantScoreQueryBuilder) qb).innerQuery());
244+
validateEfficientFilterSafe(((ConstantScoreQueryBuilder) qb).innerQuery());
245+
return;
246+
}
247+
if (qb instanceof NestedQueryBuilder) {
248+
throw new ExpressionEvaluationException(
249+
"filter_type=efficient does not support nested predicates in this preview."
250+
+ " Rewrite the WHERE clause using non-nested fields or omit filter_type.");
251+
}
252+
if (SAFE_EFFICIENT_FILTER_LEAVES.contains(qb.getClass())) {
253+
return;
252254
}
253-
return false;
255+
throw new ExpressionEvaluationException(
256+
"filter_type=efficient encountered an unsupported filter query shape: "
257+
+ qb.getClass().getSimpleName()
258+
+ ". Rewrite the WHERE clause using simple term/range/bool predicates,"
259+
+ " or omit filter_type.");
254260
}
255261

256262
@Override

opensearch/src/test/java/org/opensearch/sql/opensearch/storage/scan/VectorSearchQueryBuilderTest.java

Lines changed: 77 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@
1717
import java.util.List;
1818
import java.util.Map;
1919
import java.util.function.Function;
20+
import org.apache.lucene.search.join.ScoreMode;
2021
import org.junit.jupiter.api.Test;
2122
import org.opensearch.index.query.BoolQueryBuilder;
2223
import org.opensearch.index.query.QueryBuilder;
24+
import org.opensearch.index.query.QueryBuilders;
2325
import org.opensearch.index.query.WrapperQueryBuilder;
2426
import org.opensearch.sql.common.setting.Settings;
2527
import org.opensearch.sql.data.type.ExprCoreType;
@@ -712,9 +714,8 @@ void pushDownFilter_efficient_rejectsScriptSubtree() {
712714
true,
713715
rebuildWithFilter);
714716

715-
// WHERE price + 1 > 100: the outer > is not a direct field ref, so FilterQueryBuilder
716-
// falls through to buildScriptQuery() and produces a ScriptQueryBuilder. Under filter_type=
717-
// efficient that would be embedded under knn.filter, which AOSS rejects.
717+
// price + 1 > 100 lowers to a ScriptQueryBuilder; embedding it under knn.filter would
718+
// trigger the AOSS rejection this PR guards against.
718719
var condition =
719720
DSL.greater(
720721
DSL.add(new ReferenceExpression("price", ExprCoreType.INTEGER), DSL.literal(1)),
@@ -734,9 +735,7 @@ void pushDownFilter_efficient_rejectsScriptSubtree() {
734735

735736
@Test
736737
void pushDownFilter_post_allowsScriptSubtree() {
737-
// Under POST mode, script queries are fine: the WHERE lives in an outer bool.filter
738-
// that OpenSearch evaluates against hits, not inside knn.filter. Verify we do not
739-
// false-positive on the POST branch.
738+
// POST puts WHERE in an outer bool.filter, not under knn.filter, so scripts are fine.
740739
var requestBuilder = createRequestBuilder();
741740
var knnQuery = new WrapperQueryBuilder("{\"knn\":{}}");
742741
var builder = new VectorSearchQueryBuilder(requestBuilder, knnQuery, Map.of("k", "5"));
@@ -776,6 +775,78 @@ void buildSucceedsRadialWithSortEmbeddedLimit() {
776775
assertNotNull(result);
777776
}
778777

778+
// ── filter_type=efficient allow-list validator ──────────────────────
779+
780+
@Test
781+
void validateEfficientFilterSafe_rejectsNestedQuery() {
782+
// FilterQueryBuilder emits NestedQueryBuilder for SQL nested(field, pred); nested vector
783+
// semantics are outside the P0 preview so rejection must be targeted, not generic.
784+
QueryBuilder nested =
785+
QueryBuilders.nestedQuery(
786+
"parent", QueryBuilders.termQuery("parent.f", "v"), ScoreMode.None);
787+
788+
ExpressionEvaluationException ex =
789+
assertThrows(
790+
ExpressionEvaluationException.class,
791+
() -> VectorSearchQueryBuilder.validateEfficientFilterSafe(nested));
792+
assertTrue(
793+
ex.getMessage().contains("filter_type=efficient does not support nested predicates"),
794+
"Expected targeted nested rejection, got: " + ex.getMessage());
795+
}
796+
797+
@Test
798+
void validateEfficientFilterSafe_rejectsNestedBuriedInBool() {
799+
// AND-ing nested() with a term must still be caught; otherwise the guard is trivially bypassed.
800+
QueryBuilder tree =
801+
QueryBuilders.boolQuery()
802+
.filter(QueryBuilders.termQuery("state", "CA"))
803+
.filter(
804+
QueryBuilders.nestedQuery(
805+
"parent", QueryBuilders.termQuery("parent.f", "v"), ScoreMode.None));
806+
807+
ExpressionEvaluationException ex =
808+
assertThrows(
809+
ExpressionEvaluationException.class,
810+
() -> VectorSearchQueryBuilder.validateEfficientFilterSafe(tree));
811+
assertTrue(ex.getMessage().contains("nested predicates"));
812+
}
813+
814+
@Test
815+
void validateEfficientFilterSafe_acceptsBoolOfSafeLeaves() {
816+
QueryBuilder tree =
817+
QueryBuilders.boolQuery()
818+
.filter(QueryBuilders.termQuery("category", "shoes"))
819+
.filter(QueryBuilders.rangeQuery("price").gte(80).lte(150));
820+
821+
VectorSearchQueryBuilder.validateEfficientFilterSafe(tree);
822+
}
823+
824+
@Test
825+
void validateEfficientFilterSafe_acceptsExistsLeaf() {
826+
// IS NOT NULL lowers to ExistsQueryBuilder; locks in allow-list coverage for that path.
827+
QueryBuilder exists = QueryBuilders.existsQuery("brand");
828+
829+
VectorSearchQueryBuilder.validateEfficientFilterSafe(exists);
830+
}
831+
832+
@Test
833+
void validateEfficientFilterSafe_rejectsUnknownWrapper() {
834+
// Unknown shapes must fail closed so future FilterQueryBuilder additions cannot silently
835+
// re-introduce the AOSS-rejection bug class this PR is guarding against.
836+
QueryBuilder unknown = new WrapperQueryBuilder("{\"term\":{\"f\":\"v\"}}");
837+
838+
ExpressionEvaluationException ex =
839+
assertThrows(
840+
ExpressionEvaluationException.class,
841+
() -> VectorSearchQueryBuilder.validateEfficientFilterSafe(unknown));
842+
assertTrue(
843+
ex.getMessage().contains("unsupported filter query shape"),
844+
"Expected unknown-shape rejection, got: " + ex.getMessage());
845+
assertTrue(
846+
ex.getMessage().contains("WrapperQueryBuilder"),
847+
"Expected class name in message, got: " + ex.getMessage());
848+
}
849+
779850
private OpenSearchRequestBuilder createRequestBuilder() {
780851
return new OpenSearchRequestBuilder(
781852
mock(OpenSearchExprValueFactory.class), 10000, mock(Settings.class));

0 commit comments

Comments
 (0)