Skip to content

Commit fd50694

Browse files
committed
Rewrite IS NOT NULL / IS NULL to native exists DSL in v2 filter pushdown
The v2 FilterQueryBuilder's luceneQueries map did not register entries for BuiltinFunctionName.IS_NULL or IS_NOT_NULL, so visitFunction fell through to buildScriptQuery for these unary predicates. This produced a compounded script query in the pushed-down DSL, e.g.: {"script": {"script": {"source": "{\"langType\":\"v2\",\"script\":\"is not null(age)\"}", ...}}} instead of the native OpenSearch exists query. Script fallback is more expensive to evaluate, is not supported on AOSS / serverless, and diverges from what the Calcite path and downstream tooling already produce. This change adds a new ExistsQuery lucene query class that overrides the LuceneQuery.canSupport(FunctionExpression) / build(FunctionExpression) pair to accept a single ReferenceExpression argument (the standard base class contract is binary {ref, literal}). Two instances are registered in FilterQueryBuilder, one for each predicate sense, producing: IS NOT NULL col -> {"exists": {"field": "col"}} IS NULL col -> {"bool": {"must_not": [{"exists": {"field": "col"}}]}} matching the Calcite PredicateAnalyzer output for these predicates. Nested-field predicates are explicitly guarded: ExistsQuery.canSupport returns false when the argument is a nested() function, mirroring the equivalent guard in PredicateAnalyzer ("OpenSearch DSL does not handle IS_NULL / IS_NOT_NULL on nested fields correctly"). When canSupport returns false the existing default branch in FilterQueryBuilder.visitFunction falls through to the script query path, preserving the prior behavior for that edge case. This closes the tracker row "Pushdown coverage - IS NOT NULL pushes script instead of exists". The change affects all v2 SQL queries, not just vectorSearch table function queries. Test coverage: - FilterQueryBuilderTest: updated should_build_script_query_for_unsupported_lucene_query to two new assertions - should_build_exists_query_for_is_not_null and should_build_must_not_exists_query_for_is_null - pinning the new DSL shape. - ExistsPushdownIT (new): explain-plan integration tests asserting that WHERE col IS NOT NULL pushes as native exists DSL (no "script" key) and WHERE col IS NULL pushes as bool/must_not[exists]. - AggregationIT (existing) continues to pass, confirming result-level correctness is preserved for queries like "where int0 IS NOT NULL". Signed-off-by: Eric Wei <mengwei.eric@gmail.com>
1 parent fa444fe commit fd50694

4 files changed

Lines changed: 167 additions & 8 deletions

File tree

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.sql;
7+
8+
import java.io.IOException;
9+
import java.util.regex.Matcher;
10+
import java.util.regex.Pattern;
11+
import org.junit.Test;
12+
import org.opensearch.sql.legacy.SQLIntegTestCase;
13+
import org.opensearch.sql.legacy.TestsConstants;
14+
15+
/**
16+
* Explain-plan integration tests asserting that {@code IS NOT NULL} / {@code IS NULL} predicates
17+
* push down as native OpenSearch {@code exists} DSL rather than as serialized script queries.
18+
*
19+
* <p>Before this change both predicates serialized through the compounded script engine, producing
20+
* a {@code "script"} clause in the pushdown DSL. After this change the v2 filter builder emits
21+
* {@code {"exists": {"field": ...}}} directly for {@code IS NOT NULL}, and a {@code bool} query
22+
* with a single {@code must_not[exists]} child for {@code IS NULL}. This matches what downstream
23+
* tooling, serverless / AOSS, and the Calcite path already produce.
24+
*/
25+
public class ExistsPushdownIT extends SQLIntegTestCase {
26+
27+
// Anchored on the surrounding `sourceBuilder=...`, `pitId=` tokens in OpenSearchRequest's
28+
// toString() output. Test-only coupling: if that request-string format changes (token renamed,
29+
// pitId removed), this helper breaks even when the DSL shape is still correct. Update the regex
30+
// anchors if that happens.
31+
private static final Pattern SOURCE_BUILDER_JSON =
32+
Pattern.compile("sourceBuilder=(\\{.*?\\}), pitId=", Pattern.DOTALL);
33+
34+
/** Extracts and unescapes the sourceBuilder JSON embedded in the explain request string. */
35+
private static String extractSourceBuilderJson(String explain) {
36+
Matcher m = SOURCE_BUILDER_JSON.matcher(explain);
37+
assertTrue("Explain should contain sourceBuilder JSON:\n" + explain, m.find());
38+
return m.group(1).replace("\\\"", "\"");
39+
}
40+
41+
@Override
42+
protected void init() throws Exception {
43+
loadIndex(Index.ACCOUNT);
44+
}
45+
46+
private static final String TEST_INDEX = TestsConstants.TEST_INDEX_ACCOUNT;
47+
48+
@Test
49+
public void testIsNotNullPushesDownAsExistsQuery() throws IOException {
50+
String explain =
51+
explainQuery("SELECT age FROM " + TEST_INDEX + " WHERE age IS NOT NULL LIMIT 1");
52+
String sourceBuilder = extractSourceBuilderJson(explain);
53+
54+
assertTrue(
55+
"IS NOT NULL should push down as native exists DSL:\n" + sourceBuilder,
56+
sourceBuilder.contains("\"exists\""));
57+
assertTrue(
58+
"IS NOT NULL exists DSL should target the 'age' field:\n" + sourceBuilder,
59+
sourceBuilder.contains("\"field\":\"age\""));
60+
assertFalse(
61+
"IS NOT NULL should not fall through to a script query:\n" + sourceBuilder,
62+
sourceBuilder.contains("\"script\""));
63+
}
64+
65+
@Test
66+
public void testIsNullPushesDownAsMustNotExistsQuery() throws IOException {
67+
String explain = explainQuery("SELECT age FROM " + TEST_INDEX + " WHERE age IS NULL LIMIT 1");
68+
String sourceBuilder = extractSourceBuilderJson(explain);
69+
70+
assertTrue(
71+
"IS NULL should push down as bool/must_not[exists] DSL:\n" + sourceBuilder,
72+
sourceBuilder.contains("\"must_not\""));
73+
assertTrue(
74+
"IS NULL should wrap a native exists clause:\n" + sourceBuilder,
75+
sourceBuilder.contains("\"exists\""));
76+
assertTrue(
77+
"IS NULL exists DSL should target the 'age' field:\n" + sourceBuilder,
78+
sourceBuilder.contains("\"field\":\"age\""));
79+
assertFalse(
80+
"IS NULL should not fall through to a script query:\n" + sourceBuilder,
81+
sourceBuilder.contains("\"script\""));
82+
}
83+
}

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilder.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.opensearch.sql.expression.function.FunctionName;
3030
import org.opensearch.sql.opensearch.storage.script.CompoundedScriptEngine.ScriptEngineType;
3131
import org.opensearch.sql.opensearch.storage.script.core.ExpressionScript;
32+
import org.opensearch.sql.opensearch.storage.script.filter.lucene.ExistsQuery;
3233
import org.opensearch.sql.opensearch.storage.script.filter.lucene.LikeQuery;
3334
import org.opensearch.sql.opensearch.storage.script.filter.lucene.LuceneQuery;
3435
import org.opensearch.sql.opensearch.storage.script.filter.lucene.NestedQuery;
@@ -86,6 +87,8 @@ public ScriptQueryUnSupportedException(String message) {
8687
.put(BuiltinFunctionName.WILDCARD_QUERY.getName(), new WildcardQuery())
8788
.put(BuiltinFunctionName.WILDCARDQUERY.getName(), new WildcardQuery())
8889
.put(BuiltinFunctionName.NESTED.getName(), new NestedQuery())
90+
.put(BuiltinFunctionName.IS_NULL.getName(), new ExistsQuery(true /* negated */))
91+
.put(BuiltinFunctionName.IS_NOT_NULL.getName(), new ExistsQuery(false))
8992
.build();
9093

9194
/**
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.opensearch.storage.script.filter.lucene;
7+
8+
import static org.opensearch.sql.analysis.NestedAnalyzer.isNestedFunction;
9+
10+
import lombok.RequiredArgsConstructor;
11+
import org.opensearch.index.query.QueryBuilder;
12+
import org.opensearch.index.query.QueryBuilders;
13+
import org.opensearch.sql.expression.FunctionExpression;
14+
import org.opensearch.sql.expression.ReferenceExpression;
15+
16+
/**
17+
* Lucene query that builds a native {@code exists} DSL fragment for {@code IS NULL} / {@code IS NOT
18+
* NULL} predicates.
19+
*
20+
* <p>This replaces the previous behavior of serializing these unary predicates as compounded script
21+
* queries. The native {@code exists} query is cheaper, AOSS / serverless compatible, and the
22+
* expected DSL shape downstream consumers look for.
23+
*
24+
* <p>Unlike most {@link LuceneQuery} subclasses this predicate family is unary (a single reference
25+
* argument) rather than the standard {ref, literal} pair, so this class overrides both {@link
26+
* #canSupport(FunctionExpression)} and {@link #build(FunctionExpression)}.
27+
*
28+
* <p>Nested-field predicates are intentionally NOT supported here: OpenSearch DSL does not handle
29+
* {@code IS_NULL} / {@code IS_NOT_NULL} on nested fields correctly (see the equivalent guard in
30+
* {@code PredicateAnalyzer} for the Calcite path). When the reference is a nested function, {@link
31+
* #canSupport} returns {@code false} and {@link
32+
* org.opensearch.sql.opensearch.storage.script.filter.FilterQueryBuilder} falls back to the script
33+
* query path, preserving correctness.
34+
*/
35+
@RequiredArgsConstructor
36+
public class ExistsQuery extends LuceneQuery {
37+
38+
/** When true, the predicate is {@code IS NULL} and the exists query is wrapped in must_not. */
39+
private final boolean negated;
40+
41+
@Override
42+
public boolean canSupport(FunctionExpression func) {
43+
return func.getArguments().size() == 1
44+
&& func.getArguments().get(0) instanceof ReferenceExpression
45+
&& !isNestedFunction(func.getArguments().get(0));
46+
}
47+
48+
@Override
49+
public QueryBuilder build(FunctionExpression func) {
50+
ReferenceExpression ref = (ReferenceExpression) func.getArguments().get(0);
51+
String fieldName = ref.getRawPath();
52+
QueryBuilder existsQuery = QueryBuilders.existsQuery(fieldName);
53+
if (negated) {
54+
return QueryBuilders.boolQuery().mustNot(existsQuery);
55+
}
56+
return existsQuery;
57+
}
58+
}

opensearch/src/test/java/org/opensearch/sql/opensearch/storage/script/filter/FilterQueryBuilderTest.java

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -174,22 +174,37 @@ void should_build_wildcard_query_for_like_expression() {
174174
}
175175

176176
@Test
177-
void should_build_script_query_for_unsupported_lucene_query() {
178-
mockToStringSerializer();
177+
void should_build_exists_query_for_is_not_null() {
179178
assertJsonEquals(
180179
"{\n"
181-
+ " \"script\" : {\n"
182-
+ " \"script\" : {\n"
183-
+ " \"source\" : \"{\\\"langType\\\":\\\"v2\\\",\\\"script\\\":\\\"is not"
184-
+ " null(age)\\\"}\",\n"
185-
+ " \"lang\" : \"opensearch_compounded_script\"\n"
186-
+ " },\n"
180+
+ " \"exists\" : {\n"
181+
+ " \"field\" : \"age\",\n"
187182
+ " \"boost\" : 1.0\n"
188183
+ " }\n"
189184
+ "}",
190185
buildQuery(DSL.isnotnull(ref("age", INTEGER))));
191186
}
192187

188+
@Test
189+
void should_build_must_not_exists_query_for_is_null() {
190+
assertJsonEquals(
191+
"{\n"
192+
+ " \"bool\" : {\n"
193+
+ " \"must_not\" : [\n"
194+
+ " {\n"
195+
+ " \"exists\" : {\n"
196+
+ " \"field\" : \"age\",\n"
197+
+ " \"boost\" : 1.0\n"
198+
+ " }\n"
199+
+ " }\n"
200+
+ " ],\n"
201+
+ " \"adjust_pure_negative\" : true,\n"
202+
+ " \"boost\" : 1.0\n"
203+
+ " }\n"
204+
+ "}",
205+
buildQuery(DSL.is_null(ref("age", INTEGER))));
206+
}
207+
193208
@Test
194209
void should_build_script_query_for_function_expression() {
195210
mockToStringSerializer();

0 commit comments

Comments
 (0)