Skip to content

Commit 8543e63

Browse files
authored
[BugFix] Fix NOT LIKE includes null/missing field rows (opensearch-project#5169) (opensearch-project#5338)
* [BugFix] Fix NOT LIKE includes null/missing field rows (opensearch-project#5169) Signed-off-by: Songkan Tang <songkant@amazon.com> * [BugFix] Update ClickBench q23 explain snapshot for NOT LIKE null handling Update the expected DSL output to include the exists filter for the NOT(LIKE(URL, pattern)) clause, matching the new behavior where must(exists(field)) is emitted alongside must_not(wildcard(...)). Signed-off-by: Songkan Tang <songkant@amazon.com> * [BugFix] Move exists injection from not() to prefix() for null-intolerant predicates only The blanket exists injection in SimpleQueryExpression.not() broke expressions like NOT(IS_NOT_NULL(a)), NOT(IS_TRUE(e)), and NOT(IS_FALSE(e)) by always adding an exists filter. This was incorrect because truth-test and null-test operators already encode null semantics. The fix reverts not() to its original mustNot-only behavior and moves the exists injection to the prefix() call site, where it is applied only when the inner operand is a null-intolerant predicate (LIKE, comparisons, SEARCH, etc.). Signed-off-by: Songkan Tang <songkant@amazon.com> * Refactor: replace getFieldExpression() with notWithExistsFilter() in PredicateAnalyzer Remove the getFieldExpression() accessor that exposed internal state and replace it with notWithExistsFilter(), which encapsulates the exists + mustNot logic inside SimpleQueryExpression. This produces a flatter DSL (must[exists] + mustNot[original]) instead of a nested bool wrapper, and keeps the same semantic behavior for null-intolerant predicate negation. Signed-off-by: Songkan Tang <songkant@amazon.com> --------- Signed-off-by: Songkan Tang <songkant@amazon.com>
1 parent e814981 commit 8543e63

6 files changed

Lines changed: 364 additions & 2 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
CalciteMultisearchCommandIT.class,
5555
CalciteMultiValueStatsIT.class,
5656
CalciteNewAddedCommandsIT.class,
57+
CalciteNotLikeNullIT.class,
5758
CalciteNowLikeFunctionIT.class,
5859
CalciteObjectFieldOperateIT.class,
5960
CalciteOperatorIT.class,
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.calcite.remote;
7+
8+
import static org.opensearch.sql.util.MatcherUtils.rows;
9+
import static org.opensearch.sql.util.MatcherUtils.verifyDataRowsInOrder;
10+
11+
import java.io.IOException;
12+
import org.json.JSONObject;
13+
import org.junit.jupiter.api.Test;
14+
import org.opensearch.client.Request;
15+
import org.opensearch.client.ResponseException;
16+
import org.opensearch.sql.ppl.PPLIntegTestCase;
17+
18+
/**
19+
* Integration tests for NOT LIKE with null/missing field values. Tests the fix for issue #5169: NOT
20+
* LIKE should exclude rows where the field is null or missing.
21+
*/
22+
public class CalciteNotLikeNullIT extends PPLIntegTestCase {
23+
24+
private static final String TEST_INDEX = "issue5169_not_like_null";
25+
26+
@Override
27+
public void init() throws Exception {
28+
super.init();
29+
enableCalcite();
30+
createTestIndex();
31+
}
32+
33+
private void createTestIndex() throws IOException {
34+
try {
35+
Request deleteIndex = new Request("DELETE", "/" + TEST_INDEX);
36+
client().performRequest(deleteIndex);
37+
} catch (ResponseException e) {
38+
// Index doesn't exist, which is fine
39+
}
40+
41+
Request createIndex = new Request("PUT", "/" + TEST_INDEX);
42+
createIndex.setJsonEntity(
43+
"{\n"
44+
+ " \"settings\": {\"number_of_shards\": 1, \"number_of_replicas\": 0},\n"
45+
+ " \"mappings\": {\n"
46+
+ " \"properties\": {\n"
47+
+ " \"keyword_field\": {\"type\": \"keyword\"},\n"
48+
+ " \"int_field\": {\"type\": \"integer\"}\n"
49+
+ " }\n"
50+
+ " }\n"
51+
+ "}");
52+
client().performRequest(createIndex);
53+
54+
Request bulkRequest = new Request("POST", "/" + TEST_INDEX + "/_bulk?refresh=true");
55+
bulkRequest.setJsonEntity(
56+
"{\"index\":{\"_id\":\"1\"}}\n"
57+
+ "{\"keyword_field\": \"hello\", \"int_field\": 1}\n"
58+
+ "{\"index\":{\"_id\":\"2\"}}\n"
59+
+ "{\"keyword_field\": \"world\", \"int_field\": 2}\n"
60+
+ "{\"index\":{\"_id\":\"3\"}}\n"
61+
+ "{\"keyword_field\": \"\", \"int_field\": 3}\n"
62+
+ "{\"index\":{\"_id\":\"4\"}}\n"
63+
+ "{\"keyword_field\": \"special chars...\", \"int_field\": 4}\n"
64+
+ "{\"index\":{\"_id\":\"5\"}}\n"
65+
+ "{\"keyword_field\": null, \"int_field\": null}\n");
66+
client().performRequest(bulkRequest);
67+
}
68+
69+
@Test
70+
public void testNotLikeExcludesNull() throws IOException {
71+
// NOT LIKE '%ello%' should match 'world', '', 'special chars...' but NOT null
72+
JSONObject result =
73+
executeQuery(
74+
"source="
75+
+ TEST_INDEX
76+
+ " | where NOT keyword_field LIKE '%ello%'"
77+
+ " | sort keyword_field"
78+
+ " | fields keyword_field");
79+
verifyDataRowsInOrder(result, rows(""), rows("special chars..."), rows("world"));
80+
}
81+
82+
@Test
83+
public void testNotLikeWithNoMatch() throws IOException {
84+
// NOT LIKE '%zzz%' should return all non-null rows (4 rows)
85+
JSONObject result =
86+
executeQuery(
87+
"source="
88+
+ TEST_INDEX
89+
+ " | where NOT keyword_field LIKE '%zzz%'"
90+
+ " | sort keyword_field"
91+
+ " | fields keyword_field");
92+
verifyDataRowsInOrder(result, rows(""), rows("hello"), rows("special chars..."), rows("world"));
93+
}
94+
95+
@Test
96+
public void testNotGreaterThanExcludesNull() throws IOException {
97+
// NOT int_field > 2 should return rows with int_field 1, 2 but NOT null
98+
JSONObject result =
99+
executeQuery(
100+
"source="
101+
+ TEST_INDEX
102+
+ " | where NOT int_field > 2"
103+
+ " | sort int_field"
104+
+ " | fields int_field");
105+
verifyDataRowsInOrder(result, rows(1), rows(2));
106+
}
107+
}

integ-test/src/test/resources/expectedOutput/calcite/clickbench/q23.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,4 @@ calcite:
99
LogicalFilter(condition=[AND(LIKE($97, '%Google%', '\'), <>($63, ''), NOT(LIKE($26, '%.google.%', '\')))])
1010
CalciteLogicalIndexScan(table=[[OpenSearch, hits]])
1111
physical: |
12-
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(LIKE($3, '%Google%', '\'), <>($1, ''), NOT(LIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT(),dc(UserID)=COUNT(DISTINCT $2)), PROJECT->[c, dc(UserID), SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
12+
CalciteEnumerableIndexScan(table=[[OpenSearch, hits]], PushDownContext=[[FILTER->AND(LIKE($3, '%Google%', '\'), <>($1, ''), NOT(LIKE($0, '%.google.%', '\'))), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={1},c=COUNT(),dc(UserID)=COUNT(DISTINCT $2)), PROJECT->[c, dc(UserID), SearchPhrase], SORT_AGG_METRICS->[0 DESC LAST], LIMIT->10, LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"wildcard":{"Title":{"wildcard":"*Google*","boost":1.0}}},{"bool":{"must":[{"exists":{"field":"SearchPhrase","boost":1.0}}],"must_not":[{"term":{"SearchPhrase":{"value":"","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}},{"bool":{"must":[{"exists":{"field":"URL","boost":1.0}}],"must_not":[{"wildcard":{"URL":{"wildcard":"*.google.*","boost":1.0}}}],"adjust_pure_negative":true,"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"aggregations":{"SearchPhrase":{"terms":{"field":"SearchPhrase","size":10,"min_doc_count":1,"shard_min_doc_count":0,"show_term_doc_count_error":false,"order":[{"c":"desc"},{"_key":"asc"}]},"aggregations":{"dc(UserID)":{"cardinality":{"field":"UserID"}},"c":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
setup:
2+
- do:
3+
indices.create:
4+
index: issue5169_keyword
5+
body:
6+
settings:
7+
number_of_shards: 1
8+
number_of_replicas: 0
9+
mappings:
10+
properties:
11+
keyword_field:
12+
type: keyword
13+
14+
- do:
15+
bulk:
16+
refresh: true
17+
body:
18+
- '{"index": {"_index": "issue5169_keyword", "_id": "1"}}'
19+
- '{"keyword_field": "hello"}'
20+
- '{"index": {"_index": "issue5169_keyword", "_id": "2"}}'
21+
- '{"keyword_field": "world"}'
22+
- '{"index": {"_index": "issue5169_keyword", "_id": "3"}}'
23+
- '{"keyword_field": ""}'
24+
- '{"index": {"_index": "issue5169_keyword", "_id": "4"}}'
25+
- '{"keyword_field": "special chars..."}'
26+
- '{"index": {"_index": "issue5169_keyword", "_id": "5"}}'
27+
- '{"keyword_field": null}'
28+
29+
---
30+
teardown:
31+
- do:
32+
indices.delete:
33+
index: issue5169_keyword
34+
ignore_unavailable: true
35+
36+
---
37+
"Issue 5169: NOT LIKE should exclude null/missing field rows":
38+
- skip:
39+
features:
40+
- headers
41+
- do:
42+
headers:
43+
Content-Type: 'application/json'
44+
ppl:
45+
body:
46+
query: source=issue5169_keyword | where NOT keyword_field LIKE '%ello%' | fields keyword_field
47+
48+
- match: { total: 3 }
49+
- length: { datarows: 3 }

opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,8 @@ private QueryExpression prefix(RexCall call) {
577577
throw new PredicateAnalyzerException(message);
578578
}
579579

580-
Expression operandExpr = call.getOperands().get(0).accept(this);
580+
RexNode innerOperand = call.getOperands().get(0);
581+
Expression operandExpr = innerOperand.accept(this);
581582
// Handle NOT(boolean_field) - Calcite simplifies "field = false" to NOT($field).
582583
// In PPL semantics, "field = false" should only match documents where the field is
583584
// explicitly false (not null or missing). This is achieved via term query {value: false}.
@@ -586,9 +587,36 @@ private QueryExpression prefix(RexCall call) {
586587
return QueryExpression.create(namedField).isFalse();
587588
}
588589
QueryExpression expr = (QueryExpression) operandExpr;
590+
// For null-intolerant predicates (LIKE, comparisons, equality, etc.),
591+
// negation must also exclude documents where the field is NULL/missing.
592+
// Truth-test operators (IS_TRUE, IS_NULL, etc.) already encode null
593+
// semantics and must NOT get an additional exists filter.
594+
if (isNullIntolerantPredicate(innerOperand) && expr instanceof SimpleQueryExpression sqe) {
595+
return sqe.notWithExistsFilter();
596+
}
589597
return expr.not();
590598
}
591599

600+
/** Returns true if the given RexNode is a null-intolerant predicate (value comparison). */
601+
private static boolean isNullIntolerantPredicate(RexNode node) {
602+
if (!(node instanceof RexCall innerCall)) {
603+
return false;
604+
}
605+
return switch (innerCall.getKind()) {
606+
case LIKE,
607+
EQUALS,
608+
NOT_EQUALS,
609+
GREATER_THAN,
610+
GREATER_THAN_OR_EQUAL,
611+
LESS_THAN,
612+
LESS_THAN_OR_EQUAL,
613+
BETWEEN,
614+
SEARCH ->
615+
true;
616+
default -> false;
617+
};
618+
}
619+
592620
private QueryExpression postfix(RexCall call) {
593621
checkArgument(
594622
call.getKind() == SqlKind.IS_TRUE
@@ -1309,6 +1337,12 @@ public QueryExpression not() {
13091337
return this;
13101338
}
13111339

1340+
/** Negate with an exists filter to exclude null/missing documents. */
1341+
QueryExpression notWithExistsFilter() {
1342+
builder = boolQuery().must(existsQuery(getFieldReference())).mustNot(builder());
1343+
return this;
1344+
}
1345+
13121346
@Override
13131347
public QueryExpression exists() {
13141348
builder = existsQuery(getFieldReference());

0 commit comments

Comments
 (0)