diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNotInNullFilterIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNotInNullFilterIT.java new file mode 100644 index 00000000000..267a750e0cc --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteNotInNullFilterIT.java @@ -0,0 +1,61 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRowsInOrder; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +/** Integration test for NOT IN excluding null/missing rows (issue #5165). */ +public class CalciteNotInNullFilterIT extends PPLIntegTestCase { + + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.BANK_WITH_NULL_VALUES); + } + + @Test + public void testNotInExcludesNullRows() throws IOException { + // age values: 32, 36, 28, 33, 36, null, 34 + // NOT IN (32, 28) should return 36, 33, 36, 34 — excluding the null row + JSONObject result = + executeQuery( + String.format( + "source=%s | where age NOT IN (32, 28) | fields age | sort age", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRowsInOrder(result, rows(33), rows(34), rows(36), rows(36)); + } + + @Test + public void testNotInExcludesNullAndMissingRows() throws IOException { + // balance values: 39225, null, 32838, 4180, null, null, 48086 + // NOT IN (39225) should return 32838, 4180, 48086 — excluding null/missing rows + JSONObject result = + executeQuery( + String.format( + "source=%s | where balance NOT IN (39225) | fields balance | sort balance", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRowsInOrder(result, rows(4180), rows(32838), rows(48086)); + } + + @Test + public void testInWithNullRowsIsUnaffected() throws IOException { + // IN should naturally exclude nulls (positive match never matches null) + JSONObject result = + executeQuery( + String.format( + "source=%s | where age IN (32, 28) | fields age | sort age", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifyDataRowsInOrder(result, rows(28), rows(32)); + } +} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5165.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5165.yml new file mode 100644 index 00000000000..34cb9f24370 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5165.yml @@ -0,0 +1,62 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + - do: + indices.create: + index: issue5165 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + int_field: + type: integer + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5165", "_id": "1"}}' + - '{"int_field": 42}' + - '{"index": {"_index": "issue5165", "_id": "2"}}' + - '{"int_field": -1}' + - '{"index": {"_index": "issue5165", "_id": "3"}}' + - '{"int_field": 0}' + - '{"index": {"_index": "issue5165", "_id": "4"}}' + - '{"int_field": 2147483647}' + - '{"index": {"_index": "issue5165", "_id": "5"}}' + - '{"int_field": null}' + +--- +teardown: + - do: + indices.delete: + index: issue5165 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5165: NOT IN should exclude null/missing rows": + - skip: + features: + - headers + - allowed_warnings + - do: + allowed_warnings: + - 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled' + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5165 | where int_field NOT IN (42, -1, 0) | fields int_field + + - match: { total: 1 } + - length: { datarows: 1 } + - match: { datarows: [ [ 2147483647 ] ] } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 8e6dbede58e..3f5295412b4 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -725,7 +725,14 @@ private QueryExpression binary(RexCall call) { CompoundQueryExpression.or( expression, QueryExpression.create(pair.getKey()).notExists()); // e.g. where a = 1 or a = 2 - case UNKNOWN -> expression; + // For NOT IN (complemented points), SQL three-valued logic dictates + // NULL NOT IN (...) evaluates to UNKNOWN (not TRUE), so null rows + // must be excluded via an exists filter. + case UNKNOWN -> + isSearchWithComplementedPoints(call) + ? CompoundQueryExpression.and( + false, expression, QueryExpression.create(pair.getKey()).exists()) + : expression; }; finalExpression.updateAnalyzedNodes(call); return finalExpression; diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java index 572f748fd03..5db0272793b 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java @@ -1245,4 +1245,62 @@ void search_complementedPointsWithNullAsFalse_generatesExistsAndNotInQuery() """, result.toString()); } + + @Test + void search_complementedPointsWithNullAsUnknown_generatesExistsAndNotInQuery() + throws ExpressionNotAnalyzableException { + // Simulates: a NOT IN (12, 13) + // Calcite represents this as SEARCH($0, Sarg[...; NULL AS UNKNOWN]) with complemented points + // SQL three-valued logic: NULL NOT IN (...) evaluates to UNKNOWN (not TRUE), + // so null rows must be excluded. + Sarg sarg = + Sarg.of( + RexUnknownAs.UNKNOWN, + ImmutableRangeSet.builder() + .add(Range.lessThan(BigDecimal.valueOf(12))) + .add(Range.open(BigDecimal.valueOf(12), BigDecimal.valueOf(13))) + .add(Range.greaterThan(BigDecimal.valueOf(13))) + .build()); + RexNode sargLiteral = + builder.makeSearchArgumentLiteral(sarg, typeFactory.createSqlType(SqlTypeName.DECIMAL)); + RexNode call = builder.makeCall(SqlStdOperatorTable.SEARCH, field1, sargLiteral); + QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes); + + assertInstanceOf(BoolQueryBuilder.class, result); + assertEquals( + """ + { + "bool" : { + "must" : [ + { + "bool" : { + "must_not" : [ + { + "terms" : { + "a" : [ + 12.0, + 13.0 + ], + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }, + { + "exists" : { + "field" : "a", + "boost" : 1.0 + } + } + ], + "adjust_pure_negative" : true, + "boost" : 1.0 + } + }\ + """, + result.toString()); + } }