Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.calcite.remote;

import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES;
import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRowsInOrder;

import java.io.IOException;
import org.json.JSONObject;
import org.junit.Test;
import org.opensearch.sql.ppl.PPLIntegTestCase;

/**
 * Integration tests for issue #5165: with Calcite enabled, a {@code NOT IN} filter must not
 * return rows whose filtered field is null or missing, while {@code IN} keeps its behavior.
 */
public class CalciteNotInNullFilterIT extends PPLIntegTestCase {

  /** Enables the Calcite engine and loads the bank index that contains null values. */
  @Override
  public void init() throws Exception {
    super.init();
    enableCalcite();
    loadIndex(Index.BANK_WITH_NULL_VALUES);
  }

  /**
   * Substitutes the null-values bank index name into {@code pplTemplate} (its single {@code %s}
   * placeholder) and executes the resulting PPL query.
   */
  private JSONObject queryBankWithNulls(String pplTemplate) throws IOException {
    String ppl = String.format(pplTemplate, TEST_INDEX_BANK_WITH_NULL_VALUES);
    return executeQuery(ppl);
  }

  @Test
  public void testNotInExcludesNullRows() throws IOException {
    // Fixture ages: 32, 36, 28, 33, 36, null, 34.
    // Dropping 32 and 28 must leave 36, 33, 36, 34; the null row must not come back.
    JSONObject response =
        queryBankWithNulls("source=%s | where age NOT IN (32, 28) | fields age | sort age");
    verifyDataRowsInOrder(response, rows(33), rows(34), rows(36), rows(36));
  }

  @Test
  public void testNotInExcludesNullAndMissingRows() throws IOException {
    // Fixture balances: 39225, null, 32838, 4180, null, null, 48086.
    // Dropping 39225 must leave 32838, 4180, 48086; null/missing rows must not come back.
    JSONObject response =
        queryBankWithNulls(
            "source=%s | where balance NOT IN (39225) | fields balance | sort balance");
    verifyDataRowsInOrder(response, rows(4180), rows(32838), rows(48086));
  }

  @Test
  public void testInWithNullRowsIsUnaffected() throws IOException {
    // A positive IN match can never match null, so null rows are already excluded here.
    JSONObject response =
        queryBankWithNulls("source=%s | where age IN (32, 28) | fields age | sort age");
    verifyDataRowsInOrder(response, rows(28), rows(32));
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Shared fixture for the issue 5165 REST test:
#   1. turn the Calcite engine on through a transient setting,
#   2. create a single-shard, zero-replica index with one integer field,
#   3. bulk-index five documents, one of which has an explicit null value.
setup:
- do:
query.settings:
body:
transient:
plugins.calcite.enabled: true
- do:
indices.create:
index: issue5165
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
properties:
int_field:
type: integer
- do:
bulk:
# The bulk request asks for a refresh so the query in the test case sees the documents.
refresh: true
body:
# Documents 1-3 hold the values listed in the NOT IN clause of the test query (42, -1, 0).
- '{"index": {"_index": "issue5165", "_id": "1"}}'
- '{"int_field": 42}'
- '{"index": {"_index": "issue5165", "_id": "2"}}'
- '{"int_field": -1}'
- '{"index": {"_index": "issue5165", "_id": "3"}}'
- '{"int_field": 0}'
# Document 4 is the only non-null value outside the NOT IN list.
- '{"index": {"_index": "issue5165", "_id": "4"}}'
- '{"int_field": 2147483647}'
# Document 5 is the null row that the NOT IN filter must not return.
- '{"index": {"_index": "issue5165", "_id": "5"}}'
- '{"int_field": null}'

---
# Undo the fixture: delete the test index (tolerating its absence) and switch the
# transient Calcite setting back to false.
teardown:
- do:
indices.delete:
index: issue5165
ignore_unavailable: true
- do:
query.settings:
body:
transient:
plugins.calcite.enabled: false

---
# Runs a PPL query with NOT IN (42, -1, 0) against the five fixture documents.
# Three documents are removed by the value list; the null document must also be
# removed, so exactly one row (2147483647) is expected.
"Issue 5165: NOT IN should exclude null/missing rows":
# NOTE(review): skip.features presumably restricts this case to runners that support
# the listed features (headers, allowed_warnings) - confirm against the REST test framework docs.
- skip:
features:
- headers
- allowed_warnings
- do:
# Tolerate the _id fielddata deprecation warning so it does not fail the request.
allowed_warnings:
- 'Loading the fielddata on the _id field is deprecated and will be removed in future versions. If you require sorting or aggregating on this field you should also include the id in the body of your documents, and map this field as a keyword field that has [doc_values] enabled'
headers:
Content-Type: 'application/json'
ppl:
body:
query: source=issue5165 | where int_field NOT IN (42, -1, 0) | fields int_field

# Only the 2147483647 row may come back; the null row must be filtered out.
- match: { total: 1 }
- length: { datarows: 1 }
- match: { datarows: [ [ 2147483647 ] ] }
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,14 @@ private QueryExpression binary(RexCall call) {
CompoundQueryExpression.or(
expression, QueryExpression.create(pair.getKey()).notExists());
// e.g. where a = 1 or a = 2
case UNKNOWN -> expression;
// For NOT IN (complemented points), SQL three-valued logic dictates
// NULL NOT IN (...) evaluates to UNKNOWN (not TRUE), so null rows
// must be excluded via an exists filter.
case UNKNOWN ->
isSearchWithComplementedPoints(call)
? CompoundQueryExpression.and(
false, expression, QueryExpression.create(pair.getKey()).exists())
: expression;
};
finalExpression.updateAnalyzedNodes(call);
return finalExpression;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1245,4 +1245,62 @@ void search_complementedPointsWithNullAsFalse_generatesExistsAndNotInQuery()
""",
result.toString());
}

/**
 * Models {@code a NOT IN (12, 13)} as {@code SEARCH($0, Sarg[...; NULL AS UNKNOWN])} with
 * complemented points and checks the generated query: a {@code must_not terms} clause combined
 * with an {@code exists} clause on the same field.
 *
 * <p>Under SQL three-valued logic {@code NULL NOT IN (...)} is UNKNOWN rather than TRUE, so rows
 * with a null value must not match.
 */
@Test
void search_complementedPointsWithNullAsUnknown_generatesExistsAndNotInQuery()
    throws ExpressionNotAnalyzableException {
  BigDecimal twelve = BigDecimal.valueOf(12);
  BigDecimal thirteen = BigDecimal.valueOf(13);

  // Complemented points: every value except 12 and 13, written as three open ranges.
  ImmutableRangeSet<BigDecimal> allButTwoPoints =
      ImmutableRangeSet.<BigDecimal>builder()
          .add(Range.lessThan(twelve))
          .add(Range.open(twelve, thirteen))
          .add(Range.greaterThan(thirteen))
          .build();
  Sarg<BigDecimal> notInSarg = Sarg.of(RexUnknownAs.UNKNOWN, allButTwoPoints);

  RexNode searchArgument =
      builder.makeSearchArgumentLiteral(
          notInSarg, typeFactory.createSqlType(SqlTypeName.DECIMAL));
  RexNode searchCall = builder.makeCall(SqlStdOperatorTable.SEARCH, field1, searchArgument);

  QueryBuilder analyzed = PredicateAnalyzer.analyze(searchCall, schema, fieldTypes);

  // The block below is a text block: its lines and the closing delimiter share one indent,
  // which the compiler strips, so only the content after that indent is compared.
  String expectedDsl =
      """
      {
      "bool" : {
      "must" : [
      {
      "bool" : {
      "must_not" : [
      {
      "terms" : {
      "a" : [
      12.0,
      13.0
      ],
      "boost" : 1.0
      }
      }
      ],
      "adjust_pure_negative" : true,
      "boost" : 1.0
      }
      },
      {
      "exists" : {
      "field" : "a",
      "boost" : 1.0
      }
      }
      ],
      "adjust_pure_negative" : true,
      "boost" : 1.0
      }
      }\
      """;

  assertInstanceOf(BoolQueryBuilder.class, analyzed);
  assertEquals(expectedDsl, analyzed.toString());
}
}
Loading