diff --git a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java index 849c60fe4eb..54371d84b79 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java @@ -1260,6 +1260,19 @@ void populate() { builder.makeNullLiteral(arg1.getType()), arg1), PPLTypeChecker.wrapComparable((SameOperandTypeChecker) OperandTypes.SAME_SAME)); + // PPL isempty(x) — TRUE iff x is NULL or an empty string. We express this as + // OR(IS_NULL(x), CHAR_LENGTH(x) = 0) + // rather than reusing SqlStdOperatorTable.IS_EMPTY: the latter is the SQL:2003 + // multiset/collection IS EMPTY predicate (its OperandTypeChecker is + // OperandTypes.COLLECTION_OR_MAP and its enumerable runtime calls + // java.util.Collection.isEmpty() reflectively). Passing a string operand only + // worked by coincidence — RexBuilder.makeCall bypasses the operand checker, and + // Calcite's enumerable codegen emits a bare `target.isEmpty()` call that happens + // to bind to String.isEmpty() at Janino compile time. The CHAR_LENGTH form makes + // the string semantics explicit, lets every backend translate the predicate + // through their normal length / equality bindings, and works on any code path + // that doesn't go through Calcite's enumerable runtime (e.g. Substrait emission + // for analytics-engine, which has no IS EMPTY mapping). register( IS_EMPTY, (FunctionImp1) @@ -1267,7 +1280,10 @@ void populate() { builder.makeCall( SqlStdOperatorTable.OR, builder.makeCall(SqlStdOperatorTable.IS_NULL, arg), - builder.makeCall(SqlStdOperatorTable.IS_EMPTY, arg)), + builder.makeCall( + SqlStdOperatorTable.EQUALS, + builder.makeCall(SqlStdOperatorTable.CHAR_LENGTH, arg), + builder.makeExactLiteral(BigDecimal.ZERO))), PPLTypeChecker.family(SqlTypeFamily.ANY)); register( IS_BLANK, @@ -1277,12 +1293,15 @@ void populate() { SqlStdOperatorTable.OR, builder.makeCall(SqlStdOperatorTable.IS_NULL, arg), builder.makeCall( - SqlStdOperatorTable.IS_EMPTY, + SqlStdOperatorTable.EQUALS, builder.makeCall( - SqlStdOperatorTable.TRIM, - builder.makeFlag(Flag.BOTH), - builder.makeLiteral(" "), - arg))), + SqlStdOperatorTable.CHAR_LENGTH, + builder.makeCall( + SqlStdOperatorTable.TRIM, + builder.makeFlag(Flag.BOTH), + builder.makeLiteral(" "), + arg)), + builder.makeExactLiteral(BigDecimal.ZERO))), PPLTypeChecker.family(SqlTypeFamily.ANY)); register( ILIKE, diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_isblank.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_isblank.yaml index 642d64b4236..4f2c524ba07 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_isblank.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_isblank.yaml @@ -2,7 +2,8 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) - LogicalFilter(condition=[OR(IS NULL($1), IS EMPTY(TRIM(FLAG(BOTH), ' ', $1)))]) + LogicalFilter(condition=[OR(IS NULL($1), =(CHAR_LENGTH(TRIM(FLAG(BOTH), ' ', $1)), 0))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), IS EMPTY(TRIM(FLAG(BOTH), ' ', $1))), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQFHHsKICAib3AiOiB7CiAgICAibmFtZSI6ICJPUiIsCiAgICAia2luZCI6ICJPUiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIE5VTEwiLAogICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJJUyBFTVBUWSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVIiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiVFJJTSIsCiAgICAgICAgICAgICJraW5kIjogIlRSSU0iLAogICAgICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJsaXRlcmFsIjogIkJPVEgiLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiU1lNQk9MIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMiwKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgICAgIH0KICAgICAgICAgICAgfQogICAgICAgICAgXQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2,0],"DIGESTS":["firstname.keyword"," ","firstname.keyword"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), =(CHAR_LENGTH(TRIM(FLAG(BOTH), ' ', $1)), 0)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"should":[{"bool":{"must_not":[{"exists":{"field":"firstname","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQEXnsKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJvcCI6IHsKICAgICAgICAgICAgIm5hbWUiOiAiVFJJTSIsCiAgICAgICAgICAgICJraW5kIjogIlRSSU0iLAogICAgICAgICAgICAic3ludGF4IjogIkZVTkNUSU9OIgogICAgICAgICAgfSwKICAgICAgICAgICJvcGVyYW5kcyI6IFsKICAgICAgICAgICAgewogICAgICAgICAgICAgICJsaXRlcmFsIjogIkJPVEgiLAogICAgICAgICAgICAgICJ0eXBlIjogewogICAgICAgICAgICAgICAgInR5cGUiOiAiU1lNQk9MIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IGZhbHNlCiAgICAgICAgICAgICAgfQogICAgICAgICAgICB9LAogICAgICAgICAgICB7CiAgICAgICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDAsCiAgICAgICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgICAgICB9CiAgICAgICAgICAgIH0sCiAgICAgICAgICAgIHsKICAgICAgICAgICAgICAiZHluYW1pY1BhcmFtIjogMSwKICAgICAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgICAgIH0KICAgICAgICAgICAgfQogICAgICAgICAgXQogICAgICAgIH0KICAgICAgXQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkJJR0lOVCIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp":1778662429619956162,"SOURCES":[2,0,2],"DIGESTS":[" ","firstname.keyword",0]}},"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty.yaml index c4f104e6462..f997ead5604 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty.yaml @@ -2,7 +2,8 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) - LogicalFilter(condition=[OR(IS NULL($1), IS EMPTY($1))]) + LogicalFilter(condition=[OR(IS NULL($1), =(CHAR_LENGTH($1), 0))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), IS EMPTY($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQC13sKICAib3AiOiB7CiAgICAibmFtZSI6ICJPUiIsCiAgICAia2luZCI6ICJPUiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIE5VTEwiLAogICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICJJUyBFTVBUWSIsCiAgICAgICAgImtpbmQiOiAiT1RIRVIiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0KICBdCn0=\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0],"DIGESTS":["firstname.keyword","firstname.keyword"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), =(CHAR_LENGTH($1), 0)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"should":[{"bool":{"must_not":[{"exists":{"field":"firstname","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCGXsKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp":1778662435626872557,"SOURCES":[0,2],"DIGESTS":["firstname.keyword",0]}},"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty_or_others.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty_or_others.yaml index e7603f5643d..30c9c83813d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty_or_others.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_isempty_or_others.yaml @@ -2,7 +2,8 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) - LogicalFilter(condition=[OR(=($4, 'M'), IS NULL($1), IS EMPTY($1))]) + LogicalFilter(condition=[OR(=($4, 'M'), IS NULL($1), =(CHAR_LENGTH($1), 0))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), =($4, 'M'), IS EMPTY($1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQEtnsKICAib3AiOiB7CiAgICAibmFtZSI6ICJPUiIsCiAgICAia2luZCI6ICJPUiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIE5VTEwiLAogICAgICAgICJraW5kIjogIklTX05VTEwiLAogICAgICAgICJzeW50YXgiOiAiUE9TVEZJWCIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJvcCI6IHsKICAgICAgICAibmFtZSI6ICI9IiwKICAgICAgICAia2luZCI6ICJFUVVBTFMiLAogICAgICAgICJzeW50YXgiOiAiQklOQVJZIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0sCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDIsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfSwKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIklTIEVNUFRZIiwKICAgICAgICAia2luZCI6ICJPVEhFUiIsCiAgICAgICAgInN5bnRheCI6ICJQT1NURklYIgogICAgICB9LAogICAgICAib3BlcmFuZHMiOiBbCiAgICAgICAgewogICAgICAgICAgImR5bmFtaWNQYXJhbSI6IDMsCiAgICAgICAgICAidHlwZSI6IHsKICAgICAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICAgICAgfQogICAgICAgIH0KICAgICAgXQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,0,2,0],"DIGESTS":["firstname.keyword","gender.keyword","M","firstname.keyword"]}},"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], SCRIPT->OR(IS NULL($1), =($4, 'M'), =(CHAR_LENGTH($1), 0)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"should":[{"bool":{"must_not":[{"exists":{"field":"firstname","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},{"term":{"gender.keyword":{"value":"M","boost":1.0}}},{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCGXsKICAib3AiOiB7CiAgICAibmFtZSI6ICI9IiwKICAgICJraW5kIjogIkVRVUFMUyIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUKICAgICAgfQogICAgfQogIF0KfQ==\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp":1778662435212192100,"SOURCES":[0,2],"DIGESTS":["firstname.keyword",0]}},"boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) + diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_isblank.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_isblank.yaml index 887fd96408b..6cc557f2a45 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_isblank.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_isblank.yaml @@ -2,9 +2,9 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) - LogicalFilter(condition=[OR(IS NULL($1), IS EMPTY(TRIM(FLAG(BOTH), ' ', $1)))]) + LogicalFilter(condition=[OR(IS NULL($1), =(CHAR_LENGTH(TRIM(FLAG(BOTH), ' ', $1)), 0))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NULL($t1)], expr#18=[FLAG(BOTH)], expr#19=[' '], expr#20=[TRIM($t18, $t19, $t1)], expr#21=[IS EMPTY($t20)], expr#22=[OR($t17, $t21)], proj#0..10=[{exprs}], $condition=[$t22]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NULL($t1)], expr#18=[FLAG(BOTH)], expr#19=[' '], expr#20=[TRIM($t18, $t19, $t1)], expr#21=[CHAR_LENGTH($t20)], expr#22=[0], expr#23=[=($t21, $t22)], expr#24=[OR($t17, $t23)], proj#0..10=[{exprs}], $condition=[$t24]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_isempty.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_isempty.yaml index 6115f98e23f..5d9c34e366b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_isempty.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_isempty.yaml @@ -2,9 +2,9 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) - LogicalFilter(condition=[OR(IS NULL($1), IS EMPTY($1))]) + LogicalFilter(condition=[OR(IS NULL($1), =(CHAR_LENGTH($1), 0))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NULL($t1)], expr#18=[IS EMPTY($t1)], expr#19=[OR($t17, $t18)], proj#0..10=[{exprs}], $condition=[$t19]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NULL($t1)], expr#18=[CHAR_LENGTH($t1)], expr#19=[0], expr#20=[=($t18, $t19)], expr#21=[OR($t17, $t20)], proj#0..10=[{exprs}], $condition=[$t21]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_isempty_or_others.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_isempty_or_others.yaml index 7f43f48dc57..b67f50610d9 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_isempty_or_others.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_isempty_or_others.yaml @@ -2,9 +2,9 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10]) - LogicalFilter(condition=[OR(=($4, 'M'), IS NULL($1), IS EMPTY($1))]) + LogicalFilter(condition=[OR(=($4, 'M'), IS NULL($1), =(CHAR_LENGTH($1), 0))]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableLimit(fetch=[10000]) - EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NULL($t1)], expr#18=['M'], expr#19=[=($t4, $t18)], expr#20=[IS EMPTY($t1)], expr#21=[OR($t17, $t19, $t20)], proj#0..10=[{exprs}], $condition=[$t21]) + EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NULL($t1)], expr#18=['M'], expr#19=[=($t4, $t18)], expr#20=[CHAR_LENGTH($t1)], expr#21=[0], expr#22=[=($t20, $t21)], expr#23=[OR($t17, $t19, $t22)], proj#0..10=[{exprs}], $condition=[$t23]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index cc13c3a4a7a..6389fb28395 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -811,24 +811,7 @@ private static QueryExpression constructQueryExpressionForSearch( } } - private boolean containIsEmptyFunction(RexCall call) { - return call.getKind() == SqlKind.OR - && call.getOperands().stream().anyMatch(o -> o.getKind() == SqlKind.IS_NULL) - && call.getOperands().stream() - .anyMatch( - o -> - o.getKind() == SqlKind.OTHER - && ((RexCall) o).getOperator().equals(SqlStdOperatorTable.IS_EMPTY)); - } - private QueryExpression andOr(RexCall call) { - // For function isEmpty and isBlank, we implement them via expression `isNull or {@function}`, - // Unlike `OR` in Java, `SHOULD` in DSL will evaluate both branches and lead to NPE. - if (containIsEmptyFunction(call)) { - throw new PredicateAnalyzerException( - "DSL will evaluate both branches of OR with isNUll, prevent push-down to avoid NPE"); - } - QueryExpression[] expressions = new QueryExpression[call.getOperands().size()]; PredicateAnalyzerException firstError = null; boolean partial = false; diff --git a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java index 733c2de5213..3deb39d58c0 100644 --- a/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java +++ b/opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java @@ -7,6 +7,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.spy; import com.google.common.collect.ImmutableList; @@ -829,7 +830,8 @@ void isTrue_predicate() throws ExpressionNotAnalyzableException { } @Test - void isNullOr_ScriptPushDown() throws ExpressionNotAnalyzableException { + void isEmpty_pushesIsNullArmAsExistsAndCharLengthArmAsScript() + throws ExpressionNotAnalyzableException { final RelDataType rowType = builder .getTypeFactory() @@ -838,15 +840,56 @@ void isNullOr_ScriptPushDown() throws ExpressionNotAnalyzableException { .add("a", builder.getTypeFactory().createSqlType(SqlTypeName.BIGINT)) .add("b", builder.getTypeFactory().createSqlType(SqlTypeName.VARCHAR)) .build(); - // PPL IS_EMPTY is translated to OR(IS_NULL(arg), IS_EMPTY(arg)) + // PPL isempty(x) lowers to OR(IS_NULL(x), CHAR_LENGTH(x) = 0) (see PPLFuncImpTable). + // The IS_NULL arm has a native DSL form (bool.must_not.exists); the CHAR_LENGTH arm + // has no DSL equivalent and falls back to opensearch_compounded_script. The analyzer + // emits a bool.should that mixes the two — not a single fully-script_query, which is + // strictly better for matching null docs since the IS_NULL arm avoids the script + // engine entirely. + // + // This shape is also why no special-case detector is needed in PredicateAnalyzer.andOr: + // CHAR_LENGTH(null) returns null (Calcite NullPolicy.STRICT) rather than NPE, so DSL's + // non-short-circuiting `should` evaluation is safe even when the field is null. Prior + // to the OR(IS_NULL, CHAR_LENGTH=0) lowering the right arm was IS_EMPTY which compiled + // to `name.isEmpty()` and would NPE on null operands — that is what containIsEmptyFunction + // used to guard against, and is no longer needed. RexNode call = PPLFuncImpTable.INSTANCE.resolve(builder, BuiltinFunctionName.IS_EMPTY, field2); Hook.CURRENT_TIME.addThread((Consumer>) h -> h.set(0L)); QueryExpression expression = PredicateAnalyzer.analyzeExpression(call, schema, fieldTypes, rowType, cluster); - assert (expression - .builder() - .toString() - .contains("\"lang\" : \"opensearch_compounded_script\"")); + + QueryBuilder builderResult = expression.builder(); + assertInstanceOf(BoolQueryBuilder.class, builderResult); + BoolQueryBuilder bool = (BoolQueryBuilder) builderResult; + assertEquals( + 2, + bool.should().size(), + "isempty pushes down as a bool.should mixing native IS_NULL and a script for" + + " CHAR_LENGTH=0"); + assertTrue(bool.must().isEmpty(), "must clauses are not used by isempty pushdown"); + assertTrue( + bool.mustNot().isEmpty(), + "must_not clauses at the top level are not used by isempty pushdown"); + + // Arm 1: IS_NULL($field) → bool.must_not.exists + QueryBuilder isNullArm = bool.should().get(0); + assertInstanceOf(BoolQueryBuilder.class, isNullArm); + BoolQueryBuilder isNullBool = (BoolQueryBuilder) isNullArm; + assertEquals(1, isNullBool.mustNot().size()); + assertInstanceOf( + ExistsQueryBuilder.class, + isNullBool.mustNot().get(0), + "IS_NULL arm must lower to bool.must_not.exists, not to a script"); + + // Arm 2: CHAR_LENGTH($field) = 0 → script (CHAR_LENGTH has no native DSL form) + QueryBuilder charLengthArm = bool.should().get(1); + assertInstanceOf( + ScriptQueryBuilder.class, + charLengthArm, + "CHAR_LENGTH=0 arm must lower to a script_query (no native DSL equivalent)"); + assertTrue( + charLengthArm.toString().contains("\"lang\" : \"opensearch_compounded_script\""), + "script arm uses the Calcite-RexNode-based opensearch_compounded_script lang"); } @Test