Skip to content

Commit fb75d32

Browse files
committed
Allow type checkering on nested & struct fields for isnull, isnotnull, and ispresent (opensearch-project#4044)
* Redefine type checkers of isnull, isnotnull, and ispresent to IGNORE Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> * Prevents filtering with isnull/isnotnull conditions on nested fields Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> * Prevents pushing down isnull(nested) in PredicateAnalyzer to correct pushdown of complex & partial filters containing isnull(nested) Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> * Test explaining partial pushdown filter with isnull Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> * Add interface registerOperator(BuiltinFunctionName, SqlOperator, PPLTypeChecker) to allow registering an operator with a designated type chekcer Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> --------- Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> (cherry picked from commit 284ecc4)
1 parent 9c67fef commit fb75d32

6 files changed

Lines changed: 252 additions & 119 deletions

File tree

core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java

Lines changed: 145 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -570,77 +570,6 @@ private void compulsoryCast(
570570
return null;
571571
}
572572

573-
/**
574-
* Get a string representation of the argument types expressed in ExprType for error messages.
575-
*
576-
* @param argTypes the list of argument types as {@link RelDataType}
577-
* @return a string in the format [type1,type2,...] representing the argument types
578-
*/
579-
private static String getActualSignature(List<RelDataType> argTypes) {
580-
return "["
581-
+ argTypes.stream()
582-
.map(OpenSearchTypeFactory::convertRelDataTypeToExprType)
583-
.map(Objects::toString)
584-
.collect(Collectors.joining(","))
585-
+ "]";
586-
}
587-
588-
/**
589-
* Wraps a {@link SqlOperandTypeChecker} into a {@link PPLTypeChecker} for use in function
590-
* signature validation.
591-
*
592-
* @param typeChecker the original SQL operand type checker
593-
* @param functionName the name of the function for error reporting
594-
* @param isUserDefinedFunction true if the function is user-defined, false otherwise
595-
* @return a {@link PPLTypeChecker} that delegates to the provided {@code typeChecker}
596-
*/
597-
private static PPLTypeChecker wrapSqlOperandTypeChecker(
598-
SqlOperandTypeChecker typeChecker, String functionName, boolean isUserDefinedFunction) {
599-
PPLTypeChecker pplTypeChecker;
600-
// Only the composite operand type checker for UDFs are concerned here.
601-
if (isUserDefinedFunction
602-
&& typeChecker instanceof CompositeOperandTypeChecker) {
603-
// UDFs implement their own composite type checkers, which always use OR logic for
604-
// argument types. Verifying the composition type would require accessing a protected field in
605-
// CompositeOperandTypeChecker. If access to this field is not allowed, type checking will
606-
// be skipped, so we avoid checking the composition type here.
607-
CompositeOperandTypeChecker compositeTypeChecker = (CompositeOperandTypeChecker) typeChecker;
608-
pplTypeChecker = PPLTypeChecker.wrapComposite(compositeTypeChecker, false);
609-
} else if (typeChecker instanceof ImplicitCastOperandTypeChecker) {
610-
ImplicitCastOperandTypeChecker implicitCastTypeChecker = (ImplicitCastOperandTypeChecker) typeChecker;
611-
pplTypeChecker = PPLTypeChecker.wrapFamily(implicitCastTypeChecker);
612-
} else if (typeChecker instanceof CompositeOperandTypeChecker) {
613-
// If compositeTypeChecker contains operand checkers other than family type checkers or
614-
// other than OR compositions, the function with be registered with a null type checker,
615-
// which means the function will not be type checked.
616-
CompositeOperandTypeChecker compositeTypeChecker = (CompositeOperandTypeChecker) typeChecker;
617-
try {
618-
pplTypeChecker = PPLTypeChecker.wrapComposite(compositeTypeChecker, true);
619-
} catch (IllegalArgumentException | UnsupportedOperationException e) {
620-
logger.debug(
621-
String.format(
622-
"Failed to create composite type checker for operator: %s. Will skip its type"
623-
+ " checking",
624-
functionName),
625-
e);
626-
pplTypeChecker = null;
627-
}
628-
} else if (typeChecker instanceof SameOperandTypeChecker) {
629-
// Comparison operators like EQUAL, GREATER_THAN, LESS_THAN, etc.
630-
// SameOperandTypeCheckers like COALESCE, IFNULL, etc.
631-
SameOperandTypeChecker comparableTypeChecker = (SameOperandTypeChecker) typeChecker;
632-
pplTypeChecker = PPLTypeChecker.wrapComparable(comparableTypeChecker);
633-
} else if (typeChecker instanceof UDFOperandMetadata.UDTOperandMetadata) {
634-
UDFOperandMetadata.UDTOperandMetadata udtOperandMetadata = (UDFOperandMetadata.UDTOperandMetadata) typeChecker;
635-
pplTypeChecker = PPLTypeChecker.wrapUDT(udtOperandMetadata.getAllowSignatures());
636-
} else {
637-
logger.info(
638-
"Cannot create type checker for function: {}. Will skip its type checking", functionName);
639-
pplTypeChecker = null;
640-
}
641-
return pplTypeChecker;
642-
}
643-
644573
@SuppressWarnings({"UnusedReturnValue", "SameParameterValue"})
645574
private abstract static class AbstractBuilder {
646575

@@ -673,18 +602,24 @@ void registerOperator(BuiltinFunctionName functionName, SqlOperator... operators
673602
PPLTypeChecker pplTypeChecker =
674603
wrapSqlOperandTypeChecker(
675604
typeChecker, operator.getName(), operator instanceof SqlUserDefinedFunction);
676-
register(
677-
functionName,
678-
(RexBuilder builder, RexNode... args) -> builder.makeCall(operator, args),
679-
pplTypeChecker);
605+
registerOperator(functionName, operator, pplTypeChecker);
680606
}
681607
}
682608

683-
private static SqlOperandTypeChecker extractTypeCheckerFromUDF(
684-
SqlUserDefinedFunction udfOperator) {
685-
UDFOperandMetadata udfOperandMetadata =
686-
(UDFOperandMetadata) udfOperator.getOperandTypeChecker();
687-
return (udfOperandMetadata == null) ? null : udfOperandMetadata.getInnerTypeChecker();
609+
/**
610+
* Registers an operator for a built-in function name with a specified {@link PPLTypeChecker}.
611+
* This allows custom type checking logic to be associated with the operator.
612+
*
613+
* @param functionName the built-in function name
614+
* @param operator the SQL operator to register
615+
* @param typeChecker the type checker to use for validating argument types
616+
*/
617+
protected void registerOperator(
618+
BuiltinFunctionName functionName, SqlOperator operator, PPLTypeChecker typeChecker) {
619+
register(
620+
functionName,
621+
(RexBuilder builder, RexNode... args) -> builder.makeCall(operator, args),
622+
typeChecker);
688623
}
689624

690625
void populate() {
@@ -701,15 +636,6 @@ void populate() {
701636
registerOperator(OR, SqlStdOperatorTable.OR);
702637
registerOperator(NOT, SqlStdOperatorTable.NOT);
703638

704-
// Register ADD (+ symbol) for numeric addition
705-
register(
706-
ADD,
707-
(RexBuilder builder, RexNode... args) -> builder.makeCall(SqlStdOperatorTable.PLUS, args),
708-
new PPLTypeChecker.PPLFamilyTypeChecker(SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC));
709-
710-
// Register ADD (+ symbol) for string concatenation
711-
registerOperator(ADD, SqlStdOperatorTable.CONCAT);
712-
713639
// Register ADDFUNCTION for numeric addition only
714640
registerOperator(ADDFUNCTION, SqlStdOperatorTable.PLUS);
715641
registerOperator(SUBTRACT, SqlStdOperatorTable.MINUS);
@@ -748,9 +674,7 @@ void populate() {
748674
registerOperator(SIGNUM, SqlStdOperatorTable.SIGN);
749675
registerOperator(SIN, SqlStdOperatorTable.SIN);
750676
registerOperator(CBRT, SqlStdOperatorTable.CBRT);
751-
registerOperator(IS_NOT_NULL, SqlStdOperatorTable.IS_NOT_NULL);
752-
registerOperator(IS_PRESENT, SqlStdOperatorTable.IS_NOT_NULL);
753-
registerOperator(IS_NULL, SqlStdOperatorTable.IS_NULL);
677+
754678
registerOperator(IFNULL, SqlStdOperatorTable.COALESCE);
755679
registerOperator(EARLIEST, PPLBuiltinOperators.EARLIEST);
756680
registerOperator(LATEST, PPLBuiltinOperators.LATEST);
@@ -866,15 +790,16 @@ void populate() {
866790
registerOperator(WEEK, PPLBuiltinOperators.WEEK);
867791
registerOperator(WEEK_OF_YEAR, PPLBuiltinOperators.WEEK);
868792
registerOperator(WEEKOFYEAR, PPLBuiltinOperators.WEEK);
869-
registerOperator(INTERNAL_PATTERN_PARSER, PPLBuiltinOperators.PATTERN_PARSER);
870793

794+
registerOperator(INTERNAL_PATTERN_PARSER, PPLBuiltinOperators.PATTERN_PARSER);
871795
registerOperator(ARRAY, PPLBuiltinOperators.ARRAY);
872796
registerOperator(ARRAY_LENGTH, SqlLibraryOperators.ARRAY_LENGTH);
873797
registerOperator(FORALL, PPLBuiltinOperators.FORALL);
874798
registerOperator(EXISTS, PPLBuiltinOperators.EXISTS);
875799
registerOperator(FILTER, PPLBuiltinOperators.FILTER);
876800
registerOperator(TRANSFORM, PPLBuiltinOperators.TRANSFORM);
877801
registerOperator(REDUCE, PPLBuiltinOperators.REDUCE);
802+
878803
// Register Json function
879804
register(
880805
JSON_ARRAY,
@@ -902,6 +827,53 @@ void populate() {
902827
registerOperator(JSON_APPEND, PPLBuiltinOperators.JSON_APPEND);
903828
registerOperator(JSON_EXTEND, PPLBuiltinOperators.JSON_EXTEND);
904829

830+
// Register operators with a different type checker
831+
832+
// Register ADD (+ symbol) for string concatenation
833+
// Replaced type checker since CONCAT also supports array concatenation
834+
registerOperator(
835+
ADD,
836+
SqlStdOperatorTable.CONCAT,
837+
PPLTypeChecker.family(SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER));
838+
// Register ADD (+ symbol) for numeric addition
839+
// Replace type checker since PLUS also supports binary addition
840+
registerOperator(
841+
ADD,
842+
SqlStdOperatorTable.PLUS,
843+
PPLTypeChecker.family(SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC));
844+
// Replace with a custom CompositeOperandTypeChecker to check both operands as
845+
// SqlStdOperatorTable.ITEM.getOperandTypeChecker() checks only the first operand instead
846+
// of all operands.
847+
registerOperator(
848+
INTERNAL_ITEM,
849+
SqlStdOperatorTable.ITEM,
850+
PPLTypeChecker.wrapComposite(
851+
(CompositeOperandTypeChecker)
852+
OperandTypes.family(SqlTypeFamily.ARRAY, SqlTypeFamily.INTEGER)
853+
.or(OperandTypes.family(SqlTypeFamily.MAP, SqlTypeFamily.ANY)),
854+
false));
855+
registerOperator(
856+
XOR,
857+
SqlStdOperatorTable.NOT_EQUALS,
858+
PPLTypeChecker.family(SqlTypeFamily.BOOLEAN, SqlTypeFamily.BOOLEAN));
859+
// SqlStdOperatorTable.CASE.getOperandTypeChecker is null. We manually create a type checker
860+
// for it. The second and third operands are required to be of the same type. If not,
861+
// it will throw an IllegalArgumentException with information Can't find leastRestrictive type
862+
registerOperator(
863+
IF,
864+
SqlStdOperatorTable.CASE,
865+
PPLTypeChecker.family(SqlTypeFamily.BOOLEAN, SqlTypeFamily.ANY, SqlTypeFamily.ANY));
866+
// Re-define the type checker for is not null, is present, and is null since their original
867+
// type checker ANY isn't compatible with struct types.
868+
registerOperator(
869+
IS_NOT_NULL,
870+
SqlStdOperatorTable.IS_NOT_NULL,
871+
PPLTypeChecker.family(SqlTypeFamily.IGNORE));
872+
registerOperator(
873+
IS_PRESENT, SqlStdOperatorTable.IS_NOT_NULL, PPLTypeChecker.family(SqlTypeFamily.IGNORE));
874+
registerOperator(
875+
IS_NULL, SqlStdOperatorTable.IS_NULL, PPLTypeChecker.family(SqlTypeFamily.IGNORE));
876+
905877
// Register implementation.
906878
// Note, make the implementation an individual class if too complex.
907879
register(
@@ -935,10 +907,9 @@ void populate() {
935907
builder.makeLiteral(" "),
936908
arg),
937909
PPLTypeChecker.family(SqlTypeFamily.CHARACTER));
938-
register(
910+
registerOperator(
939911
ATAN,
940-
(FunctionImp2)
941-
(builder, arg1, arg2) -> builder.makeCall(SqlStdOperatorTable.ATAN2, arg1, arg2),
912+
SqlStdOperatorTable.ATAN2,
942913
PPLTypeChecker.family(SqlTypeFamily.NUMERIC, SqlTypeFamily.NUMERIC));
943914
register(
944915
STRCMP,
@@ -973,17 +944,6 @@ void populate() {
973944
SqlTypeFamily.INTEGER,
974945
SqlTypeFamily.INTEGER)),
975946
false));
976-
// SqlStdOperatorTable.ITEM.getOperandTypeChecker() checks only the first operand instead of
977-
// all operands. Therefore, we wrap it with a custom CompositeOperandTypeChecker to check both
978-
// operands.
979-
register(
980-
INTERNAL_ITEM,
981-
(RexBuilder builder, RexNode... args) -> builder.makeCall(SqlStdOperatorTable.ITEM, args),
982-
PPLTypeChecker.wrapComposite(
983-
(CompositeOperandTypeChecker)
984-
OperandTypes.family(SqlTypeFamily.ARRAY, SqlTypeFamily.INTEGER)
985-
.or(OperandTypes.family(SqlTypeFamily.MAP, SqlTypeFamily.ANY)),
986-
false));
987947
register(
988948
LOG,
989949
(FunctionImp2)
@@ -1015,18 +975,6 @@ void populate() {
1015975
(builder, arg) ->
1016976
builder.makeLiteral(getLegacyTypeName(arg.getType(), QueryType.PPL)),
1017977
null);
1018-
register(
1019-
XOR,
1020-
(FunctionImp2)
1021-
(builder, arg1, arg2) -> builder.makeCall(SqlStdOperatorTable.NOT_EQUALS, arg1, arg2),
1022-
PPLTypeChecker.family(SqlTypeFamily.BOOLEAN, SqlTypeFamily.BOOLEAN));
1023-
// SqlStdOperatorTable.CASE.getOperandTypeChecker is null. We manually create a type checker
1024-
// for it. The second and third operands are required to be of the same type. If not,
1025-
// it will throw an IllegalArgumentException with information Can't find leastRestrictive type
1026-
register(
1027-
IF,
1028-
(RexBuilder builder, RexNode... args) -> builder.makeCall(SqlStdOperatorTable.CASE, args),
1029-
PPLTypeChecker.family(SqlTypeFamily.BOOLEAN, SqlTypeFamily.ANY, SqlTypeFamily.ANY));
1030978
register(
1031979
NULLIF,
1032980
(FunctionImp2)
@@ -1074,6 +1022,13 @@ void populate() {
10741022
builder.makeLiteral("\\")),
10751023
PPLTypeChecker.family(SqlTypeFamily.STRING, SqlTypeFamily.STRING));
10761024
}
1025+
1026+
private static SqlOperandTypeChecker extractTypeCheckerFromUDF(
1027+
SqlUserDefinedFunction udfOperator) {
1028+
UDFOperandMetadata udfOperandMetadata =
1029+
(UDFOperandMetadata) udfOperator.getOperandTypeChecker();
1030+
return (udfOperandMetadata == null) ? null : udfOperandMetadata.getInnerTypeChecker();
1031+
}
10771032
}
10781033

10791034
private static class Builder extends AbstractBuilder {
@@ -1212,4 +1167,75 @@ void populate() {
12121167
null);
12131168
}
12141169
}
1170+
1171+
/**
1172+
* Get a string representation of the argument types expressed in ExprType for error messages.
1173+
*
1174+
* @param argTypes the list of argument types as {@link RelDataType}
1175+
* @return a string in the format [type1,type2,...] representing the argument types
1176+
*/
1177+
private static String getActualSignature(List<RelDataType> argTypes) {
1178+
return "["
1179+
+ argTypes.stream()
1180+
.map(OpenSearchTypeFactory::convertRelDataTypeToExprType)
1181+
.map(Objects::toString)
1182+
.collect(Collectors.joining(","))
1183+
+ "]";
1184+
}
1185+
1186+
/**
1187+
* Wraps a {@link SqlOperandTypeChecker} into a {@link PPLTypeChecker} for use in function
1188+
* signature validation.
1189+
*
1190+
* @param typeChecker the original SQL operand type checker
1191+
* @param functionName the name of the function for error reporting
1192+
* @param isUserDefinedFunction true if the function is user-defined, false otherwise
1193+
* @return a {@link PPLTypeChecker} that delegates to the provided {@code typeChecker}
1194+
*/
1195+
private static PPLTypeChecker wrapSqlOperandTypeChecker(
1196+
SqlOperandTypeChecker typeChecker, String functionName, boolean isUserDefinedFunction) {
1197+
PPLTypeChecker pplTypeChecker;
1198+
// Only the composite operand type checker for UDFs are concerned here.
1199+
if (isUserDefinedFunction
1200+
&& typeChecker instanceof CompositeOperandTypeChecker) {
1201+
// UDFs implement their own composite type checkers, which always use OR logic for
1202+
// argument types. Verifying the composition type would require accessing a protected field in
1203+
// CompositeOperandTypeChecker. If access to this field is not allowed, type checking will
1204+
// be skipped, so we avoid checking the composition type here.
1205+
CompositeOperandTypeChecker compositeTypeChecker = (CompositeOperandTypeChecker) typeChecker;
1206+
pplTypeChecker = PPLTypeChecker.wrapComposite(compositeTypeChecker, false);
1207+
} else if (typeChecker instanceof ImplicitCastOperandTypeChecker) {
1208+
ImplicitCastOperandTypeChecker implicitCastTypeChecker = (ImplicitCastOperandTypeChecker) typeChecker;
1209+
pplTypeChecker = PPLTypeChecker.wrapFamily(implicitCastTypeChecker);
1210+
} else if (typeChecker instanceof CompositeOperandTypeChecker) {
1211+
// If compositeTypeChecker contains operand checkers other than family type checkers or
1212+
// other than OR compositions, the function with be registered with a null type checker,
1213+
// which means the function will not be type checked.
1214+
CompositeOperandTypeChecker compositeTypeChecker = (CompositeOperandTypeChecker) typeChecker;
1215+
try {
1216+
pplTypeChecker = PPLTypeChecker.wrapComposite(compositeTypeChecker, true);
1217+
} catch (IllegalArgumentException | UnsupportedOperationException e) {
1218+
logger.debug(
1219+
String.format(
1220+
"Failed to create composite type checker for operator: %s. Will skip its type"
1221+
+ " checking",
1222+
functionName),
1223+
e);
1224+
pplTypeChecker = null;
1225+
}
1226+
} else if (typeChecker instanceof SameOperandTypeChecker) {
1227+
// Comparison operators like EQUAL, GREATER_THAN, LESS_THAN, etc.
1228+
// SameOperandTypeCheckers like COALESCE, IFNULL, etc.
1229+
SameOperandTypeChecker comparableTypeChecker = (SameOperandTypeChecker) typeChecker;
1230+
pplTypeChecker = PPLTypeChecker.wrapComparable(comparableTypeChecker);
1231+
} else if (typeChecker instanceof UDFOperandMetadata.UDTOperandMetadata) {
1232+
UDFOperandMetadata.UDTOperandMetadata udtOperandMetadata = (UDFOperandMetadata.UDTOperandMetadata) typeChecker;
1233+
pplTypeChecker = PPLTypeChecker.wrapUDT(udtOperandMetadata.getAllowSignatures());
1234+
} else {
1235+
logger.info(
1236+
"Cannot create type checker for function: {}. Will skip its type checking", functionName);
1237+
pplTypeChecker = null;
1238+
}
1239+
return pplTypeChecker;
1240+
}
12151241
}

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@
55

66
package org.opensearch.sql.calcite.remote;
77

8+
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE;
89
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK;
910
import static org.opensearch.sql.util.MatcherUtils.assertJsonEqualsIgnoreId;
1011

1112
import java.io.IOException;
13+
import java.util.Locale;
1214
import org.junit.Assume;
1315
import org.junit.Ignore;
1416
import org.junit.Test;
@@ -19,6 +21,7 @@ public class CalciteExplainIT extends ExplainIT {
1921
public void init() throws Exception {
2022
super.init();
2123
enableCalcite();
24+
loadIndex(Index.NESTED_SIMPLE);
2225
}
2326

2427
@Override
@@ -157,6 +160,20 @@ public void supportPartialPushDownScript() throws IOException {
157160
assertJsonEqualsIgnoreId(expected, result);
158161
}
159162

163+
@Test
164+
public void testPartialPushdownFilterWithIsNull() throws IOException {
165+
// isnull(nested_field) should not be pushed down since DSL doesn't handle it correctly, but
166+
// name='david' can be pushed down
167+
String query =
168+
String.format(
169+
Locale.ROOT,
170+
"source=%s | where isnull(address) and name='david'",
171+
TEST_INDEX_NESTED_SIMPLE);
172+
var result = explainQueryToString(query);
173+
String expected = loadExpectedPlan("explain_partial_filter_isnull.json");
174+
assertJsonEqualsIgnoreId(expected, result);
175+
}
176+
160177
@Test
161178
public void testSkipScriptEncodingOnExtendedFormat() throws IOException {
162179
Assume.assumeTrue("This test is only for push down enabled", isPushdownEnabled());

0 commit comments

Comments
 (0)