Skip to content

Commit f78df64

Browse files
authored
Added coercion rules and placeholder UDF to handle VARBINARY (#5443)
* Added coercion rules and placeholder UDF to handle VARBINARY --------- Signed-off-by: Vinay Krishna Pudyodu <vinkrish.neo@gmail.com>
1 parent 3b30525 commit f78df64

9 files changed

Lines changed: 395 additions & 2 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/ExtendedRexBuilder.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,12 @@ else if ((SqlTypeUtil.isApproximateNumeric(sourceType) || SqlTypeUtil.isDecimal(
183183
// NUMBER_TO_STRING uses java's built-in method to get the string representation of a number
184184
return makeCall(type, PPLBuiltinOperators.NUMBER_TO_STRING, List.of(exp));
185185
}
186+
// VARCHAR → VARBINARY for ip/binary fields. Emit BINARY(varchar) as a placeholder
187+
// RexCall the analytics backend adapter rewrites into a VARBINARY literal.
188+
else if (sqlType == SqlTypeName.VARBINARY
189+
&& sourceType.getSqlTypeName() == SqlTypeName.VARCHAR) {
190+
return makeCall(type, PPLBuiltinOperators.BINARY, List.of(exp));
191+
}
186192
return super.makeCast(pos, type, exp, matchNullability, safe, format);
187193
}
188194
}

core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ public static RelDataType convertExprTypeToRelDataType(ExprType fieldType, boole
174174
return TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIME, nullable);
175175
case TIMESTAMP:
176176
return TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP, nullable);
177+
case BINARY:
178+
return TYPE_FACTORY.createSqlType(SqlTypeName.VARBINARY, nullable);
177179
case ARRAY:
178180
return TYPE_FACTORY.createArrayType(
179181
TYPE_FACTORY.createSqlType(SqlTypeName.ANY, nullable), -1);
@@ -225,6 +227,7 @@ public static ExprType convertSqlTypeNameToExprType(SqlTypeName sqlTypeName) {
225227
case FLOAT, REAL -> FLOAT;
226228
case DOUBLE, DECIMAL -> DOUBLE; // TODO the decimal is only used for literal
227229
case CHAR, VARCHAR, MULTISET -> STRING; // call toString() for MULTISET
230+
case VARBINARY, BINARY -> BINARY;
228231
case BOOLEAN -> BOOLEAN;
229232
case DATE -> DATE;
230233
case TIME, TIME_TZ, TIME_WITH_LOCAL_TIME_ZONE -> TIME;
@@ -411,10 +414,34 @@ public static boolean isNumericType(RelDataType fieldType) {
411414
}
412415
return first;
413416
}
417+
// When the list has a VARBINARY column plus VARCHAR literals, treat VARBINARY
418+
// as the common type so IN / BETWEEN can insert casts.
419+
RelDataType varbinaryResult = leastRestrictiveVarbinaryVarchar(types);
420+
if (varbinaryResult != null) {
421+
return varbinaryResult;
422+
}
414423
}
415424
return super.leastRestrictive(types);
416425
}
417426

427+
/**
 * Resolves the common type for a list that mixes VARBINARY with character types.
 *
 * <p>Returns VARBINARY when at least one entry is VARBINARY and every other entry is VARCHAR or
 * CHAR; the result is nullable if any input type is nullable. Returns {@code null} when the list
 * contains any other type, or contains no VARBINARY at all, so the caller can fall back to its
 * remaining resolution logic.
 *
 * @param types candidate types to unify
 * @return the VARBINARY common type, or {@code null} if this rule does not apply
 */
private @Nullable RelDataType leastRestrictiveVarbinaryVarchar(List<RelDataType> types) {
428+
boolean hasVarbinary = false;
429+
boolean anyNullable = false;
430+
for (RelDataType t : types) {
431+
SqlTypeName name = t.getSqlTypeName();
432+
if (name == SqlTypeName.VARBINARY) {
433+
hasVarbinary = true;
434+
} else if (name != SqlTypeName.VARCHAR && name != SqlTypeName.CHAR) {
435+
// Anything other than VARBINARY / VARCHAR / CHAR disqualifies the whole list.
return null;
436+
}
437+
anyNullable |= t.isNullable();
438+
}
439+
// A purely character-typed list is not this rule's concern.
if (!hasVarbinary) {
440+
return null;
441+
}
442+
// The result is built from a bare VARBINARY type, so no precision from the inputs is carried over.
return createTypeWithNullability(createSqlType(SqlTypeName.VARBINARY), anyNullable);
443+
}
444+
418445
/**
419446
* Checks if the RelDataType represents a time-based field (timestamp, date, or time). Supports
420447
* both standard SQL time types (including TIMESTAMP, TIMESTAMP_WITH_LOCAL_TIME_ZONE, DATE, TIME,

core/src/main/java/org/opensearch/sql/expression/function/CoercionUtils.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,12 @@ public static boolean hasString(List<RexNode> rexNodeList) {
181181
(left, right) -> ExprCoreType.TIMESTAMP),
182182
CoercionRule.of(
183183
(left, right) -> hasString(left, right) && hasNumber(left, right),
184-
(left, right) -> ExprCoreType.DOUBLE));
184+
(left, right) -> ExprCoreType.DOUBLE),
185+
// (BINARY, STRING) → BINARY: ip/binary columns compared with string literals.
186+
// Triggers ExtendedRexBuilder.makeCast which wraps the literal with BINARY.
187+
CoercionRule.of(
188+
(left, right) -> hasString(left, right) && hasBinary(left, right),
189+
(left, right) -> ExprCoreType.BINARY));
185190

186191
private static boolean hasString(ExprType left, ExprType right) {
187192
return left == ExprCoreType.STRING || right == ExprCoreType.STRING;
@@ -195,6 +200,10 @@ private static boolean hasBoolean(ExprType left, ExprType right) {
195200
return left == ExprCoreType.BOOLEAN || right == ExprCoreType.BOOLEAN;
196201
}
197202

203+
/** Returns {@code true} when at least one of the two operand types is {@link ExprCoreType#BINARY}. */
private static boolean hasBinary(ExprType left, ExprType right) {
204+
return left == ExprCoreType.BINARY || right == ExprCoreType.BINARY;
205+
}
206+
198207
private record CoercionRule(
199208
BiPredicate<ExprType, ExprType> predicate, BinaryOperator<ExprType> resolver) {
200209

core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
import org.opensearch.sql.expression.function.udf.condition.EarliestFunction;
8484
import org.opensearch.sql.expression.function.udf.condition.EnhancedCoalesceFunction;
8585
import org.opensearch.sql.expression.function.udf.condition.LatestFunction;
86+
import org.opensearch.sql.expression.function.udf.conversion.BinaryFunction;
8687
import org.opensearch.sql.expression.function.udf.datetime.AddSubDateFunction;
8788
import org.opensearch.sql.expression.function.udf.datetime.CurrentFunction;
8889
import org.opensearch.sql.expression.function.udf.datetime.DateAddSubFunction;
@@ -179,6 +180,9 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
179180
public static final SqlOperator EARLIEST = new EarliestFunction().toUDF("EARLIEST");
180181
public static final SqlOperator LATEST = new LatestFunction().toUDF("LATEST");
181182

183+
// VARBINARY conversion (placeholder for ip/binary fields rewritten by analytics backend adapter)
184+
public static final SqlOperator BINARY = new BinaryFunction().toUDF("BINARY");
185+
182186
// Datetime function
183187
public static final SqlOperator TIMESTAMP = new TimestampFunction().toUDF("TIMESTAMP");
184188
public static final SqlOperator DATE =

core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@
268268
import static org.opensearch.sql.expression.function.BuiltinFunctionName.YEARWEEK;
269269

270270
import com.google.common.collect.ImmutableMap;
271+
import inet.ipaddr.IPAddress;
271272
import java.math.BigDecimal;
272273
import java.util.ArrayList;
273274
import java.util.Arrays;
@@ -282,6 +283,7 @@
282283
import java.util.stream.Collectors;
283284
import java.util.stream.Stream;
284285
import javax.annotation.Nullable;
286+
import org.apache.calcite.avatica.util.ByteString;
285287
import org.apache.calcite.rel.type.RelDataType;
286288
import org.apache.calcite.rex.RexBuilder;
287289
import org.apache.calcite.rex.RexLambda;
@@ -311,8 +313,10 @@
311313
import org.opensearch.sql.calcite.utils.PlanUtils;
312314
import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils;
313315
import org.opensearch.sql.exception.ExpressionEvaluationException;
316+
import org.opensearch.sql.exception.SemanticCheckException;
314317
import org.opensearch.sql.executor.QueryType;
315318
import org.opensearch.sql.expression.function.CollectionUDF.MVIndexFunctionImp;
319+
import org.opensearch.sql.utils.IPUtils;
316320

317321
public class PPLFuncImpTable {
318322
private static final Logger logger = LogManager.getLogger(PPLFuncImpTable.class);
@@ -908,6 +912,29 @@ void populate() {
908912
registerDivideFunction(DIVIDEFUNCTION);
909913
registerOperator(SHA2, PPLBuiltinOperators.SHA2);
910914
registerOperator(CIDRMATCH, PPLBuiltinOperators.CIDRMATCH);
915+
// (VARBINARY, VARCHAR) overload for ip / binary columns. The lambda parses the cidr
916+
// literal at plan time and emits AND(col >= low, col <= high) directly.
917+
// Only literal cidrs are expanded.
918+
register(
919+
CIDRMATCH,
920+
(FunctionImp2)
921+
(builder, col, cidr) -> {
922+
if (cidr instanceof RexLiteral lit
923+
&& col.getType().getSqlTypeName() == SqlTypeName.VARBINARY) {
924+
byte[][] range = parseCidrToIpv6Range(lit.getValueAs(String.class));
925+
RelDataType varbinary =
926+
builder.getTypeFactory().createSqlType(SqlTypeName.VARBINARY);
927+
RexNode low = builder.makeLiteral(new ByteString(range[0]), varbinary, false);
928+
RexNode high = builder.makeLiteral(new ByteString(range[1]), varbinary, false);
929+
// makeCall(AND, ...) auto-flattens at construction, so no Filter.isFlat issue.
930+
return builder.makeCall(
931+
SqlStdOperatorTable.AND,
932+
builder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, col, low),
933+
builder.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, col, high));
934+
}
935+
return builder.makeCall(PPLBuiltinOperators.CIDRMATCH, col, cidr);
936+
},
937+
PPLTypeChecker.family(SqlTypeFamily.BINARY, SqlTypeFamily.STRING));
911938
registerOperator(INTERNAL_GROK, PPLBuiltinOperators.GROK);
912939
registerOperator(INTERNAL_PARSE, PPLBuiltinOperators.PARSE);
913940
registerOperator(MATCH, PPLBuiltinOperators.MATCH);
@@ -1589,4 +1616,22 @@ private static SqlOperandTypeChecker extractTypeCheckerFromUDF(SqlOperator opera
15891616
}
15901617
return typeChecker;
15911618
}
1619+
1620+
/**
1621+
* Parses a CIDR string and returns its lower and upper bounds in canonical 16-byte IPv6-mapped
1622+
* form. Used by the (BINARY, STRING) {@code cidrmatch} overload to expand into a byte-range
1623+
* conjunction at plan time.
1624+
*
1625+
* <p>Delegates to {@link IPUtils#toRange(String)} for parsing; converts both bounds to IPv6 to
1626+
* guarantee 16-byte output regardless of whether the input cidr is IPv4 or IPv6.
*
* @param cidr CIDR text to parse; must not be {@code null}
* @return a two-element array {@code {low, high}} holding the bytes of the range's lower and
*     upper bounds, in that order
* @throws SemanticCheckException if {@code cidr} is {@code null}
1627+
*/
1628+
private static byte[][] parseCidrToIpv6Range(String cidr) {
1629+
if (cidr == null) {
1630+
throw new SemanticCheckException("cidrmatch range argument is null");
1631+
}
1632+
// NOTE(review): behavior for malformed CIDR text is decided by IPUtils.toRange, which is not
// visible here — confirm it fails with a user-facing error rather than returning null.
IPAddress range = IPUtils.toRange(cidr);
1633+
// NOTE(review): assumes toIPv6() yields a non-null address for every range toRange returns —
// confirm against the IPAddress library documentation.
byte[] low = range.getLower().toIPv6().getBytes();
1634+
byte[] high = range.getUpper().toIPv6().getBytes();
1635+
return new byte[][] {low, high};
1636+
}
15921637
}
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.expression.function.udf.conversion;
7+
8+
import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY;
9+
10+
import java.util.List;
11+
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
12+
import org.apache.calcite.adapter.enumerable.NullPolicy;
13+
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
14+
import org.apache.calcite.linq4j.tree.Expression;
15+
import org.apache.calcite.rex.RexCall;
16+
import org.apache.calcite.sql.type.FamilyOperandTypeChecker;
17+
import org.apache.calcite.sql.type.OperandTypes;
18+
import org.apache.calcite.sql.type.ReturnTypes;
19+
import org.apache.calcite.sql.type.SqlReturnTypeInference;
20+
import org.apache.calcite.sql.type.SqlTypeName;
21+
import org.opensearch.sql.expression.function.ImplementorUDF;
22+
import org.opensearch.sql.expression.function.UDFOperandMetadata;
23+
24+
/**
 * Placeholder UDF that wraps a VARCHAR literal cast to VARBINARY for ip/binary fields.
 *
 * <p>The call is typed as nullable VARBINARY, but the bundled implementor simply returns its
 * operand unchanged; no string-to-bytes conversion happens in this class.
 */
25+
public class BinaryFunction extends ImplementorUDF {
26+
27+
/** Return-type inference that always reports VARBINARY with nullability forced to true. */
private static final SqlReturnTypeInference VARBINARY_FORCE_NULLABLE =
28+
ReturnTypes.explicit(
29+
TYPE_FACTORY.createTypeWithNullability(
30+
TYPE_FACTORY.createSqlType(SqlTypeName.VARBINARY), true));
31+
32+
/** Creates the UDF with the pass-through implementor and {@link NullPolicy#STRICT}. */
public BinaryFunction() {
33+
super(new PassThroughImplementor(), NullPolicy.STRICT);
34+
}
35+
36+
/** Returns the fixed nullable-VARBINARY inference, independent of the operand type. */
@Override
37+
public SqlReturnTypeInference getReturnTypeInference() {
38+
return VARBINARY_FORCE_NULLABLE;
39+
}
40+
41+
/** Restricts operands to those accepted by Calcite's {@code OperandTypes.CHARACTER} checker. */
@Override
42+
public UDFOperandMetadata getOperandMetadata() {
43+
return UDFOperandMetadata.wrap((FamilyOperandTypeChecker) OperandTypes.CHARACTER);
44+
}
45+
46+
/** Implementor that emits the first translated operand as-is instead of generating conversion code. */
public static class PassThroughImplementor implements NotNullImplementor {
47+
@Override
48+
public Expression implement(
49+
RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
50+
// Only operand 0 is used; the VARBINARY typing comes solely from the return-type inference.
return translatedOperands.get(0);
51+
}
52+
}
53+
}
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.calcite;
7+
8+
import static org.junit.jupiter.api.Assertions.assertEquals;
9+
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
10+
import static org.junit.jupiter.api.Assertions.assertNotNull;
11+
import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY;
12+
13+
import org.apache.calcite.rel.type.RelDataType;
14+
import org.apache.calcite.rex.RexBuilder;
15+
import org.apache.calcite.rex.RexCall;
16+
import org.apache.calcite.rex.RexNode;
17+
import org.apache.calcite.sql.type.SqlTypeName;
18+
import org.junit.jupiter.api.Test;
19+
import org.opensearch.sql.expression.function.PPLBuiltinOperators;
20+
21+
/**
 * Tests for the VARCHAR → VARBINARY rewrite performed by {@code makeCast} on {@link
 * ExtendedRexBuilder}: the rewrite must fire for that exact source/target pair and for nothing
 * else.
 */
class ExtendedRexBuilderTest {
22+
23+
private static final RexBuilder REX_BUILDER =
24+
new ExtendedRexBuilder(new RexBuilder(TYPE_FACTORY));
25+
26+
/**
27+
* VARCHAR → VARBINARY casts must be rewritten as a {@code BINARY(varchar)} placeholder {@code
28+
* RexCall}.
29+
*/
30+
@Test
31+
void castVarcharToVarbinaryEmitsBinaryPlaceholder() {
32+
// Use makeInputRef to construct a VARCHAR-typed RexNode reliably. makeLiteral(String) folds
33+
// to CHAR, which would make this test fall through to the default cast, not our placeholder.
34+
RelDataType varchar = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR);
35+
RexNode varcharRef = REX_BUILDER.makeInputRef(varchar, 0);
36+
RelDataType varbinary = TYPE_FACTORY.createSqlType(SqlTypeName.VARBINARY);
37+
38+
RexNode result = REX_BUILDER.makeCast(varbinary, varcharRef);
39+
40+
assertInstanceOf(RexCall.class, result);
41+
RexCall call = (RexCall) result;
42+
assertEquals(PPLBuiltinOperators.BINARY, call.getOperator());
43+
assertEquals("BINARY", call.getOperator().getName());
44+
assertEquals(SqlTypeName.VARBINARY, call.getType().getSqlTypeName());
45+
assertEquals(1, call.getOperands().size());
46+
assertEquals(SqlTypeName.VARCHAR, call.getOperands().get(0).getType().getSqlTypeName());
47+
}
48+
49+
/**
50+
* Casts targeting a SqlTypeName other than VARBINARY must NOT trigger the BINARY rewrite — they
51+
* fall through to Calcite's default cast handling.
52+
*/
53+
@Test
54+
void castVarcharToIntegerDoesNotEmitBinaryPlaceholder() {
55+
RexNode stringLiteral = REX_BUILDER.makeLiteral("42");
56+
RelDataType integer = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER);
57+
58+
RexNode result = REX_BUILDER.makeCast(integer, stringLiteral);
59+
60+
assertNotNull(result);
61+
// The cast may fold to a non-call node; the operator is only inspected when a call remains.
if (result instanceof RexCall call) {
62+
// Expected value goes first so a failure reads "expected: false but was: true".
assertEquals(
63+
false,
64+
"BINARY".equals(call.getOperator().getName()),
65+
"VARCHAR → INTEGER must not emit BINARY placeholder");
66+
}
67+
}
68+
69+
/**
70+
* Casts whose source is not VARCHAR must also fall through. The placeholder is only meant for the
71+
* (VARCHAR → VARBINARY) case where a string IP / base64 literal is being compared against a
72+
* VARBINARY column — non-string sources have well-defined Calcite cast semantics that should not
73+
* be hijacked.
74+
*/
75+
@Test
76+
void castIntegerToVarbinaryDoesNotEmitBinaryPlaceholder() {
77+
RexNode intLiteral = REX_BUILDER.makeExactLiteral(java.math.BigDecimal.ONE);
78+
RelDataType varbinary = TYPE_FACTORY.createSqlType(SqlTypeName.VARBINARY);
79+
80+
RexNode result = REX_BUILDER.makeCast(varbinary, intLiteral);
81+
82+
assertNotNull(result);
83+
// The cast may fold to a non-call node; the operator is only inspected when a call remains.
if (result instanceof RexCall call) {
84+
// Expected value goes first so a failure reads "expected: false but was: true".
assertEquals(
85+
false,
86+
"BINARY".equals(call.getOperator().getName()),
87+
"non-VARCHAR → VARBINARY must not emit BINARY placeholder");
88+
}
89+
}
90+
}

0 commit comments

Comments
 (0)