Skip to content

Commit 6cf2b65

Browse files
committed
Widen mvappend element type via leastRestrictive + pre-cast operands
The previous nullability-only bridge fixed `array(1, 2) + literal 3` but left `mvappend(1, 2.5)` falling back to ARRAY<ANY>. ARRAY<ANY> is not substrait-serializable, so any analytics-engine query through that call fails at substrait conversion. Aggressive `leastRestrictive` widening was the obvious next step but earlier triggered a runtime corruption — Integer 1 showed up as 0 in the response — because the Avatica result-set's ArrayAccessor uses element-type-specific accessors (e.g. `DoubleAccessor.getDouble` does `(Double) value`), and an Integer cell in a declared-DOUBLE list triggered a ClassCastException that the error path masked as `[0, 2.5]`. Fix the corruption by pre-casting each scalar operand to the call's element Java class in `MVAppendImplementor` via `EnumUtils.convert`. The result list is now homogeneously typed at codegen, so Avatica's per-element cast succeeds. Promote DECIMAL → DOUBLE on the way through `updateMostGeneralType` because `RowResponseCodec` maps DECIMAL cells to FloatingPoint(DOUBLE) anyway; an explicit DECIMAL element type triggers Calcite's element coercion to BigDecimal, which the JSON formatter renders inconsistently across paths. For genuinely incompatible operand pairs (INT + VARCHAR, …) `leastRestrictive` returns null and the existing `ANY` fallback stands — heterogeneous mvappend output stays on the Calcite engine path; only the analytics-engine route can't emit substrait for those. Local verification: - :core:test --tests *MVAppend* — green - :integ-test:integTest --tests CalciteMVAppendFunctionIT — 15/15 - :integ-test:integTest --tests CalciteArrayFunctionIT — 60/60 Signed-off-by: Kai Huang <huangkaics@gmail.com> Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent 5fc778f commit 6cf2b65

1 file changed

Lines changed: 55 additions & 15 deletions

File tree

core/src/main/java/org/opensearch/sql/expression/function/CollectionUDF/MVAppendFunctionImpl.java

Lines changed: 55 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77

88
import static org.apache.calcite.sql.type.SqlTypeUtil.createArrayType;
99

10+
import java.math.BigDecimal;
11+
import java.util.ArrayList;
1012
import java.util.List;
13+
import org.apache.calcite.adapter.enumerable.EnumUtils;
1114
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
1215
import org.apache.calcite.adapter.enumerable.NullPolicy;
1316
import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
@@ -20,7 +23,6 @@
2023
import org.apache.calcite.sql.SqlOperatorBinding;
2124
import org.apache.calcite.sql.type.SqlReturnTypeInference;
2225
import org.apache.calcite.sql.type.SqlTypeName;
23-
import org.apache.calcite.sql.type.SqlTypeUtil;
2426
import org.opensearch.sql.expression.function.ImplementorUDF;
2527
import org.opensearch.sql.expression.function.UDFOperandMetadata;
2628

@@ -82,33 +84,71 @@ private static RelDataType updateMostGeneralType(
8284
if (current.equals(candidate)) {
8385
return current;
8486
}
85-
// Tolerate a nullability-only mismatch by widening to the nullable form. {@code Object.equals}
86-
// distinguishes INTEGER NULLABLE from INTEGER NOT NULL — e.g. {@code array(1, 2)} synthesizes
87-
// an INTEGER NULLABLE component while a bare literal {@code 3} is INTEGER NOT NULL — which
88-
// would otherwise fall into the {@code ANY} fallback. Calcite's {@code ANY} type isn't
89-
// substrait-serializable, so any analytics-engine query passing through {@code mvappend} with
90-
// mixed-nullability operands would fail substrait conversion with "Unable to convert the type
91-
// ANY". For genuinely different types (INT + DOUBLE, INT + VARCHAR, …) keep the {@code ANY}
92-
// fallback unchanged: the Calcite engine relies on {@code Object[]} runtime semantics to
93-
// preserve heterogeneous values as-is, and widening to a single numeric type would corrupt
94-
// values at codegen time (e.g. casting integer 1 to {@code DECIMAL(11,1)}).
95-
if (SqlTypeUtil.equalSansNullability(typeFactory, current, candidate)) {
96-
return typeFactory.createTypeWithNullability(current, true);
87+
// Widen via Calcite's {@code leastRestrictive} — the same routine
88+
// {@code SqlLibraryOperators.ARRAY} uses for its return-type inference. For genuinely
89+
// incompatible operand types (INT + VARCHAR, …) it returns null; fall back to {@code ANY}
90+
// there to preserve the in-process Calcite engine's {@code Object[]} runtime semantics
91+
// that pre-existing tests rely on. Promote DECIMAL → DOUBLE on the way through: the row
92+
// codec on the analytics-engine route maps DECIMAL cells to {@code FloatingPoint(DOUBLE)}
93+
// anyway, and an explicit DECIMAL element type triggers Calcite's element coercion to
94+
// BigDecimal, which downstream Avatica array accessors and the JSON formatter render
95+
// inconsistently across paths.
96+
RelDataType least = typeFactory.leastRestrictive(java.util.List.of(current, candidate));
97+
if (least == null) {
98+
return typeFactory.createSqlType(SqlTypeName.ANY);
9799
}
98-
return typeFactory.createSqlType(SqlTypeName.ANY);
100+
if (least.getSqlTypeName() == SqlTypeName.DECIMAL) {
101+
return typeFactory.createTypeWithNullability(
102+
typeFactory.createSqlType(SqlTypeName.DOUBLE), true);
103+
}
104+
return least;
99105
}
100106

101107
public static class MVAppendImplementor implements NotNullImplementor {
102108
@Override
103109
public Expression implement(
104110
RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
111+
// Pre-cast each scalar operand to the call's element Java class so the result list is
112+
// homogeneously typed. Avatica's {@code AbstractCursor.ArrayAccessor} dispatches the
113+
// per-element accessor by the declared SQL type — e.g. {@code DoubleAccessor.getDouble}
114+
// does {@code (Double) value} — and would throw a runtime ClassCastException on an
115+
// {@code Integer} cell when the call's element type widens to DOUBLE. Array operands
116+
// pass through; their element-type alignment is the planner's responsibility.
117+
RelDataType elementType = call.getType().getComponentType();
118+
Class<?> elementClass =
119+
elementType == null ? Object.class : boxedJavaClass(elementType.getSqlTypeName());
120+
List<Expression> coerced = new ArrayList<>(translatedOperands.size());
121+
for (int i = 0; i < translatedOperands.size(); i++) {
122+
Expression op = translatedOperands.get(i);
123+
RelDataType opType = call.getOperands().get(i).getType();
124+
if (opType.getComponentType() != null || elementClass == Object.class) {
125+
coerced.add(op);
126+
} else {
127+
coerced.add(EnumUtils.convert(op, elementClass));
128+
}
129+
}
105130
return Expressions.call(
106131
Types.lookupMethod(MVAppendFunctionImpl.class, "mvappend", Object[].class),
107-
Expressions.newArrayInit(Object.class, translatedOperands));
132+
Expressions.newArrayInit(Object.class, coerced));
108133
}
109134
}
110135

111136
public static Object mvappend(Object... args) {
112137
return MVAppendCore.collectElements(args);
113138
}
139+
140+
private static Class<?> boxedJavaClass(SqlTypeName sqlType) {
141+
return switch (sqlType) {
142+
case BOOLEAN -> Boolean.class;
143+
case TINYINT -> Byte.class;
144+
case SMALLINT -> Short.class;
145+
case INTEGER -> Integer.class;
146+
case BIGINT -> Long.class;
147+
case FLOAT, REAL -> Float.class;
148+
case DOUBLE -> Double.class;
149+
case DECIMAL -> BigDecimal.class;
150+
case CHAR, VARCHAR -> String.class;
151+
default -> Object.class;
152+
};
153+
}
114154
}

0 commit comments

Comments
 (0)