diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java index 17d99fb4fbb..5722014c241 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactory.java @@ -45,6 +45,7 @@ import org.apache.calcite.sql.SqlCollation; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.calcite.sql.type.SqlTypeUtil; +import org.checkerframework.checker.nullness.qual.Nullable; import org.opensearch.sql.calcite.type.AbstractExprRelDataType; import org.opensearch.sql.calcite.type.ExprBinaryType; import org.opensearch.sql.calcite.type.ExprDateType; @@ -377,6 +378,43 @@ public static boolean isNumericType(RelDataType fieldType) { return false; } + /** + * Preserves OpenSearch UDT types through set operations (UNION, INTERSECT, EXCEPT). When all + * input types share the same {@link AbstractExprRelDataType} with the same {@link + * AbstractExprRelDataType#getUdt() UDT}, the result retains the UDT wrapper instead of being + * downgraded to the underlying SQL type (e.g., VARCHAR). This is critical for operations like + * multisearch that use UNION ALL, where downstream operators (bin, span) rely on the UDT type to + * determine how to process the field. When inputs include non-UDT types or different UDTs, this + * method falls back to {@link super#leastRestrictive}. + * + * @param types the list of input {@link RelDataType} instances to find the least restrictive + * common type for + * @return the least restrictive {@link RelDataType} preserving the UDT wrapper when all inputs + * share the same UDT, or {@code null} if no common type exists (as determined by {@link + * super#leastRestrictive}) + */ + @Override + public @Nullable RelDataType leastRestrictive(List types) { + if (types.size() > 1) { + RelDataType first = types.get(0); + if (first instanceof AbstractExprRelDataType firstUdt) { + boolean anyNullable = false; + for (RelDataType t : types) { + if (t instanceof AbstractExprRelDataType udt && udt.getUdt() == firstUdt.getUdt()) { + anyNullable |= t.isNullable(); + } else { + return super.leastRestrictive(types); + } + } + if (anyNullable && !first.isNullable()) { + return firstUdt.createWithNullability(this, true); + } + return first; + } + } + return super.leastRestrictive(types); + } + /** * Checks if the RelDataType represents a time-based field (timestamp, date, or time). Supports * both standard SQL time types (including TIMESTAMP, TIMESTAMP_WITH_LOCAL_TIME_ZONE, DATE, TIME, diff --git a/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java b/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java new file mode 100644 index 00000000000..583d1aea6cb --- /dev/null +++ b/core/src/test/java/org/opensearch/sql/calcite/utils/OpenSearchTypeFactoryTest.java @@ -0,0 +1,129 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.utils; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY; + +import java.util.List; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.sql.type.SqlTypeName; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.calcite.type.AbstractExprRelDataType; +import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; + +public class OpenSearchTypeFactoryTest { + + @Test + public void testLeastRestrictivePreservesUdtWhenAllInputsSameUdt() { + RelDataType ts1 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + RelDataType ts2 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(ts1, ts2)); + + assertNotNull(result); + assertInstanceOf(AbstractExprRelDataType.class, result); + assertEquals(ExprUDT.EXPR_TIMESTAMP, ((AbstractExprRelDataType) result).getUdt()); + } + + @Test + public void testLeastRestrictivePreservesUdtForDateType() { + RelDataType d1 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + RelDataType d2 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(d1, d2)); + + assertNotNull(result); + assertInstanceOf(AbstractExprRelDataType.class, result); + assertEquals(ExprUDT.EXPR_DATE, ((AbstractExprRelDataType) result).getUdt()); + } + + @Test + public void testLeastRestrictivePreservesUdtForThreeInputs() { + RelDataType ts1 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + RelDataType ts2 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + RelDataType ts3 = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(ts1, ts2, ts3)); + + assertNotNull(result); + assertInstanceOf(AbstractExprRelDataType.class, result); + assertEquals(ExprUDT.EXPR_TIMESTAMP, ((AbstractExprRelDataType) result).getUdt()); + } + + @Test + public void testLeastRestrictiveReturnsNullableWhenAnyInputIsNullable() { + RelDataType nonNullable = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP, false); + RelDataType nullable = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP, true); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(nonNullable, nullable)); + + assertNotNull(result); + assertInstanceOf(AbstractExprRelDataType.class, result); + assertEquals(ExprUDT.EXPR_TIMESTAMP, ((AbstractExprRelDataType) result).getUdt()); + assertTrue(result.isNullable()); + } + + @Test + public void testLeastRestrictiveReturnsNullableWhenFirstNullableSecondNot() { + RelDataType nullable = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP, true); + RelDataType nonNullable = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP, false); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(nullable, nonNullable)); + + assertNotNull(result); + assertInstanceOf(AbstractExprRelDataType.class, result); + assertTrue(result.isNullable()); + } + + @Test + public void testLeastRestrictiveFallsBackForMixedUdtAndNonUdt() { + RelDataType udt = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + RelDataType plain = TYPE_FACTORY.createSqlType(SqlTypeName.VARCHAR); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(udt, plain)); + + // Falls back to super.leastRestrictive — both backed by VARCHAR, so result is non-null + assertNotNull(result); + assertEquals(SqlTypeName.VARCHAR, result.getSqlTypeName()); + } + + @Test + public void testLeastRestrictiveFallsBackForDifferentUdts() { + RelDataType timestamp = TYPE_FACTORY.createUDT(ExprUDT.EXPR_TIMESTAMP); + RelDataType date = TYPE_FACTORY.createUDT(ExprUDT.EXPR_DATE); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(timestamp, date)); + + // Different UDTs — falls back to super.leastRestrictive, both backed by VARCHAR + assertNotNull(result); + assertEquals(SqlTypeName.VARCHAR, result.getSqlTypeName()); + } + + @Test + public void testLeastRestrictiveDelegatesToSuperForSingleType() { + RelDataType single = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(single)); + + assertNotNull(result); + assertEquals(SqlTypeName.INTEGER, result.getSqlTypeName()); + } + + @Test + public void testLeastRestrictiveDelegatesToSuperForPlainTypes() { + RelDataType int1 = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + RelDataType int2 = TYPE_FACTORY.createSqlType(SqlTypeName.INTEGER); + + RelDataType result = TYPE_FACTORY.leastRestrictive(List.of(int1, int2)); + + assertNotNull(result); + assertEquals(SqlTypeName.INTEGER, result.getSqlTypeName()); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java index 393b0a4a501..d0a4882fb1f 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java @@ -152,10 +152,10 @@ public void testMultisearchWithTimestampInterleaving() throws IOException { verifySchema( result, - schema("@timestamp", null, "string"), + schema("@timestamp", null, "timestamp"), schema("category", null, "string"), schema("value", null, "int"), - schema("timestamp", null, "string")); + schema("timestamp", null, "timestamp")); verifyDataRows( result, @@ -344,6 +344,95 @@ public void testMultisearchCrossIndexFieldSelection() throws IOException { rows(null, null, "Times Square", 1002)); } + // ======================================================================== + // Reproduction tests for GitHub issues #5145, #5146, #5147 + // ======================================================================== + + /** Reproduce #5145: multisearch without further processing should return all rows. */ + @Test + public void testMultisearchWithoutFurtherProcessing() throws IOException { + JSONObject result = + executeQuery( + "| multisearch [search source=opensearch-sql_test_index_time_data | where category =" + + " \\\"A\\\"] [search source=opensearch-sql_test_index_time_data | where category" + + " = \\\"B\\\"]"); + + verifySchema( + result, + schema("@timestamp", null, "timestamp"), + schema("category", null, "string"), + schema("value", null, "int"), + schema("timestamp", null, "timestamp")); + + // category A has 26 rows, category B has 25 rows = 51 total + assertEquals(51, result.getInt("total")); + } + + /** Reproduce #5146: span expression used after multisearch should work. */ + @Test + public void testMultisearchWithSpanExpression() throws IOException { + JSONObject result = + executeQuery( + "| multisearch [search source=opensearch-sql_test_index_time_data | where category =" + + " \\\"A\\\"] [search source=opensearch-sql_test_index_time_data2 | where category" + + " = \\\"E\\\"] | stats count() by span(@timestamp, 1d)"); + + verifySchema( + result, + schema("count()", null, "bigint"), + schema("span(@timestamp,1d)", null, "timestamp")); + + // Category A: 26 rows spanning Jul 28 – Aug 1; Category E: 10 rows spanning Jul 30 – Aug 1 + verifyDataRows( + result, + rows(7L, "2025-07-28 00:00:00"), + rows(6L, "2025-07-29 00:00:00"), + rows(8L, "2025-07-30 00:00:00"), + rows(12L, "2025-07-31 00:00:00"), + rows(3L, "2025-08-01 00:00:00")); + } + + /** Reproduce #5147: bin command after multisearch should produce non-null @timestamp. */ + @Test + public void testMultisearchBinTimestamp() throws IOException { + JSONObject result = + executeQuery( + "| multisearch [search source=opensearch-sql_test_index_time_data | where category =" + + " \\\"A\\\"] [search source=opensearch-sql_test_index_time_data2 | where category" + + " = \\\"E\\\"] | fields @timestamp, category, value | bin @timestamp span=1d"); + + verifySchema( + result, + schema("category", null, "string"), + schema("value", null, "int"), + schema("@timestamp", null, "timestamp")); + + // bin floors @timestamp to 1-day boundaries; 26 A-rows + 10 E-rows = 36 total + assertEquals(36, result.getInt("total")); + } + + /** Reproduce #5147 full pattern: bin + stats after multisearch. */ + @Test + public void testMultisearchBinAndStats() throws IOException { + JSONObject result = + executeQuery( + "| multisearch [search source=opensearch-sql_test_index_time_data | where category =" + + " \\\"A\\\"] [search source=opensearch-sql_test_index_time_data2 | where category" + + " = \\\"E\\\"] | bin @timestamp span=1d | stats count() by @timestamp"); + + verifySchema( + result, schema("count()", null, "bigint"), schema("@timestamp", null, "timestamp")); + + // Category A: 26 rows spanning Jul 28 – Aug 1; Category E: 10 rows spanning Jul 30 – Aug 1 + verifyDataRows( + result, + rows(7L, "2025-07-28 00:00:00"), + rows(6L, "2025-07-29 00:00:00"), + rows(8L, "2025-07-30 00:00:00"), + rows(12L, "2025-07-31 00:00:00"), + rows(3L, "2025-08-01 00:00:00")); + } + @Test public void testMultisearchTypeConflictWithStats() { Exception exception = diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendCommandIT.java index d01ddfb2a44..6372b818b2b 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAppendCommandIT.java @@ -253,7 +253,7 @@ public void testAppendSchemaMergeWithTimestampUDT() throws IOException { schema("account_number", "bigint"), schema("firstname", "string"), schema("age", "int"), - schema("birthdate", "string")); + schema("birthdate", "timestamp")); verifyDataRows(actual, rows(32, null, 34, "2018-08-11 00:00:00")); }