diff --git a/integ-test/build.gradle b/integ-test/build.gradle index 4428668f15..ea0f6fce74 100644 --- a/integ-test/build.gradle +++ b/integ-test/build.gradle @@ -1147,6 +1147,23 @@ task integTestRemote(type: RestIntegTestTask) { // - subsearch.maxout is lowered as a LIMIT on the in-subquery semi-join's right side, // which the AE route does not honor, so the subsearch returns all rows. excludeTestsMatching '*CalcitePPLInSubqueryIT.testSubsearchMaxOut' + + // === Excludes: CalcitePPLConditionBuiltinFunctionIT route divergences === + // Each test also carries an in-test assumeNotAnalytics(...) recording the reason (see + // AnalyticsRouteLimitation); listed here so the AE-route skip set stays countable. + // - isnull/isnotnull on the object/struct parent field big5.aws: objects are flattened + // to dotted leaf columns and the struct parent is not a queryable column. + excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testIsNullWithStruct' + excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testIsNotNullWithStruct' + // - isnull/isnotnull on the nested field nested_simple.address: nested fields are + // stripped at index creation (the route can't store them). + excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testIsNullWithNested' + excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testIsNotNullWithNested' + // - concat('H', null): DataFusion treats NULL as empty string; v2/Calcite propagates NULL. + excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testNullIfWithExpression' + // - earliest('now', utc_timestamp()): 'now' and utc_timestamp() resolve to the same + // instant on the route (true) but differ on v2 (false). 
+ excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testEarliestWithEval' } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLConditionBuiltinFunctionIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLConditionBuiltinFunctionIT.java index a05df6ba7c..e5f29e59bf 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLConditionBuiltinFunctionIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLConditionBuiltinFunctionIT.java @@ -5,7 +5,12 @@ package org.opensearch.sql.calcite.remote; +import static org.opensearch.sql.legacy.TestUtils.isIndexExist; import static org.opensearch.sql.legacy.TestsConstants.*; +import static org.opensearch.sql.util.AnalyticsRouteLimitation.CONCAT_NULL_AS_EMPTY; +import static org.opensearch.sql.util.AnalyticsRouteLimitation.EARLIEST_LATEST_NOW_CLOCK; +import static org.opensearch.sql.util.AnalyticsRouteLimitation.NESTED_FIELDS; +import static org.opensearch.sql.util.AnalyticsRouteLimitation.STRUCT_PARENT_FIELD; import static org.opensearch.sql.util.MatcherUtils.*; import static org.opensearch.sql.util.MatcherUtils.rows; @@ -22,22 +27,29 @@ public void init() throws Exception { super.init(); enableCalcite(); + // init() runs as @Before, before every test method. On the analytics route the parquet-backed + // store is append-only on same-_id PUT, so seed the extra docs only when the index is first + // created — otherwise they accumulate a duplicate per test method and inflate row counts. 
+ boolean stateCountryWithNullExisted = + isIndexExist(client(), TEST_INDEX_STATE_COUNTRY_WITH_NULL); loadIndex(Index.STATE_COUNTRY); loadIndex(Index.STATE_COUNTRY_WITH_NULL); loadIndex(Index.CALCS); loadIndex(Index.NESTED_SIMPLE); loadIndex(Index.BIG5); - Request request1 = - new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/7?refresh=true"); - request1.setJsonEntity( - "{\"name\":\" " - + " \",\"age\":27,\"state\":\"B.C\",\"country\":\"Canada\",\"year\":2023,\"month\":4}"); - client().performRequest(request1); - Request request2 = - new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/8?refresh=true"); - request2.setJsonEntity( - "{\"name\":\"\",\"age\":57,\"state\":\"B.C\",\"country\":\"Canada\",\"year\":2023,\"month\":4}"); - client().performRequest(request2); + if (!stateCountryWithNullExisted) { + Request request1 = + new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/7?refresh=true"); + request1.setJsonEntity( + "{\"name\":\" " + + " \",\"age\":27,\"state\":\"B.C\",\"country\":\"Canada\",\"year\":2023,\"month\":4}"); + client().performRequest(request1); + Request request2 = + new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/8?refresh=true"); + request2.setJsonEntity( + "{\"name\":\"\",\"age\":57,\"state\":\"B.C\",\"country\":\"Canada\",\"year\":2023,\"month\":4}"); + client().performRequest(request2); + } } @Test @@ -54,6 +66,8 @@ public void testIsNull() throws IOException { @Test public void testIsNullWithStruct() throws IOException { + // Queries the object/struct parent field 'aws' directly. 
+ assumeNotAnalytics(STRUCT_PARENT_FIELD); JSONObject actual = executeQuery("source=big5 | where isnull(aws) | fields aws"); verifySchema(actual, schema("aws", "struct")); verifyNumOfRows(actual, 0); @@ -61,6 +75,8 @@ public void testIsNullWithStruct() throws IOException { @Test public void testIsNullWithNested() throws IOException { + // Queries a nested field; the route strips nested fields at index creation. + assumeNotAnalytics(NESTED_FIELDS); JSONObject actual = executeQuery( String.format( @@ -124,6 +140,8 @@ public void testIsNotNullWithSingleNotEquals() throws IOException { @Test public void testIsNotNullWithStruct() throws IOException { + // Queries the object/struct parent field 'aws' directly. + assumeNotAnalytics(STRUCT_PARENT_FIELD); JSONObject actual = executeQuery("source=big5 | where isnotnull(aws) | fields aws"); verifySchema(actual, schema("aws", "struct")); verifyNumOfRows(actual, 3); @@ -131,6 +149,8 @@ public void testIsNotNullWithStruct() throws IOException { @Test public void testIsNotNullWithNested() throws IOException { + // Queries a nested field; the route strips nested fields at index creation. + assumeNotAnalytics(NESTED_FIELDS); JSONObject actual = executeQuery( String.format( @@ -165,6 +185,8 @@ public void testNullIf() throws IOException { @Test public void testNullIfWithExpression() throws IOException { + // concat('H', name) over the null-name row diverges (NULL-as-empty vs NULL-propagating). + assumeNotAnalytics(CONCAT_NULL_AS_EMPTY); JSONObject actual = executeQuery( String.format( @@ -354,6 +376,8 @@ public void testLatest() throws IOException { @Test public void testEarliestWithEval() throws IOException { + // earliest('now', utc_timestamp()) resolves true on the route but false on v2 (clock source). 
+ assumeNotAnalytics(EARLIEST_LATEST_NOW_CLOCK); JSONObject actual = executeQuery( String.format( diff --git a/integ-test/src/test/java/org/opensearch/sql/util/AnalyticsRouteLimitation.java b/integ-test/src/test/java/org/opensearch/sql/util/AnalyticsRouteLimitation.java index 952fd08593..24a820eb23 100644 --- a/integ-test/src/test/java/org/opensearch/sql/util/AnalyticsRouteLimitation.java +++ b/integ-test/src/test/java/org/opensearch/sql/util/AnalyticsRouteLimitation.java @@ -133,7 +133,42 @@ public enum AnalyticsRouteLimitation { SUBSEARCH_MAXOUT_IN_SUBQUERY( "subsearch.maxout is not honored on the analytics-engine route: the LIMIT lowered onto the" + " in-subquery semi-join's right side is dropped, so the subsearch returns all rows" - + " regardless of the cap."); + + " regardless of the cap."), + + /** + * Querying an {@code object}/struct parent field directly (e.g. {@code isnull(aws)} where {@code + * aws} is an {@code object}) fails on the analytics-engine route with {@code FIELD_NOT_FOUND}. + * The route flattens objects into dotted leaf columns — {@code aws.cloudwatch.log_group} scans + * fine — but the struct parent is not exposed as a queryable column. Distinct from {@link + * #NESTED_FIELDS}: {@code object} parents survive in the OpenSearch mapping (they aren't stripped + * at load) yet still can't be referenced as a whole. + */ + STRUCT_PARENT_FIELD( + "Querying an object/struct parent field directly is unsupported on the analytics-engine" + + " route: objects are flattened to dotted leaf columns and the parent resolves to" + + " FIELD_NOT_FOUND."), + + /** + * {@code concat()} over a NULL argument diverges: the analytics-engine route (DataFusion) treats + * NULL as an empty string (e.g. {@code concat('H', null)} = {@code 'H'}), whereas the v2/Calcite + * engine propagates NULL ({@code concat('H', null)} = {@code null}). Any expression that depends + * on the NULL-propagating behavior over a possibly-null operand diverges. 
+ */ + CONCAT_NULL_AS_EMPTY( + "concat() treats a NULL argument as an empty string on the analytics-engine route (DataFusion" + + " semantics), whereas the v2/Calcite engine propagates NULL."), + + /** + * {@code earliest('now', ts)} / {@code latest('now', ts)} where {@code ts} is {@code + * utc_timestamp()} diverge: on the analytics-engine route the relative-time {@code 'now'} and + * {@code utc_timestamp()} resolve to the same instant (so {@code earliest('now', now)} is {@code + * true}), whereas on the v2/Calcite path they differ (it is {@code false}) — a clock-source + * divergence between the relative-time evaluation and {@code utc_timestamp()}. + */ + EARLIEST_LATEST_NOW_CLOCK( + "earliest/latest with relative-time 'now' against utc_timestamp() diverges on the" + + " analytics-engine route: 'now' and utc_timestamp() resolve to the same instant" + + " (earliest('now', now) is true), but differ on the v2/Calcite path (false)."); private final String reason;