diff --git a/integ-test/build.gradle b/integ-test/build.gradle index 27de0eee9e..0d46f7feea 100644 --- a/integ-test/build.gradle +++ b/integ-test/build.gradle @@ -1102,6 +1102,28 @@ task integTestRemote(type: RestIntegTestTask) { // - Exact == on a dynamically-mapped string (email) returns no rows: dynamic mapping // omits the .keyword sub-field on the AE route. excludeTestsMatching '*WhereCommandIT.testDoubleEqualWithSpecialCharacters' + + // === Excludes: CalciteMultisearchCommandIT route divergences === + // Each test also carries an in-test assumeNotAnalytics(...) recording the reason (see + // AnalyticsRouteLimitation); listed here so the AE-route skip set stays countable. + // - Same-index subsearch conflation: when every subsearch reads the same index, the AE + // route applies the first subsearch's filter to all of them, so counts are wrong. + excludeTestsMatching '*CalciteMultisearchCommandIT.testMultisearchWithThreeSubsearches' + excludeTestsMatching '*CalciteMultisearchCommandIT.testMultisearchWithComplexAggregation' + excludeTestsMatching '*CalciteMultisearchCommandIT.testMultisearchWithoutFurtherProcessing' + // - Column-order divergence: multisearch over different indices returns merged columns + // in a different order than the v2/Calcite path. + excludeTestsMatching '*CalciteMultisearchCommandIT.testMultisearchWithTimestampInterleaving' + + // === Excludes: CalciteBinCommandIT route divergences === + // bin on a time field then grouping by it: the AE route types the date-histogram bucket + // column as string (not timestamp) AND produces a different bucket set (auto-histogram + // span / empty-bucket filtering differ), so both schema and row counts diverge. Each + // test also carries an in-test assumeNotAnalytics(...) (see AnalyticsRouteLimitation). 
+ excludeTestsMatching '*CalciteBinCommandIT.testStatsWithBinsOnTimeField_Count' + excludeTestsMatching '*CalciteBinCommandIT.testStatsWithBinsOnTimeField_Avg' + excludeTestsMatching '*CalciteBinCommandIT.testStatsWithBinsOnTimeAndTermField_Count' + excludeTestsMatching '*CalciteBinCommandIT.testStatsWithBinsOnTimeAndTermField_Avg' } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java index 3fb8219fe1..b202c8da48 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java @@ -9,6 +9,7 @@ import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.opensearch.sql.legacy.TestsConstants.*; +import static org.opensearch.sql.util.AnalyticsRouteLimitation.BIN_TIME_FIELD_BUCKETING; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; @@ -872,6 +873,7 @@ public void testBinFloatingPointSpanWithStatsCount() throws IOException { public void testStatsWithBinsOnTimeField_Count() throws IOException { // TODO: Remove this after addressing https://github.com/opensearch-project/sql/issues/4317 enabledOnlyWhenPushdownIsEnabled(); + assumeNotAnalytics(BIN_TIME_FIELD_BUCKETING); JSONObject result = executeQuery("source=events_null | bin @timestamp bins=3 | stats count() by @timestamp"); @@ -910,6 +912,7 @@ public void testStatsWithBinsOnTimeField_Count() throws IOException { public void testStatsWithBinsOnTimeField_Avg() throws IOException { // TODO: Remove this after addressing https://github.com/opensearch-project/sql/issues/4317 enabledOnlyWhenPushdownIsEnabled(); + assumeNotAnalytics(BIN_TIME_FIELD_BUCKETING); JSONObject result = 
executeQuery( @@ -951,6 +954,7 @@ public void testStatsWithBinsOnTimeField_Avg() throws IOException { public void testStatsWithBinsOnTimeAndTermField_Count() throws IOException { // TODO: Remove this after addressing https://github.com/opensearch-project/sql/issues/4317 enabledOnlyWhenPushdownIsEnabled(); + assumeNotAnalytics(BIN_TIME_FIELD_BUCKETING); JSONObject result = executeQuery( @@ -974,6 +978,7 @@ public void testStatsWithBinsOnTimeAndTermField_Count() throws IOException { public void testStatsWithBinsOnTimeAndTermField_Avg() throws IOException { // TODO: Remove this after addressing https://github.com/opensearch-project/sql/issues/4317 enabledOnlyWhenPushdownIsEnabled(); + assumeNotAnalytics(BIN_TIME_FIELD_BUCKETING); JSONObject result = executeQuery( diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java index a738132744..dd32c8eb8e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java @@ -6,6 +6,8 @@ package org.opensearch.sql.calcite.remote; import static org.opensearch.sql.legacy.TestsConstants.*; +import static org.opensearch.sql.util.AnalyticsRouteLimitation.MULTISEARCH_COLUMN_ORDER; +import static org.opensearch.sql.util.AnalyticsRouteLimitation.MULTISEARCH_SAME_INDEX_CONFLATION; import static org.opensearch.sql.util.MatcherUtils.rows; import static org.opensearch.sql.util.MatcherUtils.schema; import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; @@ -65,6 +67,7 @@ public void testMultisearchSuccessRatePattern() throws IOException { @Test public void testMultisearchWithThreeSubsearches() throws IOException { + assumeNotAnalytics(MULTISEARCH_SAME_INDEX_CONFLATION); JSONObject result = executeQuery( String.format( @@ -81,6 +84,7 @@ public void 
testMultisearchWithThreeSubsearches() throws IOException { @Test public void testMultisearchWithComplexAggregation() throws IOException { + assumeNotAnalytics(MULTISEARCH_SAME_INDEX_CONFLATION); JSONObject result = executeQuery( String.format( @@ -143,6 +147,7 @@ public void testMultisearchWithFieldsProjection() throws IOException { @Test public void testMultisearchWithTimestampInterleaving() throws IOException { + assumeNotAnalytics(MULTISEARCH_COLUMN_ORDER); JSONObject result = executeQuery( "| multisearch [search" @@ -353,6 +358,7 @@ public void testMultisearchCrossIndexFieldSelection() throws IOException { /** Reproduce #5145: multisearch without further processing should return all rows. */ @Test public void testMultisearchWithoutFurtherProcessing() throws IOException { + assumeNotAnalytics(MULTISEARCH_SAME_INDEX_CONFLATION); JSONObject result = executeQuery( "| multisearch [search source=opensearch-sql_test_index_time_data | where category =" diff --git a/integ-test/src/test/java/org/opensearch/sql/util/AnalyticsRouteLimitation.java b/integ-test/src/test/java/org/opensearch/sql/util/AnalyticsRouteLimitation.java index 0510baea65..b16c8a1065 100644 --- a/integ-test/src/test/java/org/opensearch/sql/util/AnalyticsRouteLimitation.java +++ b/integ-test/src/test/java/org/opensearch/sql/util/AnalyticsRouteLimitation.java @@ -54,7 +54,38 @@ public enum AnalyticsRouteLimitation { */ DOC_MUTATION( "Test mutates docs via PUT+DELETE, which DataFormatAwareEngine (analytics-engine storage" - + " path) does not support."); + + " path) does not support."), + + /** + * When every {@code multisearch} subsearch reads the same index, the analytics-engine route + * applies the first subsearch's filter to all of them (each keeps its own {@code eval} label), so + * later subsearches silently return the first subsearch's rows. Produces wrong counts/duplication + * — the route can't be asserted against. Reproduces single-shard. 
+ */ + MULTISEARCH_SAME_INDEX_CONFLATION( + "multisearch with same-index subsearches conflates on the analytics-engine route: every" + + " subsearch executes the first subsearch's filter, so counts/rows are wrong."), + + /** + * A {@code multisearch} over heterogeneous indices returns the merged columns in a different + * order than the v2/Calcite path (e.g. trailing fields swapped), so row-order-sensitive + * assertions diverge even though the values are correct. + */ + MULTISEARCH_COLUMN_ORDER( + "multisearch over different indices returns merged columns in a different order on the" + + " analytics-engine route than the v2/Calcite path."), + + /** + * Binning a time field then grouping by it ({@code bin <time_field> bins=N | stats ... by + * <time_field>}) diverges on the analytics-engine route: the date-histogram bucket column comes + * back typed {@code string} rather than {@code timestamp}, and the route produces a different + * bucket set (different auto-histogram span / empty buckets not filtered) so the row counts don't + * match the v2/Calcite path. + */ + BIN_TIME_FIELD_BUCKETING( + "bin on a time field then grouping by it diverges on the analytics-engine route: the bucket" + + " column is typed string (not timestamp) and the bucket set differs from the v2/Calcite" + + " path."); private final String reason;