Skip to content

Commit da97639

Browse files
committed
Bring CalciteBinCommandIT and CalciteMultisearchCommandIT to parity on the analytics-engine route
Skip the analytics-route divergences in both ITs behind assumeNotAnalytics(...) and the gradle exclude list, mirroring the CalciteWhereCommandIT pattern. CalciteBinCommandIT (4 tests): bin on a time field then grouping by it (bin <timefield> bins=N | stats ... by <timefield>) diverges — the date-histogram bucket column is typed string (not timestamp) and the route produces a different bucket set (auto-histogram span / empty-bucket filtering differ), so both schema and row counts diverge. CalciteMultisearchCommandIT (4 tests): same-index subsearch conflation (every subsearch executes the first subsearch's filter, producing wrong counts) on 3 tests, and merged-column-order divergence over heterogeneous indices on 1. Both are analytics-engine behaviors, recorded as AnalyticsRouteLimitation constants. The v2/Calcite path is unchanged — all tests still run and pass there. Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent 9aad427 commit da97639

4 files changed

Lines changed: 65 additions & 1 deletion

File tree

integ-test/build.gradle

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,6 +1102,28 @@ task integTestRemote(type: RestIntegTestTask) {
11021102
// - Exact == on a dynamically-mapped string (email) returns no rows: dynamic mapping
11031103
// omits the .keyword sub-field on the AE route.
11041104
excludeTestsMatching '*WhereCommandIT.testDoubleEqualWithSpecialCharacters'
1105+
1106+
// === Excludes: CalciteMultisearchCommandIT route divergences ===
1107+
// Each test also carries an in-test assumeNotAnalytics(...) recording the reason (see
1108+
// AnalyticsRouteLimitation); listed here so the AE-route skip set stays countable.
1109+
// - Same-index subsearch conflation: when every subsearch reads the same index, the AE
1110+
// route applies the first subsearch's filter to all of them, so counts are wrong.
1111+
excludeTestsMatching '*CalciteMultisearchCommandIT.testMultisearchWithThreeSubsearches'
1112+
excludeTestsMatching '*CalciteMultisearchCommandIT.testMultisearchWithComplexAggregation'
1113+
excludeTestsMatching '*CalciteMultisearchCommandIT.testMultisearchWithoutFurtherProcessing'
1114+
// - Column-order divergence: multisearch over different indices returns merged columns
1115+
// in a different order than the v2/Calcite path.
1116+
excludeTestsMatching '*CalciteMultisearchCommandIT.testMultisearchWithTimestampInterleaving'
1117+
1118+
// === Excludes: CalciteBinCommandIT route divergences ===
1119+
// bin on a time field then grouping by it: the AE route types the date-histogram bucket
1120+
// column as string (not timestamp) AND produces a different bucket set (auto-histogram
1121+
// span / empty-bucket filtering differ), so both schema and row counts diverge. Each
1122+
// test also carries an in-test assumeNotAnalytics(...) (see AnalyticsRouteLimitation).
1123+
excludeTestsMatching '*CalciteBinCommandIT.testStatsWithBinsOnTimeField_Count'
1124+
excludeTestsMatching '*CalciteBinCommandIT.testStatsWithBinsOnTimeField_Avg'
1125+
excludeTestsMatching '*CalciteBinCommandIT.testStatsWithBinsOnTimeAndTermField_Count'
1126+
excludeTestsMatching '*CalciteBinCommandIT.testStatsWithBinsOnTimeAndTermField_Avg'
11051127
}
11061128
}
11071129

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinCommandIT.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import static org.junit.Assert.assertTrue;
1010
import static org.junit.jupiter.api.Assertions.assertThrows;
1111
import static org.opensearch.sql.legacy.TestsConstants.*;
12+
import static org.opensearch.sql.util.AnalyticsRouteLimitation.BIN_TIME_FIELD_BUCKETING;
1213
import static org.opensearch.sql.util.MatcherUtils.rows;
1314
import static org.opensearch.sql.util.MatcherUtils.schema;
1415
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
@@ -872,6 +873,7 @@ public void testBinFloatingPointSpanWithStatsCount() throws IOException {
872873
public void testStatsWithBinsOnTimeField_Count() throws IOException {
873874
// TODO: Remove this after addressing https://github.com/opensearch-project/sql/issues/4317
874875
enabledOnlyWhenPushdownIsEnabled();
876+
assumeNotAnalytics(BIN_TIME_FIELD_BUCKETING);
875877

876878
JSONObject result =
877879
executeQuery("source=events_null | bin @timestamp bins=3 | stats count() by @timestamp");
@@ -910,6 +912,7 @@ public void testStatsWithBinsOnTimeField_Count() throws IOException {
910912
public void testStatsWithBinsOnTimeField_Avg() throws IOException {
911913
// TODO: Remove this after addressing https://github.com/opensearch-project/sql/issues/4317
912914
enabledOnlyWhenPushdownIsEnabled();
915+
assumeNotAnalytics(BIN_TIME_FIELD_BUCKETING);
913916

914917
JSONObject result =
915918
executeQuery(
@@ -951,6 +954,7 @@ public void testStatsWithBinsOnTimeField_Avg() throws IOException {
951954
public void testStatsWithBinsOnTimeAndTermField_Count() throws IOException {
952955
// TODO: Remove this after addressing https://github.com/opensearch-project/sql/issues/4317
953956
enabledOnlyWhenPushdownIsEnabled();
957+
assumeNotAnalytics(BIN_TIME_FIELD_BUCKETING);
954958

955959
JSONObject result =
956960
executeQuery(
@@ -974,6 +978,7 @@ public void testStatsWithBinsOnTimeAndTermField_Count() throws IOException {
974978
public void testStatsWithBinsOnTimeAndTermField_Avg() throws IOException {
975979
// TODO: Remove this after addressing https://github.com/opensearch-project/sql/issues/4317
976980
enabledOnlyWhenPushdownIsEnabled();
981+
assumeNotAnalytics(BIN_TIME_FIELD_BUCKETING);
977982

978983
JSONObject result =
979984
executeQuery(

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteMultisearchCommandIT.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
package org.opensearch.sql.calcite.remote;
77

88
import static org.opensearch.sql.legacy.TestsConstants.*;
9+
import static org.opensearch.sql.util.AnalyticsRouteLimitation.MULTISEARCH_COLUMN_ORDER;
10+
import static org.opensearch.sql.util.AnalyticsRouteLimitation.MULTISEARCH_SAME_INDEX_CONFLATION;
911
import static org.opensearch.sql.util.MatcherUtils.rows;
1012
import static org.opensearch.sql.util.MatcherUtils.schema;
1113
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
@@ -65,6 +67,7 @@ public void testMultisearchSuccessRatePattern() throws IOException {
6567

6668
@Test
6769
public void testMultisearchWithThreeSubsearches() throws IOException {
70+
assumeNotAnalytics(MULTISEARCH_SAME_INDEX_CONFLATION);
6871
JSONObject result =
6972
executeQuery(
7073
String.format(
@@ -81,6 +84,7 @@ public void testMultisearchWithThreeSubsearches() throws IOException {
8184

8285
@Test
8386
public void testMultisearchWithComplexAggregation() throws IOException {
87+
assumeNotAnalytics(MULTISEARCH_SAME_INDEX_CONFLATION);
8488
JSONObject result =
8589
executeQuery(
8690
String.format(
@@ -143,6 +147,7 @@ public void testMultisearchWithFieldsProjection() throws IOException {
143147

144148
@Test
145149
public void testMultisearchWithTimestampInterleaving() throws IOException {
150+
assumeNotAnalytics(MULTISEARCH_COLUMN_ORDER);
146151
JSONObject result =
147152
executeQuery(
148153
"| multisearch [search"
@@ -353,6 +358,7 @@ public void testMultisearchCrossIndexFieldSelection() throws IOException {
353358
/** Reproduce #5145: multisearch without further processing should return all rows. */
354359
@Test
355360
public void testMultisearchWithoutFurtherProcessing() throws IOException {
361+
assumeNotAnalytics(MULTISEARCH_SAME_INDEX_CONFLATION);
356362
JSONObject result =
357363
executeQuery(
358364
"| multisearch [search source=opensearch-sql_test_index_time_data | where category ="

integ-test/src/test/java/org/opensearch/sql/util/AnalyticsRouteLimitation.java

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,38 @@ public enum AnalyticsRouteLimitation {
5454
*/
5555
DOC_MUTATION(
5656
"Test mutates docs via PUT+DELETE, which DataFormatAwareEngine (analytics-engine storage"
57-
+ " path) does not support.");
57+
+ " path) does not support."),
58+
59+
/**
60+
* When every {@code multisearch} subsearch reads the same index, the analytics-engine route
61+
* applies the first subsearch's filter to all of them (each keeps its own {@code eval} label), so
62+
* later subsearches silently return the first subsearch's rows. Produces wrong counts/duplication
63+
* — the route can't be asserted against. Reproduces on a single shard.
64+
*/
65+
MULTISEARCH_SAME_INDEX_CONFLATION(
66+
"multisearch with same-index subsearches conflates on the analytics-engine route: every"
67+
+ " subsearch executes the first subsearch's filter, so counts/rows are wrong."),
68+
69+
/**
70+
* A {@code multisearch} over heterogeneous indices returns the merged columns in a different
71+
* order than the v2/Calcite path (e.g. trailing fields swapped), so column-order-sensitive
72+
* assertions diverge even though the values are correct.
73+
*/
74+
MULTISEARCH_COLUMN_ORDER(
75+
"multisearch over different indices returns merged columns in a different order on the"
76+
+ " analytics-engine route than the v2/Calcite path."),
77+
78+
/**
79+
* Binning a time field then grouping by it ({@code bin <timefield> bins=N | stats ... by
80+
* <timefield>}) diverges on the analytics-engine route: the date-histogram bucket column comes
81+
* back typed {@code string} rather than {@code timestamp}, and the route produces a different
82+
* bucket set (different auto-histogram span / empty buckets not filtered) so the row counts don't
83+
* match the v2/Calcite path.
84+
*/
85+
BIN_TIME_FIELD_BUCKETING(
86+
"bin on a time field then grouping by it diverges on the analytics-engine route: the bucket"
87+
+ " column is typed string (not timestamp) and the bucket set differs from the v2/Calcite"
88+
+ " path.");
5889

5990
private final String reason;
6091

0 commit comments

Comments
 (0)