Skip to content

Commit cd45dd1

Browse files
committed
Stabilize CalcitePPLCaseFunctionIT on the analytics-engine route
Two test-infra issues caused 5 of the class's 6 analytics-route failures: - init() seeds four extra weblogs docs via unconditional raw PUTs (appendDataForBadResponse). init() runs as @Before, before every test method, and the analytics-engine parquet-backed store is append-only on same-_id PUT, so the docs accumulated a duplicate per method and inflated row counts across the suite. Guard the seeding on a pre-loadIndex isIndexExist check. - The otel_logs dataset has a multi-value attributes.email.invalid_recipients array, which the parquet/composite store rejects ('Cannot accept multiple values for field ... of type keyword'), aborting init() for every test. Only testNestedCaseAggWithAutoDateHistogram uses otel_logs, so skip the load on the analytics route. The remaining failure is a genuine route divergence, skipped via the assumeNotAnalytics(...) registry plus a matching excludeTestsMatching entry: - BIN_TIME_FIELD_BUCKETING: binning @timestamp and then grouping by it returns the date-histogram bucket column typed as string (not timestamp) on the AE route — testNestedCaseAggWithAutoDateHistogram. Results (-Dtests.analytics.parquet_indices=true against the analytics route): CalcitePPLCaseFunctionIT: 3/9 -> 8/9 pass, 1 excluded, 0 fail; v2/Calcite route unchanged: 9/9 pass. Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent 4c1165a commit cd45dd1

2 files changed

Lines changed: 28 additions & 2 deletions

File tree

integ-test/build.gradle

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,6 +1164,14 @@ task integTestRemote(type: RestIntegTestTask) {
11641164
// - earliest('now', utc_timestamp()): 'now' and utc_timestamp() resolve to the same
11651165
// instant on the route (true) but differ on v2 (false).
11661166
excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testEarliestWithEval'
1167+
1168+
// === Excludes: CalcitePPLCaseFunctionIT route divergences ===
1169+
// The rest of the class passes once the weblogs raw-PUT seeding is guarded against
1170+
// append-only accumulation and the multi-value otel_logs dataset is skipped on the AE
1171+
// route (both in the IT's init()). The test excluded below also carries an in-test assumeNotAnalytics(...).
1172+
// - bin @timestamp then grouping by it returns the bucket column typed string (not
1173+
// timestamp) on the AE route.
1174+
excludeTestsMatching '*CalcitePPLCaseFunctionIT.testNestedCaseAggWithAutoDateHistogram'
11671175
}
11681176
}
11691177

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLCaseFunctionIT.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@
55

66
package org.opensearch.sql.calcite.remote;
77

8+
import static org.opensearch.sql.legacy.TestUtils.isIndexExist;
89
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK;
910
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OTEL_LOGS;
1011
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STATE_COUNTRY_WITH_NULL;
1112
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WEBLOGS;
13+
import static org.opensearch.sql.util.AnalyticsRouteLimitation.BIN_TIME_FIELD_BUCKETING;
1214
import static org.opensearch.sql.util.MatcherUtils.closeTo;
1315
import static org.opensearch.sql.util.MatcherUtils.rows;
1416
import static org.opensearch.sql.util.MatcherUtils.schema;
@@ -31,12 +33,25 @@ public void init() throws Exception {
3133
super.init();
3234
enableCalcite();
3335

36+
// init() runs as @Before, before every test method. On the analytics route the parquet-backed
37+
// store is append-only on same-_id PUT, so seed the extra weblogs docs only when the index is
38+
// first created — otherwise they accumulate a duplicate per test method and inflate row counts.
39+
boolean weblogsExisted = isIndexExist(client(), TEST_INDEX_WEBLOGS);
3440
loadIndex(Index.WEBLOG);
3541
loadIndex(Index.TIME_TEST_DATA);
3642
loadIndex(Index.STATE_COUNTRY_WITH_NULL);
3743
loadIndex(Index.BANK);
38-
loadIndex(Index.OTELLOGS);
39-
appendDataForBadResponse();
44+
// The otel_logs dataset has a multi-value attributes.email.invalid_recipients array, which the
45+
// parquet/composite store rejects ("Cannot accept multiple values for field ... of type
46+
// keyword"), aborting init() for every test. Only testNestedCaseAggWithAutoDateHistogram uses
47+
// this index and it is skipped on the analytics route (BIN_TIME_FIELD_BUCKETING), so skip the
48+
// load there.
49+
if (!isAnalyticsParquetIndicesEnabled()) {
50+
loadIndex(Index.OTELLOGS);
51+
}
52+
if (!weblogsExisted) {
53+
appendDataForBadResponse();
54+
}
4055
}
4156

4257
private void appendDataForBadResponse() throws IOException {
@@ -484,6 +499,9 @@ public void testNestedCaseAggWithAutoDateHistogram() throws IOException {
484499
"The query cannot be executed when pushdown is disabled due to implementation defects of"
485500
+ " the bin command",
486501
isPushdownDisabled());
502+
// bin @timestamp then grouping by it returns the bucket column typed string (not timestamp) on
503+
// the analytics route; the query also reads otel_logs, which isn't loaded on that route.
504+
assumeNotAnalytics(BIN_TIME_FIELD_BUCKETING);
487505
JSONObject actual1 =
488506
executeQuery(
489507
String.format(

0 commit comments

Comments
 (0)