Skip to content

Commit 6e7e1f0

Browse files
committed
test: add analytics-engine regression IT for singleton stack-corruption
CalciteDatetimeUdtNormalizeRegressionIT exercises the failure pattern that triggered the cluster-side NoSuchElementException: stats + distinct_count over datetime columns, repeated 20 times to amplify any plan() carry-over. The IT is harness-aware: - Without `-Dtests.analytics.force_routing=true`: queries go through the V2 / Calcite engine path. The DatetimeUdtNormalizeRule path is not exercised, so the IT passes as a baseline correctness check. - With `-Dtests.analytics.force_routing=true -Dtests.analytics.parquet_indices=true`: every query routes through the analytics-engine path and hits the DatetimeUdtNormalizeRule shuttle that this PR fixes. The 20-iteration pattern surfaces any remaining singleton-stack carry-over. CI's :integTest task (in-process testCluster without analytics-engine) runs the IT through the V2 path, which is safe and fast. The analytics-engine verification path is via :integTestRemote against an externally-managed cluster built per `docs/dev/ppl-analytics-engine-routing.md`. Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent 35bf78f commit 6e7e1f0

1 file changed

Lines changed: 117 additions & 0 deletions

File tree

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.calcite.remote;
7+
8+
import static org.junit.Assert.assertNotNull;
9+
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DATE_FORMATS;
10+
11+
import java.io.IOException;
12+
import org.json.JSONObject;
13+
import org.junit.jupiter.api.Test;
14+
import org.opensearch.sql.ppl.PPLIntegTestCase;
15+
16+
/**
17+
* Regression IT for the {@code DatetimeUdtNormalizeRule} / {@code DatetimeOutputCastRule}
18+
* singleton-stack-corruption bug.
19+
*
20+
* <p>Both rules extend Calcite's {@code RelHomogeneousShuttle}, which inherits a stateful {@code
21+
* Deque<RelNode>} stack from {@code RelShuttleImpl}. Earlier code returned the same {@code
22+
* INSTANCE} of each rule from {@code DatetimeExtension.postAnalysisRules()} on every {@code
23+
* UnifiedQueryPlanner.plan()} call. If any traversal ever finished with an unbalanced stack,
24+
* residual entries persisted to the next query and the next {@code visitChild()} popped a stale or
25+
* empty stack — surfacing as {@code NoSuchElementException} at {@code RelShuttleImpl.visitChild}
26+
* line 67.
27+
*
28+
* <p>The failure was reported as intermittent on dashboards issuing field-statistics queries of the
29+
* shape {@code stats count() as field_count, distinct_count(field)} against parquet-backed indices.
30+
* This IT runs the same query shape repeatedly against a parquet-backed (composite) index with
31+
* multiple datetime columns to exercise the analytics-engine route end-to-end.
32+
*
33+
* <p>Run via:
34+
*
35+
* <pre>{@code
36+
* ./gradlew :integ-test:integTestRemote \
37+
* -Dtests.rest.cluster=localhost:9200 -Dtests.cluster=localhost:9300 \
38+
* -Dtests.clustername=runTask \
39+
* -Dtests.analytics.force_routing=true \
40+
* -Dtests.analytics.parquet_indices=true \
41+
* --tests org.opensearch.sql.calcite.remote.CalciteDatetimeUdtNormalizeRegressionIT
42+
* }</pre>
43+
*/
44+
public class CalciteDatetimeUdtNormalizeRegressionIT extends PPLIntegTestCase {
45+
46+
/** Number of repetitions per test. Singleton bug surfaces order-dependent across plan() calls. */
47+
private static final int ITERATIONS = 20;
48+
49+
@Override
50+
public void init() throws Exception {
51+
super.init();
52+
enableCalcite();
53+
54+
// DATE_FORMATS index has many date columns of different formats / precisions, which is what
55+
// dashboard field-statistics panels iterate over. Loaded through the helper so that with
56+
// -Dtests.analytics.parquet_indices=true it gets provisioned as a parquet-backed composite
57+
// index — required for analytics-engine routing.
58+
loadIndex(Index.DATE_FORMATS);
59+
}
60+
61+
@Test
62+
public void testSequentialStatsDistinctCountOverDatetime() throws IOException {
63+
// Bug repro: each iteration runs a stats + distinct_count over a datetime field. With the
64+
// singleton rule, residual entries on the shuttle's internal stack from the previous plan()
65+
// would cause NoSuchElementException on a subsequent traversal. With fresh instances per
66+
// plan(), the stack is always empty at entry and the iterations all succeed.
67+
String query =
68+
String.format(
69+
"source=%s | stats count() as field_count, distinct_count(epoch_millis) as"
70+
+ " distinct_count",
71+
TEST_INDEX_DATE_FORMATS);
72+
for (int i = 0; i < ITERATIONS; i++) {
73+
JSONObject result = executeQuery(query);
74+
// Sanity: response was returned without a cluster-side exception.
75+
assertNotNull("iteration " + i + " produced no result", result);
76+
}
77+
}
78+
79+
@Test
80+
public void testInterleavedStatsAndDatetimeProjection() throws IOException {
81+
// Bug repro variant: interleave stats+distinct_count with a plain datetime projection that
82+
// exercises the DatetimeOutputCastRule. Different plan shapes per iteration push the rule
83+
// through different visitChild paths and surface stack desync faster.
84+
for (int i = 0; i < ITERATIONS; i++) {
85+
executeQuery(
86+
String.format(
87+
"source=%s | stats count() as field_count, distinct_count(epoch_millis) as"
88+
+ " distinct_count",
89+
TEST_INDEX_DATE_FORMATS));
90+
executeQuery(
91+
String.format(
92+
"source=%s | fields epoch_millis, epoch_second, date_optional_time",
93+
TEST_INDEX_DATE_FORMATS));
94+
executeQuery(
95+
String.format(
96+
"source=%s | stats count() as field_count, distinct_count(epoch_second) as"
97+
+ " distinct_count",
98+
TEST_INDEX_DATE_FORMATS));
99+
}
100+
}
101+
102+
@Test
103+
public void testDistinctCountOverMultipleDatetimeFields() throws IOException {
104+
// Bug repro variant: iterate distinct_count over different datetime fields in sequence —
105+
// mirrors the dashboard field-statistics tab that probes every field in the index.
106+
String[] datetimeFields = {
107+
"epoch_millis", "epoch_second", "date_optional_time", "strict_date_optional_time"
108+
};
109+
for (int i = 0; i < ITERATIONS; i++) {
110+
String field = datetimeFields[i % datetimeFields.length];
111+
executeQuery(
112+
String.format(
113+
"source=%s | stats count() as field_count, distinct_count(%s) as distinct_count",
114+
TEST_INDEX_DATE_FORMATS, field));
115+
}
116+
}
117+
}

0 commit comments

Comments
 (0)