Skip to content

Commit 36f14ca

Browse files
committed
Bring CalcitePPLEnhancedCoalesceIT to parity on the analytics-engine route
Four ordering-sensitive tests failed on the analytics route only because they used 'head N' without a sort, and the analytics scan surfaces raw-PUT docs ahead of bulk-loaded docs (non-deterministic row set). Fixed in place by adding an explicit 'sort - age' where the sort key is unique over the head window (or the tie projects identical values), so the expectations are unchanged and hold on both routes: - testCoalesceNested - testCoalesceEmptyFieldWithFallback - testCoalesceWithMultipleNonExistentFields - testCoalesceWithNullLiteralAndIntegerField Three tests are skipped on the analytics route (assumeNotAnalytics + gradle exclude list, recorded as AnalyticsRouteLimitation constants): - testCoalesceBasic, testCoalesceWithMixedTypes: 'head 3' whose third row needs a nullable tiebreak (age=25 tie between a real doc and a raw-PUT null-name doc); null placement diverges between the v2/Calcite and analytics routes, so a sort can't make the expectation hold on both. - testCoalesceWithAllNonExistentFields: COALESCE over all-untyped-null operands is rejected by the analytics-engine capability registry (No backend supports scalar function [COALESCE]); the v2 path returns an undefined-typed null (#5175). Genuine engine gap. The v2/Calcite path is unchanged: all 19 tests run and pass there. Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent 9aad427 commit 36f14ca

3 files changed

Lines changed: 46 additions & 9 deletions

File tree

integ-test/build.gradle

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,6 +1102,17 @@ task integTestRemote(type: RestIntegTestTask) {
11021102
// - Exact == on a dynamically-mapped string (email) returns no rows: dynamic mapping
11031103
// omits the .keyword sub-field on the AE route.
11041104
excludeTestsMatching '*WhereCommandIT.testDoubleEqualWithSpecialCharacters'
1105+
1106+
// === Excludes: CalcitePPLEnhancedCoalesceIT route divergences ===
1107+
// Each test also carries an in-test assumeNotAnalytics(...) recording the reason (see
1108+
// AnalyticsRouteLimitation). The other ordering-sensitive coalesce tests were fixed in
1109+
// place with an explicit `sort` instead of being skipped.
1110+
// - COALESCE over all-untyped-null operands is rejected by the capability registry.
1111+
excludeTestsMatching '*CalcitePPLEnhancedCoalesceIT.testCoalesceWithAllNonExistentFields'
1112+
// - head N without a sort unique over the head window is non-deterministic, and the
1113+
// nullable tiebreak these would need orders nulls differently than v2/Calcite.
1114+
excludeTestsMatching '*CalcitePPLEnhancedCoalesceIT.testCoalesceWithMixedTypes'
1115+
excludeTestsMatching '*CalcitePPLEnhancedCoalesceIT.testCoalesceBasic'
11051116
}
11061117
}
11071118

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEnhancedCoalesceIT.java

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
package org.opensearch.sql.calcite.remote;
77

88
import static org.opensearch.sql.legacy.TestsConstants.*;
9+
import static org.opensearch.sql.util.AnalyticsRouteLimitation.COALESCE_ALL_NULL_OPERANDS;
10+
import static org.opensearch.sql.util.AnalyticsRouteLimitation.HEAD_WITHOUT_STABLE_SORT;
911
import static org.opensearch.sql.util.MatcherUtils.*;
1012

1113
import java.io.IOException;
@@ -37,7 +39,7 @@ public void init() throws Exception {
3739

3840
@Test
3941
public void testCoalesceBasic() throws IOException {
40-
42+
assumeNotAnalytics(HEAD_WITHOUT_STABLE_SORT);
4143
JSONObject actual =
4244
executeQuery(
4345
String.format(
@@ -53,7 +55,7 @@ public void testCoalesceBasic() throws IOException {
5355

5456
@Test
5557
public void testCoalesceWithMixedTypes() throws IOException {
56-
58+
assumeNotAnalytics(HEAD_WITHOUT_STABLE_SORT);
5759
JSONObject actual =
5860
executeQuery(
5961
String.format(
@@ -121,7 +123,7 @@ public void testCoalesceNested() throws IOException {
121123
executeQuery(
122124
String.format(
123125
"source=%s | eval result1 = coalesce(name, 'default'), result2 = coalesce(result1,"
124-
+ " age) | fields name, age, result1, result2 | head 2",
126+
+ " age) | sort - age | fields name, age, result1, result2 | head 2",
125127
TEST_INDEX_STATE_COUNTRY_WITH_NULL));
126128

127129
verifySchema(
@@ -153,8 +155,8 @@ public void testCoalesceWithMultipleNonExistentFields() throws IOException {
153155
JSONObject actual =
154156
executeQuery(
155157
String.format(
156-
"source=%s | eval result = coalesce(field1, field2, name, 'fallback') | fields"
157-
+ " name, result | head 1",
158+
"source=%s | eval result = coalesce(field1, field2, name, 'fallback') | sort - age"
159+
+ " | fields name, result | head 1",
158160
TEST_INDEX_STATE_COUNTRY_WITH_NULL));
159161

160162
verifySchema(actual, schema("name", "string"), schema("result", "string"));
@@ -163,7 +165,7 @@ public void testCoalesceWithMultipleNonExistentFields() throws IOException {
163165

164166
@Test
165167
public void testCoalesceWithAllNonExistentFields() throws IOException {
166-
168+
assumeNotAnalytics(COALESCE_ALL_NULL_OPERANDS);
167169
JSONObject actual =
168170
executeQuery(
169171
String.format(
@@ -221,7 +223,8 @@ public void testCoalesceWithNullLiteralAndIntegerField() throws IOException {
221223
JSONObject actual =
222224
executeQuery(
223225
String.format(
224-
"source=%s | eval result = coalesce(null, age) | fields age, result | head 3",
226+
"source=%s | eval result = coalesce(null, age) | sort - age | fields age, result |"
227+
+ " head 3",
225228
TEST_INDEX_STATE_COUNTRY_WITH_NULL));
226229

227230
verifySchema(actual, schema("age", "int"), schema("result", "int"));
@@ -263,7 +266,7 @@ public void testCoalesceEmptyFieldWithFallback() throws IOException {
263266
executeQuery(
264267
String.format(
265268
"source=%s | eval empty_field = '' | eval result = coalesce(empty_field, name) |"
266-
+ " fields name, result | head 1",
269+
+ " sort - age | fields name, result | head 1",
267270
TEST_INDEX_STATE_COUNTRY_WITH_NULL));
268271

269272
verifySchema(actual, schema("name", "string"), schema("result", "string"));

integ-test/src/test/java/org/opensearch/sql/util/AnalyticsRouteLimitation.java

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,30 @@ public enum AnalyticsRouteLimitation {
5454
*/
5555
DOC_MUTATION(
5656
"Test mutates docs via PUT+DELETE, which DataFormatAwareEngine (analytics-engine storage"
57-
+ " path) does not support.");
57+
+ " path) does not support."),
58+
59+
/**
60+
* {@code COALESCE} over operands that are all untyped NULL (e.g. {@code coalesce(field1, field2,
61+
* field3)} where every field is missing from the mapping) is rejected by the analytics-engine
62+
* capability registry with {@code No backend supports scalar function [COALESCE] among
63+
* [datafusion]}. The v2/Calcite path returns an {@code undefined}-typed null instead (#5175).
64+
*/
65+
COALESCE_ALL_NULL_OPERANDS(
66+
"COALESCE over all-untyped-null operands is unsupported on the analytics-engine route (the"
67+
+ " capability registry rejects it); the v2/Calcite path returns an undefined-typed"
68+
+ " null."),
69+
70+
/**
71+
* A {@code head N} without a stable {@code sort} returns a non-deterministic row set on the
72+
* analytics-engine route — raw-{@code PUT} docs land in a separate segment and can sort ahead of
73+
* the bulk-loaded docs. Adding a {@code sort} fixes it only when the sort key is unique over the
74+
* head window; when ties fall back to a nullable key, null placement diverges between the
75+
* v2/Calcite and analytics routes, so the row set still differs.
76+
*/
77+
HEAD_WITHOUT_STABLE_SORT(
78+
"head N without a sort on a key that is unique over the head window is non-deterministic on"
79+
+ " the analytics-engine route, and a nullable tiebreak orders nulls differently than the"
80+
+ " v2/Calcite path.");
5881

5982
private final String reason;
6083

0 commit comments

Comments
 (0)