Skip to content

Commit 9ccd9d8

Browse files
committed
Stabilize CalcitePPLConditionBuiltinFunctionIT on the analytics-engine route
init() seeds two extra docs into state_country_with_null via unconditional raw PUTs. init() runs as @before before every test method, and the analytics-engine parquet-backed store is append-only on same-_id PUT, so the docs accumulated a duplicate per method and inflated row counts across the suite. Guard the seed on a pre-loadIndex isIndexExist check so it runs exactly once; behavior is unchanged on the v2/Calcite route (same end state). Skip the six tests that exercise behaviors the analytics-engine route does not support, using the assumeNotAnalytics(...) registry (AnalyticsRouteLimitation) plus matching excludeTestsMatching entries in integTestRemote so the skip set stays countable in one place. NESTED_FIELDS is reused; three new constants: - STRUCT_PARENT_FIELD: querying an object/struct parent field directly (isnull/isnotnull(aws)) resolves to FIELD_NOT_FOUND — the route flattens objects to dotted leaf columns and the parent is not a queryable column. - CONCAT_NULL_AS_EMPTY: concat('H', null) = 'H' on the route (DataFusion NULL-as-empty) vs null on v2/Calcite (NULL-propagating). - EARLIEST_LATEST_NOW_CLOCK: earliest('now', utc_timestamp()) is true on the route (same instant) but false on v2 (clock-source divergence). The two nested tests reuse NESTED_FIELDS (nested fields are stripped at index creation on this route, #5541). Results (-Dtests.analytics.parquet_indices=true against the analytics route): CalcitePPLConditionBuiltinFunctionIT: 6/24 -> 18/18 run, 0 fail (6 excluded) v2/Calcite route unchanged: 24/24 pass. Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent 9663d5f commit 9ccd9d8

3 files changed

Lines changed: 88 additions & 12 deletions

File tree

integ-test/build.gradle

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1135,6 +1135,23 @@ task integTestRemote(type: RestIntegTestTask) {
11351135
// nullable tiebreak these would need orders nulls differently than v2/Calcite.
11361136
excludeTestsMatching '*CalcitePPLEnhancedCoalesceIT.testCoalesceWithMixedTypes'
11371137
excludeTestsMatching '*CalcitePPLEnhancedCoalesceIT.testCoalesceBasic'
1138+
1139+
// === Excludes: CalcitePPLConditionBuiltinFunctionIT route divergences ===
1140+
// Each test also carries an in-test assumeNotAnalytics(...) recording the reason (see
1141+
// AnalyticsRouteLimitation); listed here so the AE-route skip set stays countable.
1142+
// - isnull/isnotnull on the object/struct parent field big5.aws: objects are flattened
1143+
// to dotted leaf columns and the struct parent is not a queryable column.
1144+
excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testIsNullWithStruct'
1145+
excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testIsNotNullWithStruct'
1146+
// - isnull/isnotnull on the nested field nested_simple.address: nested fields are
1147+
// stripped at index creation (the route can't store them).
1148+
excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testIsNullWithNested'
1149+
excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testIsNotNullWithNested'
1150+
// - concat('H', null): DataFusion treats NULL as empty string; v2/Calcite propagates NULL.
1151+
excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testNullIfWithExpression'
1152+
// - earliest('now', utc_timestamp()): 'now' and utc_timestamp() resolve to the same
1153+
// instant on the route (true) but differ on v2 (false).
1154+
excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testEarliestWithEval'
11381155
}
11391156
}
11401157

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLConditionBuiltinFunctionIT.java

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,12 @@
55

66
package org.opensearch.sql.calcite.remote;
77

8+
import static org.opensearch.sql.legacy.TestUtils.isIndexExist;
89
import static org.opensearch.sql.legacy.TestsConstants.*;
10+
import static org.opensearch.sql.util.AnalyticsRouteLimitation.CONCAT_NULL_AS_EMPTY;
11+
import static org.opensearch.sql.util.AnalyticsRouteLimitation.EARLIEST_LATEST_NOW_CLOCK;
12+
import static org.opensearch.sql.util.AnalyticsRouteLimitation.NESTED_FIELDS;
13+
import static org.opensearch.sql.util.AnalyticsRouteLimitation.STRUCT_PARENT_FIELD;
914
import static org.opensearch.sql.util.MatcherUtils.*;
1015
import static org.opensearch.sql.util.MatcherUtils.rows;
1116

@@ -22,22 +27,29 @@ public void init() throws Exception {
2227
super.init();
2328
enableCalcite();
2429

30+
// init() runs as @Before, before every test method. On the analytics route the parquet-backed
31+
// store is append-only on same-_id PUT, so seed the extra docs only when the index is first
32+
// created — otherwise they accumulate a duplicate per test method and inflate row counts.
33+
boolean stateCountryWithNullExisted =
34+
isIndexExist(client(), TEST_INDEX_STATE_COUNTRY_WITH_NULL);
2535
loadIndex(Index.STATE_COUNTRY);
2636
loadIndex(Index.STATE_COUNTRY_WITH_NULL);
2737
loadIndex(Index.CALCS);
2838
loadIndex(Index.NESTED_SIMPLE);
2939
loadIndex(Index.BIG5);
30-
Request request1 =
31-
new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/7?refresh=true");
32-
request1.setJsonEntity(
33-
"{\"name\":\" "
34-
+ " \",\"age\":27,\"state\":\"B.C\",\"country\":\"Canada\",\"year\":2023,\"month\":4}");
35-
client().performRequest(request1);
36-
Request request2 =
37-
new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/8?refresh=true");
38-
request2.setJsonEntity(
39-
"{\"name\":\"\",\"age\":57,\"state\":\"B.C\",\"country\":\"Canada\",\"year\":2023,\"month\":4}");
40-
client().performRequest(request2);
40+
if (!stateCountryWithNullExisted) {
41+
Request request1 =
42+
new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/7?refresh=true");
43+
request1.setJsonEntity(
44+
"{\"name\":\" "
45+
+ " \",\"age\":27,\"state\":\"B.C\",\"country\":\"Canada\",\"year\":2023,\"month\":4}");
46+
client().performRequest(request1);
47+
Request request2 =
48+
new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/8?refresh=true");
49+
request2.setJsonEntity(
50+
"{\"name\":\"\",\"age\":57,\"state\":\"B.C\",\"country\":\"Canada\",\"year\":2023,\"month\":4}");
51+
client().performRequest(request2);
52+
}
4153
}
4254

4355
@Test
@@ -54,13 +66,17 @@ public void testIsNull() throws IOException {
5466

5567
@Test
5668
public void testIsNullWithStruct() throws IOException {
69+
// Queries the object/struct parent field 'aws' directly.
70+
assumeNotAnalytics(STRUCT_PARENT_FIELD);
5771
JSONObject actual = executeQuery("source=big5 | where isnull(aws) | fields aws");
5872
verifySchema(actual, schema("aws", "struct"));
5973
verifyNumOfRows(actual, 0);
6074
}
6175

6276
@Test
6377
public void testIsNullWithNested() throws IOException {
78+
// Queries a nested field; the route strips nested fields at index creation.
79+
assumeNotAnalytics(NESTED_FIELDS);
6480
JSONObject actual =
6581
executeQuery(
6682
String.format(
@@ -124,13 +140,17 @@ public void testIsNotNullWithSingleNotEquals() throws IOException {
124140

125141
@Test
126142
public void testIsNotNullWithStruct() throws IOException {
143+
// Queries the object/struct parent field 'aws' directly.
144+
assumeNotAnalytics(STRUCT_PARENT_FIELD);
127145
JSONObject actual = executeQuery("source=big5 | where isnotnull(aws) | fields aws");
128146
verifySchema(actual, schema("aws", "struct"));
129147
verifyNumOfRows(actual, 3);
130148
}
131149

132150
@Test
133151
public void testIsNotNullWithNested() throws IOException {
152+
// Queries a nested field; the route strips nested fields at index creation.
153+
assumeNotAnalytics(NESTED_FIELDS);
134154
JSONObject actual =
135155
executeQuery(
136156
String.format(
@@ -165,6 +185,8 @@ public void testNullIf() throws IOException {
165185

166186
@Test
167187
public void testNullIfWithExpression() throws IOException {
188+
// concat('H', name) over the null-name row diverges (NULL-as-empty vs NULL-propagating).
189+
assumeNotAnalytics(CONCAT_NULL_AS_EMPTY);
168190
JSONObject actual =
169191
executeQuery(
170192
String.format(
@@ -354,6 +376,8 @@ public void testLatest() throws IOException {
354376

355377
@Test
356378
public void testEarliestWithEval() throws IOException {
379+
// earliest('now', utc_timestamp()) resolves true on the route but false on v2 (clock source).
380+
assumeNotAnalytics(EARLIEST_LATEST_NOW_CLOCK);
357381
JSONObject actual =
358382
executeQuery(
359383
String.format(

integ-test/src/test/java/org/opensearch/sql/util/AnalyticsRouteLimitation.java

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,42 @@ public enum AnalyticsRouteLimitation {
108108
HEAD_WITHOUT_STABLE_SORT(
109109
"head N without a sort on a key that is unique over the head window is non-deterministic on"
110110
+ " the analytics-engine route, and a nullable tiebreak orders nulls differently than the"
111-
+ " v2/Calcite path.");
111+
+ " v2/Calcite path."),
112+
113+
/**
114+
* Querying an {@code object}/struct parent field directly (e.g. {@code isnull(aws)} where {@code
115+
* aws} is an {@code object}) fails on the analytics-engine route with {@code FIELD_NOT_FOUND}.
116+
* The route flattens objects into dotted leaf columns — {@code aws.cloudwatch.log_group} scans
117+
* fine — but the struct parent is not exposed as a queryable column. Distinct from {@link
118+
* #NESTED_FIELDS}: {@code object} parents survive in the OpenSearch mapping (they aren't stripped
119+
* at load) yet still can't be referenced as a whole.
120+
*/
121+
STRUCT_PARENT_FIELD(
122+
"Querying an object/struct parent field directly is unsupported on the analytics-engine"
123+
+ " route: objects are flattened to dotted leaf columns and the parent resolves to"
124+
+ " FIELD_NOT_FOUND."),
125+
126+
/**
127+
* {@code concat()} over a NULL argument diverges: the analytics-engine route (DataFusion) treats
128+
* NULL as an empty string (e.g. {@code concat('H', null)} = {@code 'H'}), whereas the v2/Calcite
129+
* engine propagates NULL ({@code concat('H', null)} = {@code null}). Any expression that depends
130+
* on the NULL-propagating behavior over a possibly-null operand diverges.
131+
*/
132+
CONCAT_NULL_AS_EMPTY(
133+
"concat() treats a NULL argument as an empty string on the analytics-engine route (DataFusion"
134+
+ " semantics), whereas the v2/Calcite engine propagates NULL."),
135+
136+
/**
137+
* {@code earliest('now', <ts>)} / {@code latest('now', <ts>)} where {@code <ts>} is {@code
138+
* utc_timestamp()} diverge: on the analytics-engine route the relative-time {@code 'now'} and
139+
* {@code utc_timestamp()} resolve to the same instant (so {@code earliest('now', now)} is {@code
140+
* true}), whereas on the v2/Calcite path they differ (it is {@code false}) — a clock-source
141+
* divergence between the relative-time evaluation and {@code utc_timestamp()}.
142+
*/
143+
EARLIEST_LATEST_NOW_CLOCK(
144+
"earliest/latest with relative-time 'now' against utc_timestamp() diverges on the"
145+
+ " analytics-engine route: 'now' and utc_timestamp() resolve to the same instant"
146+
+ " (earliest('now', now) is true), but differ on the v2/Calcite path (false).");
112147

113148
private final String reason;
114149

0 commit comments

Comments
 (0)