Skip to content

Commit 31593a4

Browse files
committed
Stabilize CalcitePPLConditionBuiltinFunctionIT on the analytics-engine route
init() seeds two extra docs into state_country_with_null via unconditional raw PUTs. init() runs as @Before before every test method, and the analytics-engine parquet-backed store is append-only on same-_id PUT, so the docs accumulated a duplicate per method and inflated row counts across the suite. Guard the seed on a pre-loadIndex isIndexExist check so it runs exactly once; behavior is unchanged on the v2/Calcite route (same end state).

Gate the six tests that exercise behaviors the analytics-engine route does not support with assumeFalse(isAnalyticsParquetIndicesEnabled()):
- isnull/isnotnull on the object/struct parent field big5.aws — objects are flattened to dotted leaf columns and the struct parent is not a queryable column, so it resolves to FIELD_NOT_FOUND
- isnull/isnotnull on the nested field nested_simple.address — the route cannot store nested fields, so the test infra strips them at index creation (UNSUPPORTED_FIELD_TYPES) and the field resolves to FIELD_NOT_FOUND
- nullif(concat('H', name), ...) over a null name — DataFusion concat treats NULL as empty string ('H'), whereas v2/Calcite propagates NULL
- earliest('now', utc_timestamp()) — relative-time 'now' and utc_timestamp() resolve to the same instant on the route (true) but differ on v2 (false)

Results (-Dtests.analytics.parquet_indices=true against the analytics route):
CalcitePPLConditionBuiltinFunctionIT: 6/24 -> 18/24 pass, 6 skip, 0 fail
v2/Calcite route unchanged: 24/24 pass, 0 skip.

Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent 9663d5f commit 31593a4

1 file changed

Lines changed: 51 additions & 11 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLConditionBuiltinFunctionIT.java

Lines changed: 51 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
55

66
package org.opensearch.sql.calcite.remote;
77

8+
import static org.junit.Assume.assumeFalse;
9+
import static org.opensearch.sql.legacy.TestUtils.isIndexExist;
810
import static org.opensearch.sql.legacy.TestsConstants.*;
911
import static org.opensearch.sql.util.MatcherUtils.*;
1012
import static org.opensearch.sql.util.MatcherUtils.rows;
@@ -22,22 +24,29 @@ public void init() throws Exception {
2224
super.init();
2325
enableCalcite();
2426

27+
// init() runs as @Before, before every test method. On the analytics route the parquet-backed
28+
// store is append-only on same-_id PUT, so seed the extra docs only when the index is first
29+
// created — otherwise they accumulate a duplicate per test method and inflate row counts.
30+
boolean stateCountryWithNullExisted =
31+
isIndexExist(client(), TEST_INDEX_STATE_COUNTRY_WITH_NULL);
2532
loadIndex(Index.STATE_COUNTRY);
2633
loadIndex(Index.STATE_COUNTRY_WITH_NULL);
2734
loadIndex(Index.CALCS);
2835
loadIndex(Index.NESTED_SIMPLE);
2936
loadIndex(Index.BIG5);
30-
Request request1 =
31-
new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/7?refresh=true");
32-
request1.setJsonEntity(
33-
"{\"name\":\" "
34-
+ " \",\"age\":27,\"state\":\"B.C\",\"country\":\"Canada\",\"year\":2023,\"month\":4}");
35-
client().performRequest(request1);
36-
Request request2 =
37-
new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/8?refresh=true");
38-
request2.setJsonEntity(
39-
"{\"name\":\"\",\"age\":57,\"state\":\"B.C\",\"country\":\"Canada\",\"year\":2023,\"month\":4}");
40-
client().performRequest(request2);
37+
if (!stateCountryWithNullExisted) {
38+
Request request1 =
39+
new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/7?refresh=true");
40+
request1.setJsonEntity(
41+
"{\"name\":\" "
42+
+ " \",\"age\":27,\"state\":\"B.C\",\"country\":\"Canada\",\"year\":2023,\"month\":4}");
43+
client().performRequest(request1);
44+
Request request2 =
45+
new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/8?refresh=true");
46+
request2.setJsonEntity(
47+
"{\"name\":\"\",\"age\":57,\"state\":\"B.C\",\"country\":\"Canada\",\"year\":2023,\"month\":4}");
48+
client().performRequest(request2);
49+
}
4150
}
4251

4352
@Test
@@ -54,13 +63,23 @@ public void testIsNull() throws IOException {
5463

5564
@Test
5665
public void testIsNullWithStruct() throws IOException {
66+
assumeFalse(
67+
"Queries the object/struct parent field 'aws'. On the analytics-engine route objects are"
68+
+ " flattened to dotted leaf columns and the struct parent is not a queryable column,"
69+
+ " so the field resolves to FIELD_NOT_FOUND.",
70+
isAnalyticsParquetIndicesEnabled());
5771
JSONObject actual = executeQuery("source=big5 | where isnull(aws) | fields aws");
5872
verifySchema(actual, schema("aws", "struct"));
5973
verifyNumOfRows(actual, 0);
6074
}
6175

6276
@Test
6377
public void testIsNullWithNested() throws IOException {
78+
assumeFalse(
79+
"Queries the 'address' field of type nested. The analytics-engine route cannot store nested"
80+
+ " fields, so the test infra strips them at index creation (UNSUPPORTED_FIELD_TYPES)"
81+
+ " and the field resolves to FIELD_NOT_FOUND.",
82+
isAnalyticsParquetIndicesEnabled());
6483
JSONObject actual =
6584
executeQuery(
6685
String.format(
@@ -124,13 +143,23 @@ public void testIsNotNullWithSingleNotEquals() throws IOException {
124143

125144
@Test
126145
public void testIsNotNullWithStruct() throws IOException {
146+
assumeFalse(
147+
"Queries the object/struct parent field 'aws'. On the analytics-engine route objects are"
148+
+ " flattened to dotted leaf columns and the struct parent is not a queryable column,"
149+
+ " so the field resolves to FIELD_NOT_FOUND.",
150+
isAnalyticsParquetIndicesEnabled());
127151
JSONObject actual = executeQuery("source=big5 | where isnotnull(aws) | fields aws");
128152
verifySchema(actual, schema("aws", "struct"));
129153
verifyNumOfRows(actual, 3);
130154
}
131155

132156
@Test
133157
public void testIsNotNullWithNested() throws IOException {
158+
assumeFalse(
159+
"Queries the 'address' field of type nested. The analytics-engine route cannot store nested"
160+
+ " fields, so the test infra strips them at index creation (UNSUPPORTED_FIELD_TYPES)"
161+
+ " and the field resolves to FIELD_NOT_FOUND.",
162+
isAnalyticsParquetIndicesEnabled());
134163
JSONObject actual =
135164
executeQuery(
136165
String.format(
@@ -165,6 +194,11 @@ public void testNullIf() throws IOException {
165194

166195
@Test
167196
public void testNullIfWithExpression() throws IOException {
197+
assumeFalse(
198+
"concat() over a NULL argument: the analytics-engine route (DataFusion) treats NULL as an"
199+
+ " empty string (e.g. concat('H', null) = 'H'), whereas the v2/Calcite engine"
200+
+ " propagates NULL (concat('H', null) = null), so the null-name row diverges.",
201+
isAnalyticsParquetIndicesEnabled());
168202
JSONObject actual =
169203
executeQuery(
170204
String.format(
@@ -354,6 +388,12 @@ public void testLatest() throws IOException {
354388

355389
@Test
356390
public void testEarliestWithEval() throws IOException {
391+
assumeFalse(
392+
"earliest('now', now) compares the relative-time 'now' against utc_timestamp(). On the"
393+
+ " analytics-engine route both resolve to the same instant so the result is true,"
394+
+ " whereas on the v2/Calcite route they differ and it is false — a clock-source"
395+
+ " divergence between the relative-time evaluation and utc_timestamp().",
396+
isAnalyticsParquetIndicesEnabled());
357397
JSONObject actual =
358398
executeQuery(
359399
String.format(

0 commit comments

Comments
 (0)