Skip to content

Commit 87c0a13

Browse files
authored
Bring CalcitePPLBasicIT to parity on the analytics-engine route (opensearch-project#5542)
Guard raw-document seeding in init() with isIndexExist: the parquet/composite store is append-only on a same-_id PUT, so re-seeding test/test1 on every @Before inflated their row counts across the class. Pin the column order of SELECT-* and `fields -` (exclusion) queries with an explicit `| fields`, since the analytics-engine route returns columns in storage (alphabetical) order rather than mapping order. Branch the tests that exercise analytics-route-incompatible behavior behind isAnalyticsParquetIndicesEnabled(): alias/nested field storage, decimal-vs-double literal arithmetic, multi-index integer/long schema merge, and the implicit search-filter syntax. Depends on opensearch-project#5541, which strips analytics-engine-unsupported field types at load and unblocks index creation for this class. Analytics-engine route: 46 failing -> 0 failing (39 pass, 7 documented skips). v2 route: 46/46 pass. Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent 89d1040 commit 87c0a13

1 file changed

Lines changed: 88 additions & 16 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java

Lines changed: 88 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
package org.opensearch.sql.calcite.remote;
77

8+
import static org.junit.Assume.assumeFalse;
9+
import static org.opensearch.sql.legacy.TestUtils.isIndexExist;
810
import static org.opensearch.sql.legacy.TestsConstants.*;
911
import static org.opensearch.sql.util.MatcherUtils.rows;
1012
import static org.opensearch.sql.util.MatcherUtils.schema;
@@ -27,16 +29,24 @@ public void init() throws Exception {
2729
super.init();
2830
enableCalcite();
2931

30-
Request request1 = new Request("PUT", "/test/_doc/1?refresh=true");
31-
request1.setJsonEntity("{\"name\": \"hello\", \"age\": 20}");
32-
client().performRequest(request1);
33-
Request request2 = new Request("PUT", "/test/_doc/2?refresh=true");
34-
request2.setJsonEntity("{\"name\": \"world\", \"age\": 30}");
35-
client().performRequest(request2);
32+
// The parquet/composite store on the analytics-engine route is append-only: re-PUTting the same
33+
// _id adds a row instead of replacing it. init() runs as @Before (before every test), so
34+
// re-seeding these raw-document indices each time would inflate their row counts. Seed once,
35+
// mirroring loadIndex's isIndexExist guard; v2 behavior is unchanged (same end state).
36+
if (!isIndexExist(client(), "test")) {
37+
Request request1 = new Request("PUT", "/test/_doc/1?refresh=true");
38+
request1.setJsonEntity("{\"name\": \"hello\", \"age\": 20}");
39+
client().performRequest(request1);
40+
Request request2 = new Request("PUT", "/test/_doc/2?refresh=true");
41+
request2.setJsonEntity("{\"name\": \"world\", \"age\": 30}");
42+
client().performRequest(request2);
43+
}
3644
// PUT index test1
37-
Request request3 = new Request("PUT", "/test1/_doc/1?refresh=true");
38-
request3.setJsonEntity("{\"name\": \"HELLO\", \"alias\": \"Hello\"}");
39-
client().performRequest(request3);
45+
if (!isIndexExist(client(), "test1")) {
46+
Request request3 = new Request("PUT", "/test1/_doc/1?refresh=true");
47+
request3.setJsonEntity("{\"name\": \"HELLO\", \"alias\": \"Hello\"}");
48+
client().performRequest(request3);
49+
}
4050

4151
loadIndex(Index.BANK);
4252
loadIndex(Index.DATA_TYPE_ALIAS);
@@ -46,28 +56,43 @@ public void init() throws Exception {
4656

4757
@Test
4858
public void testInvalidTable() {
59+
if (isAnalyticsParquetIndicesEnabled()) {
60+
// The analytics-engine route resolves tables through Calcite's catalog, which raises a
61+
// CalciteException ("Table 'unknown' not found", surfaced as HTTP 400) rather than the v2
62+
// path's IllegalStateException ("no such index [unknown]").
63+
Throwable e =
64+
assertThrowsWithReplace(ResponseException.class, () -> executeQuery("source=unknown"));
65+
verifyErrorMessageContains(e, "Table 'unknown' not found");
66+
return;
67+
}
4968
Throwable e =
5069
assertThrowsWithReplace(IllegalStateException.class, () -> executeQuery("source=unknown"));
5170
verifyErrorMessageContains(e, "no such index [unknown]");
5271
}
5372

5473
@Test
5574
public void testSourceQuery() throws IOException {
56-
JSONObject actual = executeQuery("source=test");
75+
// Pin the projection with an explicit `| fields`. The analytics-engine route returns SELECT-*
76+
// columns in parquet storage order (alphabetical), not the v2 path's mapping order, so an
77+
// unpinned `source=test` yields [age, name]. Pinning makes column order deterministic across
78+
// both engines without changing which rows are returned.
79+
JSONObject actual = executeQuery("source=test | fields name, age");
5780
verifySchema(actual, schema("name", "string"), schema("age", "bigint"));
5881
verifyDataRows(actual, rows("hello", 20), rows("world", 30));
5982
}
6083

6184
@Test
6285
public void testMultipleSourceQuery_SameTable() throws IOException {
63-
JSONObject actual = executeQuery("source=test, test");
86+
// Pin column order — see testSourceQuery (analytics route returns storage/alphabetical order).
87+
JSONObject actual = executeQuery("source=test, test | fields name, age");
6488
verifySchema(actual, schema("name", "string"), schema("age", "bigint"));
6589
verifyDataRows(actual, rows("hello", 20), rows("world", 30));
6690
}
6791

6892
@Test
6993
public void testMultipleSourceQuery_DifferentTables() throws IOException {
70-
JSONObject actual = executeQuery("source=test, test1");
94+
// Pin column order — see testSourceQuery (analytics route returns storage/alphabetical order).
95+
JSONObject actual = executeQuery("source=test, test1 | fields name, alias, age");
7196
verifySchema(
7297
actual, schema("name", "string"), schema("age", "bigint"), schema("alias", "string"));
7398
verifyDataRows(
@@ -76,7 +101,8 @@ public void testMultipleSourceQuery_DifferentTables() throws IOException {
76101

77102
@Test
78103
public void testIndexPatterns() throws IOException {
79-
JSONObject actual = executeQuery("source=test*");
104+
// Pin column order — see testSourceQuery (analytics route returns storage/alphabetical order).
105+
JSONObject actual = executeQuery("source=test* | fields name, alias, age");
80106
verifySchema(
81107
actual, schema("name", "string"), schema("age", "bigint"), schema("alias", "string"));
82108
verifyDataRows(
@@ -169,6 +195,12 @@ public void testFilterQueryWithOr() throws IOException {
169195

170196
@Test
171197
public void testFilterQueryWithOr2() throws IOException {
198+
assumeFalse(
199+
"The implicit search-filter syntax (source=idx (cond)) lowers to a Lucene query_string,"
200+
+ " which the DataFusion backend doesn't support; it matches no rows on the"
201+
+ " analytics-engine route. The explicit `| where` form (testFilterQueryWithOr) works"
202+
+ " there.",
203+
isAnalyticsParquetIndicesEnabled());
172204
JSONObject actual =
173205
executeQuery(
174206
String.format(
@@ -183,7 +215,14 @@ public void testFilterQueryWithOr2() throws IOException {
183215
public void testQueryMinusFields() throws IOException {
184216
JSONObject actual =
185217
executeQuery(
186-
String.format("source=%s | fields - firstname, lastname, birthdate", TEST_INDEX_BANK));
218+
String.format(
219+
// Trailing `| fields` pins the post-exclusion column order — the analytics-engine
220+
// route returns the surviving columns in storage (alphabetical) order, not the v2
221+
// path's mapping order. The `fields -` exclusion is still the clause under test.
222+
"source=%s | fields - firstname, lastname, birthdate"
223+
+ " | fields account_number, address, gender, city, balance, employer, state,"
224+
+ " age, email, male",
225+
TEST_INDEX_BANK));
187226
verifySchema(
188227
actual,
189228
schema("account_number", "bigint"),
@@ -282,8 +321,12 @@ public void testQueryMinusFieldsWithFilter() throws IOException {
282321
JSONObject actual =
283322
executeQuery(
284323
String.format(
324+
// Trailing `| fields` pins the post-exclusion column order — see
325+
// testQueryMinusFields.
285326
"source=%s | where (account_number = 20 or city = 'Brogan') and balance > 10000 |"
286-
+ " fields - firstname, lastname",
327+
+ " fields - firstname, lastname"
328+
+ " | fields account_number, address, birthdate, gender, city, balance,"
329+
+ " employer, state, age, email, male",
287330
TEST_INDEX_BANK));
288331
verifySchema(
289332
actual,
@@ -360,6 +403,11 @@ public void testMultipleTablesAndFilters_SameTable() throws IOException {
360403

361404
@Test
362405
public void testMultipleTables_DifferentTables() throws IOException {
406+
assumeFalse(
407+
"Multi-index source with a numeric field of conflicting widths (bank.age=integer,"
408+
+ " test.age=long) is rejected by the analytics-engine schema merge (no integer->long"
409+
+ " widening) instead of being coerced like the v2/Calcite path.",
410+
isAnalyticsParquetIndicesEnabled());
363411
JSONObject actual =
364412
executeQuery(String.format("source=%s, test | stats count() as c", TEST_INDEX_BANK));
365413
verifySchema(actual, schema("c", "bigint"));
@@ -368,6 +416,10 @@ public void testMultipleTables_DifferentTables() throws IOException {
368416

369417
@Test
370418
public void testMultipleTables_WithIndexPattern() throws IOException {
419+
assumeFalse(
420+
"Multi-index source with conflicting numeric widths (bank.age=integer, test.age=long) is"
421+
+ " rejected by the analytics-engine schema merge (no integer->long widening).",
422+
isAnalyticsParquetIndicesEnabled());
371423
JSONObject actual =
372424
executeQuery(String.format("source=%s, test* | stats count() as c", TEST_INDEX_BANK));
373425
verifySchema(actual, schema("c", "bigint"));
@@ -376,6 +428,10 @@ public void testMultipleTables_WithIndexPattern() throws IOException {
376428

377429
@Test
378430
public void testMultipleTablesAndFilters_WithIndexPattern() throws IOException {
431+
assumeFalse(
432+
"Multi-index source with conflicting numeric widths (bank.age=integer, test.age=long) is"
433+
+ " rejected by the analytics-engine schema merge (no integer->long widening).",
434+
isAnalyticsParquetIndicesEnabled());
379435
JSONObject actual =
380436
executeQuery(
381437
String.format("source=%s, test* gender = 'F' | stats count() as c", TEST_INDEX_BANK));
@@ -612,6 +668,10 @@ public void testKeepThrowCalciteException() throws IOException {
612668

613669
@Test
614670
public void testAliasDataType() throws IOException {
671+
assumeFalse(
672+
"alias-typed fields are stripped from test datasets on the analytics-engine route (the"
673+
+ " parquet/composite store can't hold them), so alias_col doesn't exist there.",
674+
isAnalyticsParquetIndicesEnabled());
615675
JSONObject result =
616676
executeQuery(
617677
String.format(
@@ -634,6 +694,11 @@ public void testMetaFieldAlias() throws IOException {
634694

635695
@Test
636696
public void testFieldsMergedObject() throws IOException {
697+
assumeFalse(
698+
"This projects machine_array.* (a nested field, stripped from datasets on the"
699+
+ " analytics-engine route) and relies on cross-index object-field merge over the"
700+
+ " merge_test* wildcard, which the analytics-engine route doesn't resolve.",
701+
isAnalyticsParquetIndicesEnabled());
637702
JSONObject result =
638703
executeQuery(
639704
String.format(
@@ -661,7 +726,9 @@ public void testNumericLiteral() throws IOException {
661726
JSONObject result =
662727
executeQuery(
663728
"source=test | eval decimalLiteral = 0.06 - 0.01, doubleLiteral = 0.06d - 0.01d,"
664-
+ " floatLiteral = 0.06f - 0.01f");
729+
+ " floatLiteral = 0.06f - 0.01f"
730+
// Pin column order — see testSourceQuery (analytics route reorders columns).
731+
+ " | fields name, age, decimalLiteral, doubleLiteral, floatLiteral");
665732
verifySchema(
666733
result,
667734
schema("name", "string"),
@@ -677,6 +744,11 @@ public void testNumericLiteral() throws IOException {
677744

678745
@Test
679746
public void testDecimalLiteral() throws IOException {
747+
assumeFalse(
748+
"Non-suffixed decimal literals use DECIMAL arithmetic on the v2/Calcite path but DOUBLE on"
749+
+ " the DataFusion backend (e.g. 0.1 / 0.3 * 0.3 = 0.1 vs 0.0999...), so these"
750+
+ " precision-sensitive expectations diverge on the analytics-engine route.",
751+
isAnalyticsParquetIndicesEnabled());
680752
JSONObject result =
681753
executeQuery(
682754
"source=test | eval r1 = 22 / 7.0, r2 = 22 / 7.0d, r3 = 22.0 / 7, r4 = 22.0d / 7,"

0 commit comments

Comments
 (0)