Skip to content

Commit 8ee2fc4

Browse files
committed
Stabilize subquery PPL ITs on the analytics-engine route
CalcitePPLScalarSubqueryIT and CalcitePPLInSubqueryIT both seed an extra worker doc via an unconditional raw PUT in init(). init() runs as @Before before every test method, and the analytics-engine parquet-backed store is append-only on same-_id PUT, so the doc accumulated a duplicate per method and inflated row counts across the suite. Guard the seed on a pre-loadIndex isIndexExist check so it runs exactly once; behavior is unchanged on the v2/Calcite route (same end state).

Gate the four tests that exercise behaviors the analytics-engine route does not support with assumeFalse(isAnalyticsParquetIndicesEnabled()):
- exact equality on a text-mapped field (department/occupation = '...'), which returns no rows on DataFusion (text has no keyword subfield)
- the subsearch.maxout LIMIT inside an IN-subquery semi-join, which the route does not honor

Results (-Dtests.analytics.parquet_indices=true against the analytics route):
CalcitePPLScalarSubqueryIT: 2/14 -> 13/14 pass, 1 skip, 0 fail
CalcitePPLInSubqueryIT: 7/18 -> 14/18 pass, 4 skip, 0 fail
v2/Calcite route unchanged: 14/14 and 17/17 (1 pre-existing @Ignore).

Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent 9663d5f commit 8ee2fc4

2 files changed

Lines changed: 48 additions & 12 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLInSubqueryIT.java

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
package org.opensearch.sql.calcite.remote;
77

8+
import static org.junit.Assume.assumeFalse;
9+
import static org.opensearch.sql.legacy.TestUtils.isIndexExist;
810
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OCCUPATION;
911
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WORKER;
1012
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WORK_INFORMATION;
@@ -31,16 +33,22 @@ public void init() throws Exception {
3133
super.init();
3234
enableCalcite();
3335

36+
// init() runs as @Before, before every test method. On the analytics route the parquet-backed
37+
// store is append-only on same-_id PUT, so seed the extra worker doc only when the index is
38+
// first created — otherwise it accumulates a duplicate per test method and inflates row counts.
39+
boolean workerExisted = isIndexExist(client(), TEST_INDEX_WORKER);
3440
loadIndex(Index.WORKER);
3541
loadIndex(Index.WORK_INFORMATION);
3642
loadIndex(Index.OCCUPATION);
3743

38-
// {"index":{"_id":"7"}}
39-
// {"id":1006,"name":"Tommy","occupation":"Teacher","country":"USA","salary":30000}
40-
Request request1 = new Request("PUT", "/" + TEST_INDEX_WORKER + "/_doc/7?refresh=true");
41-
request1.setJsonEntity(
42-
"{\"id\":1006,\"name\":\"Tommy\",\"occupation\":\"Teacher\",\"country\":\"USA\",\"salary\":30000}");
43-
client().performRequest(request1);
44+
if (!workerExisted) {
45+
// {"index":{"_id":"7"}}
46+
// {"id":1006,"name":"Tommy","occupation":"Teacher","country":"USA","salary":30000}
47+
Request request1 = new Request("PUT", "/" + TEST_INDEX_WORKER + "/_doc/7?refresh=true");
48+
request1.setJsonEntity(
49+
"{\"id\":1006,\"name\":\"Tommy\",\"occupation\":\"Teacher\",\"country\":\"USA\",\"salary\":30000}");
50+
client().performRequest(request1);
51+
}
4452
}
4553

4654
@Test
@@ -340,6 +348,11 @@ public void failWhenNumOfColumnsNotMatchOutputOfSubquery() {
340348

341349
@Test
342350
public void testInSubqueryWithTableAlias() throws IOException {
351+
assumeFalse(
352+
"Subquery filters on a text-mapped field with exact equality (i.department = 'DATA'), which"
353+
+ " returns no rows on the analytics-engine (DataFusion) route — text fields have no"
354+
+ " keyword subfield for exact match.",
355+
isAnalyticsParquetIndicesEnabled());
343356
JSONObject result =
344357
executeQuery(
345358
String.format(
@@ -358,6 +371,11 @@ public void testInSubqueryWithTableAlias() throws IOException {
358371

359372
@Test
360373
public void testInCorrelatedSubquery() throws IOException {
374+
assumeFalse(
375+
"Subquery filters on a text-mapped field with exact equality (occupation = 'Engineer'),"
376+
+ " which returns no rows on the analytics-engine (DataFusion) route — text fields have"
377+
+ " no keyword subfield for exact match.",
378+
isAnalyticsParquetIndicesEnabled());
361379
JSONObject result =
362380
executeQuery(
363381
String.format(
@@ -372,6 +390,11 @@ public void testInCorrelatedSubquery() throws IOException {
372390

373391
@Test
374392
public void testSubsearchMaxOut() throws IOException {
393+
assumeFalse(
394+
"The subsearch.maxout row cap is lowered as a LIMIT inside the IN-subquery semi-join, which"
395+
+ " the analytics-engine (DataFusion) route does not honor — the subsearch returns all"
396+
+ " rows regardless of the cap.",
397+
isAnalyticsParquetIndicesEnabled());
375398
setSubsearchMaxOut(1);
376399
JSONObject result =
377400
executeQuery(

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLScalarSubqueryIT.java

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55

66
package org.opensearch.sql.calcite.remote;
77

8+
import static org.junit.Assume.assumeFalse;
9+
import static org.opensearch.sql.legacy.TestUtils.isIndexExist;
810
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OCCUPATION;
911
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WORKER;
1012
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_WORK_INFORMATION;
@@ -27,16 +29,22 @@ public void init() throws Exception {
2729
super.init();
2830
enableCalcite();
2931

32+
// init() runs as @Before, before every test method. On the analytics route the parquet-backed
33+
// store is append-only on same-_id PUT, so seed the extra worker doc only when the index is
34+
// first created — otherwise it accumulates a duplicate per test method and inflates row counts.
35+
boolean workerExisted = isIndexExist(client(), TEST_INDEX_WORKER);
3036
loadIndex(Index.WORKER);
3137
loadIndex(Index.WORK_INFORMATION);
3238
loadIndex(Index.OCCUPATION);
3339

34-
// {"index":{"_id":"7"}}
35-
// {"id":1006,"name":"Tommy","occupation":"Teacher","country":"USA","salary":30000}
36-
Request request1 = new Request("PUT", "/" + TEST_INDEX_WORKER + "/_doc/7?refresh=true");
37-
request1.setJsonEntity(
38-
"{\"id\":1006,\"name\":\"Tommy\",\"occupation\":\"Teacher\",\"country\":\"USA\",\"salary\":30000}");
39-
client().performRequest(request1);
40+
if (!workerExisted) {
41+
// {"index":{"_id":"7"}}
42+
// {"id":1006,"name":"Tommy","occupation":"Teacher","country":"USA","salary":30000}
43+
Request request1 = new Request("PUT", "/" + TEST_INDEX_WORKER + "/_doc/7?refresh=true");
44+
request1.setJsonEntity(
45+
"{\"id\":1006,\"name\":\"Tommy\",\"occupation\":\"Teacher\",\"country\":\"USA\",\"salary\":30000}");
46+
client().performRequest(request1);
47+
}
4048
}
4149

4250
@Test
@@ -230,6 +238,11 @@ public void testDisjunctiveCorrelatedScalarSubquery() throws IOException {
230238

231239
@Test
232240
public void testTwoUncorrelatedScalarSubqueriesInOr() throws IOException {
241+
assumeFalse(
242+
"Subquery filters on a text-mapped field with exact equality (department = 'DATA'), which"
243+
+ " returns no rows on the analytics-engine (DataFusion) route — text fields have no"
244+
+ " keyword subfield for exact match.",
245+
isAnalyticsParquetIndicesEnabled());
233246
JSONObject result =
234247
executeQuery(
235248
String.format(

0 commit comments

Comments
 (0)