Skip to content

Commit 56e3395

Browse files
committed
Add tests.analytics.parquet_indices test toggle
Adds a small, opt-in test infrastructure slice so the PPL integration test suite can run end-to-end against the analytics-engine backend without per-test rewiring.

`-Dtests.analytics.parquet_indices=true` makes `TestUtils.createIndexByRestClient` back every test-created index with single-shard composite/parquet storage:

- `index.pluggable.dataformat.enabled = true`
- `index.pluggable.dataformat = "composite"`
- `index.composite.primary_data_format = "parquet"`

`RestUnifiedQueryAction.isAnalyticsIndex` (post-opensearch-project#5432) reads these settings and routes any query against such indices to the analytics-engine planner (DataFusion). No additional cluster setting or routing override is required — the production routing logic is the single source of truth.

Also adds `PPLIntegTestCase.isAnalyticsParquetIndicesEnabled()` as a per-test predicate so individual tests can branch their assertions on engine semantics (DataFusion follows different ordering and null-bucket semantics than the legacy V2 and Calcite-DSL paths).

Bulk loads on parquet-backed indices use `refresh=true` because `analytics-backend-lucene`'s `LuceneCommitter.getSafeCommitInfo` is a `TODO:: with index deleter` stub, which causes `refresh=wait_for` to hang until the test framework request timeout (~60s).

`integ-test/build.gradle` forwards the property to `:integTestRemote` so the gradle command line is the single knob.

Default behavior is unchanged — with the flag unset, every test-created index is Lucene-backed and every IT runs through the existing V2 / Calcite path exactly as before.

Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent cb2d824 commit 56e3395

3 files changed

Lines changed: 68 additions & 2 deletions

File tree

integ-test/build.gradle

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -849,6 +849,14 @@ task integTestRemote(type: RestIntegTestTask) {
849849
systemProperty "user", System.getProperty("user")
850850
systemProperty "password", System.getProperty("password")
851851

852+
// Forward the analytics-engine parquet-indices toggle when set on the gradle command
853+
// line. TestUtils.createIndexByRestClient reads this to back every test-created index
854+
// with composite/parquet so RestUnifiedQueryAction.isAnalyticsIndex (post-#5432) routes
855+
// to the analytics-engine planner via index settings.
856+
if (System.getProperty("tests.analytics.parquet_indices") != null) {
857+
systemProperty 'tests.analytics.parquet_indices', System.getProperty("tests.analytics.parquet_indices")
858+
}
859+
852860
// Set default query size limit
853861
systemProperty 'defaultQuerySizeLimit', '10000'
854862

integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,18 @@ public class TestUtils {
3737

3838
private static final String MAPPING_FILE_PATH = "src/test/resources/indexDefinitions/";
3939

40+
/**
41+
* System property that makes every test-created index parquet-backed (composite primary data
42+
* format = parquet) with a single shard. When set, {@link
43+
* RestUnifiedQueryAction#isAnalyticsIndex} (which routes based on {@code
44+
* index.pluggable.dataformat.enabled} / {@code index.pluggable.dataformat=composite}, see #5432)
45+
* will return {@code true} for every test-created index — exercising the analytics-engine route
46+
* end-to-end without per-test rewiring.
47+
*
48+
* <p>Off by default; normal CI is untouched.
49+
*/
50+
public static final String ANALYTICS_PARQUET_INDICES_PROP = "tests.analytics.parquet_indices";
51+
4052
/**
4153
* Create test index by REST client.
4254
*
@@ -48,6 +60,9 @@ public static void createIndexByRestClient(RestClient client, String indexName,
4860
Request request = new Request("PUT", "/" + indexName);
4961
JSONObject jsonObject = isNullOrEmpty(mapping) ? new JSONObject() : new JSONObject(mapping);
5062
setZeroReplicas(jsonObject);
63+
if (Boolean.parseBoolean(System.getProperty(ANALYTICS_PARQUET_INDICES_PROP, "false"))) {
64+
makeParquetBacked(jsonObject);
65+
}
5166
request.setJsonEntity(jsonObject.toString());
5267
performRequest(client, request);
5368
}
@@ -69,6 +84,25 @@ private static void setZeroReplicas(JSONObject jsonObject) {
6984
jsonObject.put("settings", settings);
7085
}
7186

87+
/**
88+
* Switches the test index to a parquet-backed composite store with a single shard so the
89+
* analytics-engine path has a backend that can scan it. Routing is then driven entirely by index
90+
* settings (#5432) — no other test plumbing required.
91+
*/
92+
private static void makeParquetBacked(JSONObject jsonObject) {
93+
JSONObject settings =
94+
jsonObject.has("settings") ? jsonObject.getJSONObject("settings") : new JSONObject();
95+
JSONObject indexSettings =
96+
settings.has("index") ? settings.getJSONObject("index") : new JSONObject();
97+
indexSettings.put("number_of_shards", 1);
98+
indexSettings.put("pluggable.dataformat.enabled", true);
99+
indexSettings.put("pluggable.dataformat", "composite");
100+
indexSettings.put("composite.primary_data_format", "parquet");
101+
indexSettings.put("composite.secondary_data_formats", new org.json.JSONArray());
102+
settings.put("index", indexSettings);
103+
jsonObject.put("settings", settings);
104+
}
105+
72106
/**
73107
* https://github.com/elastic/elasticsearch/pull/49959<br>
74108
* Deprecate creation of dot-prefixed index names except for hidden and system indices. Create
@@ -116,8 +150,17 @@ public static boolean isIndexExist(RestClient client, String indexName) {
116150
public static void loadDataByRestClient(
117151
RestClient client, String indexName, String dataSetFilePath) throws IOException {
118152
Path path = Paths.get(getResourceFilePath(dataSetFilePath));
119-
Request request =
120-
new Request("POST", "/" + indexName + "/_bulk?refresh=wait_for&wait_for_active_shards=all");
153+
// Workaround: parquet-backed indices in the analytics-backend-lucene composite engine
154+
// do not yet implement LuceneCommitter.getSafeCommitInfo (UnsupportedOperationException
155+
// "TODO:: with index deleter"), which hangs refresh=wait_for until the test framework
156+
// request timeout (~60s). Force-refresh sidesteps the safe-commit-info path while still
157+
// making the bulk-loaded docs immediately searchable. Drop this branch once
158+
// LuceneCommitter.getSafeCommitInfo is implemented.
159+
String refreshParam =
160+
Boolean.parseBoolean(System.getProperty(ANALYTICS_PARQUET_INDICES_PROP, "false"))
161+
? "refresh=true"
162+
: "refresh=wait_for&wait_for_active_shards=all";
163+
Request request = new Request("POST", "/" + indexName + "/_bulk?" + refreshParam);
121164
request.setJsonEntity(new String(Files.readAllBytes(path)));
122165
performRequest(client, request);
123166
}

integ-test/src/test/java/org/opensearch/sql/ppl/PPLIntegTestCase.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.opensearch.sql.common.setting.Settings;
3131
import org.opensearch.sql.common.setting.Settings.Key;
3232
import org.opensearch.sql.legacy.SQLIntegTestCase;
33+
import org.opensearch.sql.legacy.TestUtils;
3334
import org.opensearch.sql.protocol.response.format.Format;
3435
import org.opensearch.sql.util.RetryProcessor;
3536

@@ -49,6 +50,20 @@ protected void init() throws Exception {
4950
disableCalcite(); // calcite is enabled by default from 3.3.0
5051
}
5152

53+
/**
54+
* Returns {@code true} when the suite was started with {@code
55+
* -Dtests.analytics.parquet_indices=true}. Use this to branch test assertions that depend on the
56+
* execution backend — when this flag is on, every test-created index is composite/parquet, which
57+
* makes {@code RestUnifiedQueryAction.isAnalyticsIndex} (post-#5432) route every query to the
58+
* analytics-engine backend (DataFusion) instead of the Calcite enumerable / DSL-pushdown backend.
59+
* DataFusion follows different ordering and null-bucket semantics than the legacy V2 and
60+
* Calcite-DSL paths.
61+
*/
62+
public static boolean isAnalyticsParquetIndicesEnabled() {
63+
return Boolean.parseBoolean(
64+
System.getProperty(TestUtils.ANALYTICS_PARQUET_INDICES_PROP, "false"));
65+
}
66+
5267
protected JSONObject executeQuery(String query) throws IOException {
5368
return jsonify(executeQueryToString(query));
5469
}

0 commit comments

Comments
 (0)