Skip to content

Commit 47e7c4e

Browse files
committed
Parquet-back raw-PUT test indices on the analytics-engine route
With -Dtests.analytics.parquet_indices=true, indices created by a raw document PUT (e.g. `PUT /test/_doc/1` in a test's init()) bypass AnalyticsIndexConfig.applyIndexCreationSettings, so they inherit the composite *value* — and are therefore routed to the analytics engine by RestUnifiedQueryAction.isAnalyticsIndex — but not the `pluggable.dataformat.enabled` flag. They are then stored as a plain-Lucene EngineBackedIndexer whose acquireReader() is unimplemented, and the query fails with `StreamException[INTERNAL] Failed to start streaming fragment`. Apply the cluster-level composite defaults in setUpIndices() so every index — including raw-PUT ones — is stored as a parquet-backed DataFormatAwareEngine that is actually scannable by the analytics engine it routes to. No-op unless tests.analytics.parquet_indices=true, so normal CI is unchanged. Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent f861d02 commit 47e7c4e

2 files changed

Lines changed: 40 additions & 3 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@ public void setUpIndices() throws Exception {
6666
initClient();
6767
}
6868

69+
// When -Dtests.analytics.parquet_indices=true, make every index (including ones a test
70+
// auto-creates via a raw document PUT, which bypasses createIndexByRestClient) parquet-backed
71+
// composite, so it is stored as a DataFormatAwareEngine and is actually scannable by the
72+
// analytics engine it routes to. Must run before init() creates any index.
73+
TestUtils.AnalyticsIndexConfig.applyClusterSettings(client());
74+
6975
if (shouldResetQuerySizeLimit()) {
7076
resetQuerySizeLimit();
7177
}

integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.util.List;
2424
import java.util.Locale;
2525
import java.util.stream.Collectors;
26+
import org.json.JSONArray;
2627
import org.json.JSONObject;
2728
import org.opensearch.action.bulk.BulkRequest;
2829
import org.opensearch.action.bulk.BulkResponse;
@@ -68,6 +69,11 @@ public static boolean isEnabled() {
6869
return Boolean.parseBoolean(System.getProperty(ENABLED_PROP, "false"));
6970
}
7071

72+
// Composite-store format values shared by the index-level settings written by
// applyIndexCreationSettings and the cluster-level settings written by applyClusterSettings,
// so both code paths always agree on the same format names.
73+
// Value for the "pluggable.dataformat" / "cluster.pluggable.dataformat" settings.
private static final String DATAFORMAT_COMPOSITE = "composite";
74+
// Value for the "composite.primary_data_format" / "cluster.composite.primary_data_format"
// settings.
private static final String PRIMARY_FORMAT_PARQUET = "parquet";
75+
// Sole element of the array stored under "composite.secondary_data_formats" /
// "cluster.composite.secondary_data_formats".
private static final String SECONDARY_FORMAT_LUCENE = "lucene";
76+
7177
/**
7278
* Inject the parquet-backed composite-store index settings into {@code jsonObject}. No-op when
7379
* the config is disabled; idempotent — safe on any index-creation JSON shape.
@@ -82,13 +88,38 @@ static void applyIndexCreationSettings(JSONObject jsonObject) {
8288
settings.has("index") ? settings.getJSONObject("index") : new JSONObject();
8389
indexSettings.put("number_of_shards", 1);
8490
indexSettings.put("pluggable.dataformat.enabled", true);
85-
indexSettings.put("pluggable.dataformat", "composite");
86-
indexSettings.put("composite.primary_data_format", "parquet");
87-
indexSettings.put("composite.secondary_data_formats", new org.json.JSONArray().put("lucene"));
91+
indexSettings.put("pluggable.dataformat", DATAFORMAT_COMPOSITE);
92+
indexSettings.put("composite.primary_data_format", PRIMARY_FORMAT_PARQUET);
93+
indexSettings.put(
94+
"composite.secondary_data_formats", new JSONArray().put(SECONDARY_FORMAT_LUCENE));
8895
settings.put("index", indexSettings);
8996
jsonObject.put("settings", settings);
9097
}
9198

99+
/**
100+
* Set the composite-store defaults at the cluster level so even indices auto-created by a raw
101+
* document {@code PUT} (which bypass {@link #applyIndexCreationSettings}) are parquet-backed.
102+
* Otherwise such an index inherits only the composite value — so it routes to the analytics
103+
* engine — but not the {@code .enabled} flag, leaving it stored as a plain-Lucene {@code
104+
* EngineBackedIndexer} that fails at query time. No-op when disabled; idempotent.
105+
*/
106+
public static void applyClusterSettings(RestClient client) {
107+
if (!isEnabled()) {
108+
return;
109+
}
110+
JSONObject persistent =
111+
new JSONObject()
112+
.put("cluster.pluggable.dataformat.enabled", true)
113+
.put("cluster.pluggable.dataformat", DATAFORMAT_COMPOSITE)
114+
.put("cluster.composite.primary_data_format", PRIMARY_FORMAT_PARQUET)
115+
.put(
116+
"cluster.composite.secondary_data_formats",
117+
new JSONArray().put(SECONDARY_FORMAT_LUCENE));
118+
Request request = new Request("PUT", "/_cluster/settings");
119+
request.setJsonEntity(new JSONObject().put("persistent", persistent).toString());
120+
performRequest(client, request);
121+
}
122+
92123
/**
93124
* Returns the {@code _bulk} refresh query string for the current index type. Parquet-backed
94125
* indices in the analytics-backend-lucene composite engine don't yet implement {@code

0 commit comments

Comments
 (0)