Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ public void setUpIndices() throws Exception {
initClient();
}

// When -Dtests.analytics.parquet_indices=true, make every index (including ones a test
// auto-creates via a raw document PUT, which bypasses createIndexByRestClient) parquet-backed
// composite, so it is stored as a DataFormatAwareEngine and is actually scannable by the
// analytics engine it routes to. Must run before init() creates any index.
TestUtils.AnalyticsIndexConfig.applyClusterSettings(client());

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Is any cleanup required in the teardown logic?
  2. I assume this also applies to all SQL tests, if any of them create an index via a raw document PUT?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looked into the issues:

  1. No extra cleanup needed — same lifecycle as enableCalcite(). applyClusterSettings() is called from setUpIndices() (@Before), so it is re-applied before every test method, and all of these settings are wiped by the existing @AfterClass cleanUpIndices() → wipeAllClusterSettings() (which nulls persistent.* / transient.* after each class).
  2. Correct — but only when -Dtests.analytics.parquet_indices=true, which is set only for the analytics-route runs.


if (shouldResetQuerySizeLimit()) {
resetQuerySizeLimit();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;
import org.json.JSONArray;
import org.json.JSONObject;
import org.opensearch.action.bulk.BulkRequest;
import org.opensearch.action.bulk.BulkResponse;
Expand Down Expand Up @@ -68,6 +69,11 @@ public static boolean isEnabled() {
return Boolean.parseBoolean(System.getProperty(ENABLED_PROP, "false"));
}

// Composite-store format values shared by the index-level and cluster-level settings below.
private static final String DATAFORMAT_COMPOSITE = "composite";
private static final String PRIMARY_FORMAT_PARQUET = "parquet";
private static final String SECONDARY_FORMAT_LUCENE = "lucene";

/**
* Inject the parquet-backed composite-store index settings into {@code jsonObject}. No-op when
* the config is disabled; idempotent — safe on any index-creation JSON shape.
Expand All @@ -82,13 +88,38 @@ static void applyIndexCreationSettings(JSONObject jsonObject) {
settings.has("index") ? settings.getJSONObject("index") : new JSONObject();
indexSettings.put("number_of_shards", 1);
indexSettings.put("pluggable.dataformat.enabled", true);
indexSettings.put("pluggable.dataformat", "composite");
indexSettings.put("composite.primary_data_format", "parquet");
indexSettings.put("composite.secondary_data_formats", new org.json.JSONArray().put("lucene"));
indexSettings.put("pluggable.dataformat", DATAFORMAT_COMPOSITE);
indexSettings.put("composite.primary_data_format", PRIMARY_FORMAT_PARQUET);
indexSettings.put(
"composite.secondary_data_formats", new JSONArray().put(SECONDARY_FORMAT_LUCENE));
settings.put("index", indexSettings);
jsonObject.put("settings", settings);
}

/**
 * Push the composite-store defaults (parquet primary, lucene secondary) as persistent
 * cluster-level settings.
 *
 * <p>Indices that a test auto-creates through a raw document {@code PUT} never pass through
 * {@link #applyIndexCreationSettings}, so without these cluster defaults they would pick up only
 * the composite data-format value (and therefore route to the analytics engine) without the
 * {@code .enabled} flag, ending up stored as a plain-Lucene {@code EngineBackedIndexer} that
 * fails at query time.
 *
 * <p>Does nothing when the analytics index config is disabled. Safe to call repeatedly.
 *
 * @param client REST client used to send the {@code PUT /_cluster/settings} request
 */
public static void applyClusterSettings(RestClient client) {
  if (isEnabled()) {
    // Secondary formats are sent as a JSON array, mirroring the index-level setting.
    JSONArray secondaryFormats = new JSONArray();
    secondaryFormats.put(SECONDARY_FORMAT_LUCENE);

    JSONObject compositeDefaults = new JSONObject();
    compositeDefaults.put("cluster.pluggable.dataformat.enabled", true);
    compositeDefaults.put("cluster.pluggable.dataformat", DATAFORMAT_COMPOSITE);
    compositeDefaults.put("cluster.composite.primary_data_format", PRIMARY_FORMAT_PARQUET);
    compositeDefaults.put("cluster.composite.secondary_data_formats", secondaryFormats);

    // Wrap the defaults under the "persistent" scope of the cluster-settings body.
    JSONObject payload = new JSONObject();
    payload.put("persistent", compositeDefaults);

    Request updateSettings = new Request("PUT", "/_cluster/settings");
    updateSettings.setJsonEntity(payload.toString());
    performRequest(client, updateSettings);
  }
}

/**
* Returns the {@code _bulk} refresh query string for the current index type. Parquet-backed
* indices in the analytics-backend-lucene composite engine don't yet implement {@code
Expand Down
Loading