Skip to content

Commit 4432538

Browse files
darjisagar7Sagar DarjiBukhtawar
authored
Adding CompositeMergeHandler and CompositeMergePolicy (#21128)
* Adding CompositeMergeHandler and CompositeMergePolicy Signed-off-by: Sagar Darji <darsaga@amazon.com> # Conflicts: # sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeIndexingExecutionEngine.java * Addressing comments Signed-off-by: Sagar Darji <darsaga@amazon.com> * Split the monolithic CompositeMergeHandler into classes with clear responsibilities: Signed-off-by: Bukhtawar Khan <bukhtawa@amazon.com> * Fix up tests Signed-off-by: Bukhtawar Khan <bukhtawa@amazon.com> * Addressing commits Signed-off-by: Sagar Darji <darsaga@amazon.com> * Integrating the merge flow with the DataFormatAwareEngine Signed-off-by: Sagar Darji <darsaga@amazon.com> # Conflicts: # server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java # server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManager.java # server/src/test/java/org/opensearch/index/engine/exec/coord/CatalogSnapshotManagerTests.java # Conflicts: # server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java * Addressed the comments Signed-off-by: Sagar Darji <darsaga@amazon.com> --------- Signed-off-by: Sagar Darji <darsaga@amazon.com> Signed-off-by: Bukhtawar Khan <bukhtawa@amazon.com> Co-authored-by: Sagar Darji <darsaga@amazon.com> Co-authored-by: Bukhtawar Khan <bukhtawa@amazon.com>
1 parent 30920ea commit 4432538

35 files changed

Lines changed: 2972 additions & 282 deletions

File tree

sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/index/LuceneIndexingExecutionEngine.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.opensearch.common.annotation.ExperimentalApi;
2727
import org.opensearch.index.engine.dataformat.DataFormat;
2828
import org.opensearch.index.engine.dataformat.IndexingExecutionEngine;
29+
import org.opensearch.index.engine.dataformat.MergeResult;
2930
import org.opensearch.index.engine.dataformat.Merger;
3031
import org.opensearch.index.engine.dataformat.RefreshInput;
3132
import org.opensearch.index.engine.dataformat.RefreshResult;
@@ -278,7 +279,8 @@ public RefreshResult refresh(RefreshInput refreshInput) throws IOException {
278279
/** Returns {@code null} — merge scheduling is not yet implemented for the Lucene format. */
279280
@Override
280281
public Merger getMerger() {
281-
return null;
282+
// TODO: Implement merge support as ParquetMerger
283+
return mergeInput -> new MergeResult(Map.of());
282284
}
283285

284286
/**
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

package org.opensearch.composite;

import org.opensearch.action.admin.indices.refresh.RefreshResponse;
import org.opensearch.action.admin.indices.stats.IndicesStatsResponse;
import org.opensearch.action.admin.indices.stats.ShardStats;
import org.opensearch.action.index.IndexResponse;
import org.opensearch.be.lucene.LucenePlugin;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.SuppressForbidden;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.FeatureFlags;
import org.opensearch.core.rest.RestStatus;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.engine.CommitStats;
import org.opensearch.index.engine.dataformat.DataFormatDescriptor;
import org.opensearch.index.engine.dataformat.DataFormatRegistry;
import org.opensearch.index.engine.dataformat.ReaderManagerConfig;
import org.opensearch.index.engine.dataformat.stub.MockDataFormat;
import org.opensearch.index.engine.dataformat.stub.MockDataFormatPlugin;
import org.opensearch.index.engine.dataformat.stub.MockReaderManager;
import org.opensearch.index.engine.exec.EngineReaderManager;
import org.opensearch.index.engine.exec.coord.DataformatAwareCatalogSnapshot;
import org.opensearch.index.merge.MergeStats;
import org.opensearch.index.store.PrecomputedChecksumStrategy;
import org.opensearch.plugins.Plugin;
import org.opensearch.plugins.SearchBackEndPlugin;
import org.opensearch.test.OpenSearchIntegTestCase;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;

/**
 * Integration tests for composite merge operations across single and multiple data format engines.
 *
 * Requires JDK 25 and sandbox enabled. Run with:
 * ./gradlew :sandbox:plugins:composite-engine:internalClusterTest \
 *     --tests "*.CompositeMergeIT" \
 *     -Dsandbox.enabled=true
 */
@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 1)
public class CompositeMergeIT extends OpenSearchIntegTestCase {

    private static final String INDEX_NAME = "test-composite-merge";
    private static final String MERGE_ENABLED_PROPERTY = "opensearch.pluggable.dataformat.merge.enabled";

    // ── Mock DataFormatPlugin using test framework stubs ──

    /** Mock backend plugin exposing a single "parquet" data format built from test stubs. */
    public static class MockParquetDataFormatPlugin extends MockDataFormatPlugin implements SearchBackEndPlugin<Object> {
        private static final MockDataFormat PARQUET_FORMAT = new MockDataFormat("parquet", 0L, Set.of());

        public MockParquetDataFormatPlugin() {
            super(PARQUET_FORMAT);
        }

        @Override
        public Map<String, DataFormatDescriptor> getFormatDescriptors(IndexSettings indexSettings, DataFormatRegistry registry) {
            return Map.of("parquet", new DataFormatDescriptor("parquet", new PrecomputedChecksumStrategy()));
        }

        @Override
        public String name() {
            return "mock-parquet-backend";
        }

        @Override
        public List<String> getSupportedFormats() {
            return List.of("parquet");
        }

        @Override
        public EngineReaderManager<?> createReaderManager(ReaderManagerConfig settings) {
            return new MockReaderManager("parquet");
        }
    }

    // ── Test setup ──

    /** Enables the merge feature property before the cluster starts so nodes pick it up. */
    @Override
    public void setUp() throws Exception {
        enableMerge();
        super.setUp();
    }

    /**
     * Deletes the test index and tears down the cluster, always restoring the
     * merge system property afterwards — even if teardown itself throws —
     * so the flag cannot leak into unrelated tests in the same JVM.
     */
    @Override
    public void tearDown() throws Exception {
        try {
            try {
                client().admin().indices().prepareDelete(INDEX_NAME).get();
            } catch (Exception e) {
                // index may not exist if test failed before creation
            }
            super.tearDown();
        } finally {
            // Must run unconditionally: the property was set in setUp().
            disableMerge();
        }
    }

    @SuppressForbidden(reason = "enable pluggable dataformat merge for integration testing")
    private static void enableMerge() {
        System.setProperty(MERGE_ENABLED_PROPERTY, "true");
    }

    @SuppressForbidden(reason = "restore pluggable dataformat merge property after test")
    private static void disableMerge() {
        System.clearProperty(MERGE_ENABLED_PROPERTY);
    }

    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        return Arrays.asList(MockParquetDataFormatPlugin.class, CompositeDataFormatPlugin.class, LucenePlugin.class);
    }

    @Override
    protected Settings nodeSettings(int nodeOrdinal) {
        return Settings.builder()
            .put(super.nodeSettings(nodeOrdinal))
            .put(FeatureFlags.PLUGGABLE_DATAFORMAT_EXPERIMENTAL_FLAG, true)
            .build();
    }

    // ── Tests ──

    /**
     * Verifies that background merges are triggered automatically after refresh
     * when enough segments accumulate to exceed the TieredMergePolicy threshold.
     * <p>
     * Flow: index docs across many refresh cycles → each refresh calls
     * triggerPossibleMerges() → MergeScheduler picks up merge candidates
     * asynchronously → segment count decreases.
     */
    public void testBackgroundMergeSingleEngine() throws Exception {
        createIndex(INDEX_NAME, singleEngineSettings());
        ensureGreen(INDEX_NAME);

        // Create enough segments to exceed TieredMergePolicy's default threshold (~10)
        int totalSegmentsCreated = indexDocsAcrossMultipleRefreshes(15, 5);

        // Wait for async background merges to complete
        assertBusy(() -> {
            flush(INDEX_NAME);
            DataformatAwareCatalogSnapshot snapshot = getCatalogSnapshot();
            assertTrue(
                "Expected merges to reduce segment count below " + totalSegmentsCreated + ", but got: " + snapshot.getSegments().size(),
                snapshot.getSegments().size() < totalSegmentsCreated
            );
        });

        MergeStats mergeStats = getMergeStats();
        assertTrue("Expected at least one merge to have occurred", mergeStats.getTotal() > 0);

        DataformatAwareCatalogSnapshot snapshot = getCatalogSnapshot();
        assertEquals(Set.of("parquet"), snapshot.getDataFormats());
    }

    // ── Helpers ──

    /** Settings for a single-shard index backed solely by the mock "parquet" format. */
    private Settings singleEngineSettings() {
        return Settings.builder()
            .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
            .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
            .put("index.pluggable.dataformat.enabled", true)
            .put("index.pluggable.dataformat", "composite")
            .put("index.composite.primary_data_format", "parquet")
            .putList("index.composite.secondary_data_formats")
            .build();
    }

    /**
     * Indexes {@code docsPerCycle} documents then refreshes, repeated
     * {@code refreshCycles} times.
     *
     * @return the number of refresh cycles performed — used by callers as the
     *         expected pre-merge segment count (assumes one segment per refresh
     *         cycle — TODO confirm against the composite engine's refresh behavior)
     */
    private int indexDocsAcrossMultipleRefreshes(int refreshCycles, int docsPerCycle) {
        for (int cycle = 0; cycle < refreshCycles; cycle++) {
            for (int i = 0; i < docsPerCycle; i++) {
                IndexResponse response = client().prepareIndex()
                    .setIndex(INDEX_NAME)
                    .setSource("field_text", randomAlphaOfLength(10), "field_number", randomIntBetween(1, 1000))
                    .get();
                assertEquals(RestStatus.CREATED, response.status());
            }
            RefreshResponse refreshResponse = client().admin().indices().prepareRefresh(INDEX_NAME).get();
            assertEquals(RestStatus.OK, refreshResponse.getStatus());
        }
        return refreshCycles;
    }

    /** Reads the catalog snapshot back out of the shard's commit user data. */
    private DataformatAwareCatalogSnapshot getCatalogSnapshot() throws IOException {
        IndicesStatsResponse statsResponse = client().admin().indices().prepareStats(INDEX_NAME).clear().setStore(true).get();
        ShardStats shardStats = statsResponse.getIndex(INDEX_NAME).getShards()[0];
        CommitStats commitStats = shardStats.getCommitStats();
        assertNotNull(commitStats);
        assertTrue(commitStats.getUserData().containsKey(DataformatAwareCatalogSnapshot.CATALOG_SNAPSHOT_KEY));
        return DataformatAwareCatalogSnapshot.deserializeFromString(
            commitStats.getUserData().get(DataformatAwareCatalogSnapshot.CATALOG_SNAPSHOT_KEY),
            Function.identity()
        );
    }

    /** Fetches merge statistics for the test index's single shard. */
    private MergeStats getMergeStats() {
        IndicesStatsResponse statsResponse = client().admin().indices().prepareStats(INDEX_NAME).clear().setMerge(true).get();
        return statsResponse.getIndex(INDEX_NAME).getShards()[0].getStats().getMerge();
    }
}

sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeDataFormat.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,21 +26,25 @@
2626
@ExperimentalApi
2727
public class CompositeDataFormat extends DataFormat {
2828

29+
private final DataFormat primaryDataFormat;
2930
private final List<DataFormat> dataFormats;
3031

3132
/**
32-
* Constructs a CompositeDataFormat from the given list of data formats.
33+
* Constructs a CompositeDataFormat with a designated primary format and a list of all constituent formats.
3334
*
34-
* @param dataFormats the constituent data formats
35+
* @param primaryDataFormat the authoritative data format used for merge operations
36+
* @param dataFormats all constituent data formats (including the primary)
3537
*/
36-
public CompositeDataFormat(List<DataFormat> dataFormats) {
38+
public CompositeDataFormat(DataFormat primaryDataFormat, List<DataFormat> dataFormats) {
39+
this.primaryDataFormat = Objects.requireNonNull(primaryDataFormat, "primaryDataFormat must not be null");
3740
this.dataFormats = List.copyOf(Objects.requireNonNull(dataFormats, "dataFormats must not be null"));
3841
}
3942

4043
/**
4144
* Constructs an empty CompositeDataFormat with no constituent formats.
4245
*/
4346
public CompositeDataFormat() {
47+
this.primaryDataFormat = null;
4448
this.dataFormats = List.of();
4549
}
4650

@@ -53,6 +57,15 @@ public List<DataFormat> getDataFormats() {
5357
return dataFormats;
5458
}
5559

60+
/**
61+
* Returns the primary data format used for merge operations.
62+
*
63+
* @return the primary data format
64+
*/
65+
public DataFormat getPrimaryDataFormat() {
66+
return primaryDataFormat;
67+
}
68+
5669
@Override
5770
public String name() {
5871
return "composite";

sandbox/plugins/composite-engine/src/main/java/org/opensearch/composite/CompositeIndexingExecutionEngine.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import org.opensearch.common.annotation.ExperimentalApi;
1414
import org.opensearch.common.settings.Settings;
1515
import org.opensearch.common.util.io.IOUtils;
16+
import org.opensearch.composite.merge.CompositeMerger;
1617
import org.opensearch.index.IndexSettings;
1718
import org.opensearch.index.engine.dataformat.DataFormat;
1819
import org.opensearch.index.engine.dataformat.DataFormatPlugin;
@@ -129,7 +130,7 @@ public CompositeIndexingExecutionEngine(
129130
}
130131
this.secondaryEngines = Set.copyOf(secondaries);
131132

132-
this.compositeDataFormat = new CompositeDataFormat(allFormats);
133+
this.compositeDataFormat = new CompositeDataFormat(primaryFormat, allFormats);
133134
this.committer = committer;
134135
}
135136

@@ -181,7 +182,7 @@ public Writer<CompositeDocumentInput> createWriter(long writerGeneration) {
181182
/** {@inheritDoc} Delegates to the primary engine's merger. */
182183
@Override
183184
public Merger getMerger() {
184-
return primaryEngine.getMerger();
185+
return new CompositeMerger(this, compositeDataFormat);
185186
}
186187

187188
/**
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.composite.merge;
10+
11+
import org.opensearch.common.annotation.ExperimentalApi;
12+
import org.opensearch.index.engine.dataformat.DataFormat;
13+
import org.opensearch.index.engine.dataformat.MergeInput;
14+
import org.opensearch.index.engine.dataformat.MergeResult;
15+
import org.opensearch.index.engine.dataformat.Merger;
16+
import org.opensearch.index.engine.dataformat.RowIdMapping;
17+
import org.opensearch.index.engine.exec.Segment;
18+
import org.opensearch.index.engine.exec.WriterFileSet;
19+
20+
import java.io.IOException;
21+
import java.io.UncheckedIOException;
22+
import java.util.ArrayList;
23+
import java.util.HashMap;
24+
import java.util.List;
25+
import java.util.Map;
26+
27+
/**
28+
* Executes a composite merge: primary format first, then secondaries using the
29+
* row-ID mapping from the primary. Stateless — all state comes from the
30+
* {@link MergePlan} and the merger map.
31+
*
32+
* @opensearch.experimental
33+
*/
34+
@ExperimentalApi
35+
public class CompositeMergeExecutor {
36+
37+
private final Map<DataFormat, Merger> mergers;
38+
39+
public CompositeMergeExecutor(Map<DataFormat, Merger> mergers) {
40+
this.mergers = Map.copyOf(mergers);
41+
}
42+
43+
/**
44+
* Executes the merge described by the plan.
45+
*
46+
* @param plan the pre-validated merge plan
47+
* @return the combined merge result across all formats
48+
*/
49+
public MergeResult execute(MergePlan plan) {
50+
List<FormatMergeResult> completed = new ArrayList<>();
51+
try {
52+
FormatMergeResult primaryResult = mergeFormat(plan, plan.primaryFormat(), null);
53+
completed.add(primaryResult);
54+
55+
RowIdMapping mapping = plan.hasSecondaries()
56+
? primaryResult.rowIdMappingOpt()
57+
.orElseThrow(() -> new IllegalStateException("Primary merge did not produce row-ID mapping required by secondaries"))
58+
: null;
59+
60+
for (DataFormat secondary : plan.secondaryFormats()) {
61+
completed.add(mergeFormat(plan, secondary, mapping));
62+
}
63+
64+
return toMergeResult(completed, mapping);
65+
} catch (Exception e) {
66+
completed.forEach(FormatMergeResult::cleanup);
67+
if (e instanceof RuntimeException re) throw re;
68+
throw new UncheckedIOException((IOException) e);
69+
}
70+
}
71+
72+
private FormatMergeResult mergeFormat(MergePlan plan, DataFormat format, RowIdMapping mapping) throws IOException {
73+
Merger merger = mergers.get(format);
74+
List<WriterFileSet> files = plan.filesFor(format);
75+
List<Segment> segments = new ArrayList<>();
76+
for (WriterFileSet wfs : files) {
77+
segments.add(Segment.builder(wfs.writerGeneration()).addSearchableFiles(format, wfs).build());
78+
}
79+
MergeResult result = merger.merge(new MergeInput(segments, mapping, plan.mergedWriterGeneration()));
80+
return new FormatMergeResult(format, result.getMergedWriterFileSetForDataformat(format), result.rowIdMapping().orElse(null));
81+
}
82+
83+
private static MergeResult toMergeResult(List<FormatMergeResult> results, RowIdMapping mapping) {
84+
Map<DataFormat, WriterFileSet> merged = new HashMap<>();
85+
for (FormatMergeResult r : results) {
86+
merged.put(r.format(), r.mergedFiles());
87+
}
88+
return new MergeResult(merged, mapping);
89+
}
90+
}

0 commit comments

Comments
 (0)