From b4fad4a6fec6379cb8528ce32cb40f23abdcbed2 Mon Sep 17 00:00:00 2001 From: fabrizzio-dotCMS Date: Mon, 29 Jun 2026 10:09:44 -0600 Subject: [PATCH 1/3] fix(opensearch): repair bare orphan index on bootstrap without discarding populated indices (#36237) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The idempotent-bootstrap reuse path from #36238 re-asserted the custom mapping via putMapping against an orphaned cluster index. On a bare orphan (no settings) this failed with HTTP 400 because the content mapping references the custom analyzer `my_analyzer`, which is a static index setting that can only be applied at creation time — leaving the index half-mapped (QA TC-003). Decide the orphan's fate by document count so a populated index is never discarded: - empty orphan (0 docs): delete and recreate from scratch, restoring full settings + base mapping + custom mapping. An empty index has no data and no reindex progress, so this costs nothing operationally. - populated orphan (>0 docs) or unknown count: reuse in place, untouched. A dotCMS-created index already carries the full mapping; deleting it would force a full reindex (hours, degraded search). On any uncertainty (the count probe fails) we err toward reuse. Covered by unit tests (empty->recreate, populated->reuse-untouched, probe-fails->reuse, delete-fails->still-creates, OS tag propagation) and a regression IT that recreates a bare orphan against a real OpenSearch cluster and asserts the recreated index carries the dynamic templates and my_analyzer. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../business/ContentletIndexAPIImpl.java | 76 +++++++++-- .../ContentletIndexAPIImplBootstrapTest.java | 120 ++++++++++++++++-- ...tIndexAPIImplMigrationIntegrationTest.java | 73 ++++++++++- 3 files changed, 243 insertions(+), 26 deletions(-) diff --git a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java index 936ec4883cf..882eb3829c0 100644 --- a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java +++ b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java @@ -655,15 +655,33 @@ public synchronized boolean createContentIndex(final String indexName, final int * Create an index exclusively in one of the SE Providers. * *

Idempotent bootstrap. If the physical index already exists in the target - * cluster it is reused instead of issuing a create. This guards against an orphaned - * cluster index — present in the cluster but missing from the index store — left behind - * when a previous bootstrap created the index but never committed its store pointer - * (e.g. the OS {@code VersionedIndices} row, or after a partial/interrupted startup). - * Without this guard the restart re-derives the same logical name, the create fails with - * {@code resource_already_exists}, and {@code checkAndInitializeIndex()} aborts — leaving - * the instance half-initialised. The custom mapping is (re)applied either way - * ({@code putMapping} is additive/idempotent), so a previously unmapped orphan is repaired, - * and the caller's {@code point()} re-registers the index in the store.

+ * cluster it is an orphan — present in the cluster but missing from the index + * store — left behind when a previous bootstrap created the index but never committed its + * store pointer (e.g. the OS {@code VersionedIndices} row, or after a partial/interrupted + * startup). Without handling this, the restart re-derives the same logical name, the create + * fails with {@code resource_already_exists}, and {@code checkAndInitializeIndex()} aborts — + * leaving the instance half-initialised.

+ * + *

The orphan is handled by document count, so a populated index is never discarded:

+ * + * + *

The caller's {@code point()} then registers the index in the store.

* * @param indexName logical index name (no cluster prefix, no vendor tag) * @param shards number of shards to create with (ignored when the index already exists) @@ -714,11 +732,45 @@ boolean createContentIndex(final String indexName, final int shards, final Index + e.getMessage(), e)) .getOrElse(false); if (alreadyExists) { + // Orphan: exists in cluster, missing from store (see method javadoc). Decide by doc + // count so a populated index — including partial reindex progress — is never discarded. + // The count probe is best-effort: any failure is treated as "has data" (-1) so we err + // toward reuse and never delete on uncertainty. + final long docCount = Try.of(() -> ops.getIndexDocumentCount(physicalName)) + .onFailure(e -> Logger.warn(this, + "Orphan doc-count probe failed for " + physicalName + + " — treating as populated and reusing in place: " + + e.getMessage(), e)) + .getOrElse(-1L); + + if (docCount != 0L) { + // Populated (or unknown): reuse in place, untouched. A dotCMS-created index already + // carries the full settings + base mapping + custom mapping, so nothing needs to be + // (re)applied. Deleting it would force a full reindex (hours, degraded search) — + // not justified to clean up an orphan. + Logger.info(this, String.format( + "Bootstrap: orphaned %s index found with %s document(s); reusing in place" + + " (not deleting, not remapping): %s", + tag, docCount < 0 ? "an unknown number of" : docCount, physicalName)); + return true; + } + + // Empty orphan: delete so the create below rebuilds a clean index with full settings + + // base mapping. An empty index has no data and no reindex progress, so this is safe and + // costs nothing operationally (issue #36237 — repairs a bare orphan that reuse cannot). Logger.info(this, String.format( - "Bootstrap: %s index already exists, reusing and re-asserting mapping: %s", + "Bootstrap: empty orphaned %s index found (in cluster, missing from store);" + + " deleting and recreating with full settings + mapping: %s", tag, physicalName)); - helper.addCustomMapping(List.of(indexName), tag); - return true; + final boolean deleted = Try.of(() -> providerApi.delete(physicalName)) + .onFailure(e -> Logger.warn(this, + "Failed to delete empty orphaned index " + physicalName + + " before recreate; attempting create anyway: " + e.getMessage(), e)) + .getOrElse(false); + if (!deleted) { + Logger.warn(this, "Empty orphaned index " + physicalName + + " was not acknowledged as deleted; the create below may fail."); + } } final boolean contentIndex = ops.createContentIndex(physicalName, shards); diff --git a/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplBootstrapTest.java b/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplBootstrapTest.java index 19202975428..dc9648768bd 100644 --- a/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplBootstrapTest.java +++ b/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplBootstrapTest.java @@ -30,12 +30,17 @@ * *

The behaviour under test:

* */ public class ContentletIndexAPIImplBootstrapTest { @@ -60,26 +65,109 @@ private static ContentletIndexAPIImpl newApi() { } /** - * Given : the physical index already exists in the target cluster (an orphaned cluster index - * left behind by a previous bootstrap that never committed its store pointer). + * Given : an EMPTY orphaned index (0 docs) exists in the cluster but is missing from the store + * (left by a previous bootstrap that never committed its store pointer). * When : createContentIndex() runs during bootstrap. - * Then : the index is reused (no create is issued), the custom mapping is re-asserted to - * repair a possibly-unmapped orphan, and the method returns true. + * Then : the empty orphan is deleted and recreated from scratch (full settings + base mapping), + * the custom mapping is applied to the clean index, and the method returns true. */ @Test - public void test_orphanIndexExists_reusesAndReassertsMapping_skipsCreate() throws IOException { + public void test_emptyOrphan_deletedAndRecreated_withFullMapping() throws IOException { final ContentletIndexOperations ops = mock(ContentletIndexOperations.class); final IndexAPI providerApi = mock(IndexAPI.class); final MappingHelper helper = mock(MappingHelper.class); when(ops.toPhysicalName(LOGICAL_NAME)).thenReturn(PHYSICAL_NAME); when(providerApi.indexExists(PHYSICAL_NAME)).thenReturn(true); + when(ops.getIndexDocumentCount(PHYSICAL_NAME)).thenReturn(0L); + when(providerApi.delete(PHYSICAL_NAME)).thenReturn(true); + when(ops.createContentIndex(PHYSICAL_NAME, SHARDS)).thenReturn(true); + + final boolean result = newApi() + .createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.ES, ops, providerApi, helper); + + assertTrue("Empty orphan must be recreated and reported as available", result); + verify(providerApi).delete(PHYSICAL_NAME); + verify(ops).createContentIndex(PHYSICAL_NAME, SHARDS); + verify(helper).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.ES); + } + + /** + * Given : a POPULATED orphaned index (> 0 docs) exists in the cluster but is missing from + * the store. + * When : createContentIndex() runs during bootstrap. + * Then : the orphan is reused in place, untouched — it is NOT deleted, NOT recreated, and its + * mapping is NOT re-applied (a dotCMS-created index already carries the full mapping). + * Discarding it would force a costly full reindex. The method returns true. + */ + @Test + public void test_populatedOrphan_reusedInPlace_notDeletedNotRemapped() throws IOException { + final ContentletIndexOperations ops = mock(ContentletIndexOperations.class); + final IndexAPI providerApi = mock(IndexAPI.class); + final MappingHelper helper = mock(MappingHelper.class); + + when(ops.toPhysicalName(LOGICAL_NAME)).thenReturn(PHYSICAL_NAME); + when(providerApi.indexExists(PHYSICAL_NAME)).thenReturn(true); + when(ops.getIndexDocumentCount(PHYSICAL_NAME)).thenReturn(42L); final boolean result = newApi() .createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.ES, ops, providerApi, helper); - assertTrue("Existing (orphaned) index must be reused and reported as available", result); + assertTrue("Populated orphan must be reused and reported as available", result); + verify(providerApi, never()).delete(PHYSICAL_NAME); verify(ops, never()).createContentIndex(anyString(), anyInt()); + verify(helper, never()).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.ES); + } + + /** + * Given : an orphan exists but the document-count probe fails (e.g. transient cluster error). + * When : createContentIndex() runs during bootstrap. + * Then : the uncertainty is treated as "has data" — the orphan is reused in place and never + * deleted, so a possibly-populated index is never discarded on a flaky probe. + */ + @Test + public void test_orphanDocCountProbeFails_treatedAsPopulated_reused() throws IOException { + final ContentletIndexOperations ops = mock(ContentletIndexOperations.class); + final IndexAPI providerApi = mock(IndexAPI.class); + final MappingHelper helper = mock(MappingHelper.class); + + when(ops.toPhysicalName(LOGICAL_NAME)).thenReturn(PHYSICAL_NAME); + when(providerApi.indexExists(PHYSICAL_NAME)).thenReturn(true); + when(ops.getIndexDocumentCount(PHYSICAL_NAME)) + .thenThrow(new RuntimeException("count unavailable")); + + final boolean result = newApi() + .createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.ES, ops, providerApi, helper); + + assertTrue("Unknown doc count must be reused (never deleted)", result); + verify(providerApi, never()).delete(PHYSICAL_NAME); + verify(ops, never()).createContentIndex(anyString(), anyInt()); + } + + /** + * Given : an EMPTY orphan exists but its delete is not acknowledged (e.g. transient error). + * When : createContentIndex() runs during bootstrap. + * Then : the failure does not abort bootstrap — the create is still attempted and, on + * success here, the mapping is applied. + */ + @Test + public void test_emptyOrphanDeleteFails_stillAttemptsCreate() throws IOException { + final ContentletIndexOperations ops = mock(ContentletIndexOperations.class); + final IndexAPI providerApi = mock(IndexAPI.class); + final MappingHelper helper = mock(MappingHelper.class); + + when(ops.toPhysicalName(LOGICAL_NAME)).thenReturn(PHYSICAL_NAME); + when(providerApi.indexExists(PHYSICAL_NAME)).thenReturn(true); + when(ops.getIndexDocumentCount(PHYSICAL_NAME)).thenReturn(0L); + when(providerApi.delete(PHYSICAL_NAME)) + .thenThrow(new RuntimeException("delete not acknowledged")); + when(ops.createContentIndex(PHYSICAL_NAME, SHARDS)).thenReturn(true); + + final boolean result = newApi() + .createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.ES, ops, providerApi, helper); + + assertTrue("A failed orphan delete must fall through to a (successful) create", result); + verify(ops).createContentIndex(PHYSICAL_NAME, SHARDS); verify(helper).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.ES); } @@ -156,13 +244,14 @@ public void test_existenceProbeThrows_treatedAsMissing_proceedsToCreate() throws } /** - * Given : an OS-tagged bootstrap of an already-existing index. + * Given : an OS-tagged bootstrap of an already-existing EMPTY (orphaned) index. * When : createContentIndex() runs with {@link IndexTag#OS}. - * Then : the mapping is re-asserted against the OS provider — the tag is propagated unchanged - * to the mapping helper so the correct vendor is targeted. + * Then : the empty orphan is deleted and recreated against the OS provider — the fully-tagged + * physical name is used for the delete, create and doc-count probe, and the OS tag is + * propagated unchanged to the mapping helper so the correct vendor is targeted. */ @Test - public void test_osTag_isPropagatedToMappingHelper() throws IOException { + public void test_osTag_emptyOrphanDeletedRecreated_andTagPropagated() throws IOException { final ContentletIndexOperations ops = mock(ContentletIndexOperations.class); final IndexAPI providerApi = mock(IndexAPI.class); final MappingHelper helper = mock(MappingHelper.class); @@ -170,11 +259,16 @@ public void test_osTag_isPropagatedToMappingHelper() throws IOException { final String osPhysical = PHYSICAL_NAME + ".os"; when(ops.toPhysicalName(LOGICAL_NAME)).thenReturn(osPhysical); when(providerApi.indexExists(osPhysical)).thenReturn(true); + when(ops.getIndexDocumentCount(osPhysical)).thenReturn(0L); + when(providerApi.delete(osPhysical)).thenReturn(true); + when(ops.createContentIndex(osPhysical, SHARDS)).thenReturn(true); final boolean result = newApi() .createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.OS, ops, providerApi, helper); assertTrue(result); + verify(providerApi).delete(osPhysical); + verify(ops).createContentIndex(osPhysical, SHARDS); verify(helper).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.OS); } } \ No newline at end of file diff --git a/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIntegrationTest.java b/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIntegrationTest.java index f7f01a2b03b..7ff153c4561 100644 --- a/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIntegrationTest.java +++ b/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIntegrationTest.java @@ -13,9 +13,14 @@ import com.dotcms.content.index.IndexAPIImpl; import com.dotcms.content.index.IndexTag; import com.dotcms.content.index.VersionedIndices; +import com.dotcms.content.elasticsearch.util.MappingHelper; import com.dotcms.content.index.opensearch.ContentletIndexOperationsOS; +import com.dotcms.content.index.opensearch.MappingOperationsOS; +import com.dotcms.content.index.opensearch.OSClientProvider; import com.dotcms.content.index.opensearch.OSIndexAPIImpl; import com.dotcms.util.IntegrationTestInitService; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch.indices.GetIndexResponse; import com.dotmarketing.business.APILocator; import com.dotmarketing.common.db.DotConnect; import com.dotmarketing.exception.DotDataException; @@ -114,6 +119,13 @@ public class ContentletIndexAPIImplMigrationIntegrationTest extends IntegrationT @Inject private ContentletIndexOperationsOS opsOS; + // ── Used by the orphan-repair regression test to read back mapping + analysis settings ── + @Inject + private MappingOperationsOS mappingOps; + + @Inject + private OSClientProvider clientProvider; + /** * Physical (cluster-prefixed, tag-suffixed) form of {@link #DUAL_WORKING}; resolved once * per test in {@link #setUp()}. Only {@code DUAL_WORKING} needs a physical-name field — @@ -433,9 +445,52 @@ public void test_delete_phase1_removesFromBothClusters() throws IOException, Dot } // ========================================================================= - // activateIndex — pointer-store writes verified against real DB + // Orphan bootstrap repair — bare cluster index missing from the store (#36237) // ========================================================================= + /** + * Given Scenario: Phase 1. A bare OS index physically exists in the cluster (default + * settings, empty mapping — no custom {@code my_analyzer}, no dotCMS dynamic + * templates) but is absent from the index store. This is the orphan left by a + * prior bootstrap that created the index but never committed its store pointer, + * reproduced bare exactly as QA did for #36237 TC-003. + * When : the bootstrap seam {@code createContentIndex(name, shards, OS, ...)} runs. + * Then : the orphan is deleted and recreated from scratch, so the index now carries the FULL + * settings (custom {@code my_analyzer}) and base mapping (dotCMS dynamic templates). + * This is the regression guard for TC-003 — reusing the bare orphan in place left it + * half-mapped and triggered {@code putMapping HTTP 400} (analyzer not found). + */ + @Test + public void test_bootstrap_bareOrphan_phase1_isRecreatedWithFullSettingsAndMapping() + throws Exception { + setPhase(1); + + // Pre: create a BARE orphan — default settings, no mapping (QA recreated it as PUT {}). + osIndexAPI.createIndex(physicalDualWorking, 0); + assertTrue("Pre: bare orphan must exist in OS", + osIndexAPI.indexExists(physicalDualWorking)); + assertFalse("Pre: bare orphan must NOT yet carry the dotCMS dynamic templates", + mappingOps.getMapping(DUAL_WORKING).contains("template_1")); + assertFalse("Pre: bare orphan must NOT yet define the custom my_analyzer", + analyzers().containsKey("my_analyzer")); + + // When: run the orphan-aware bootstrap seam against the real OS collaborators. + final boolean result = ((ContentletIndexAPIImpl) contentletIndexAPI()) + .createContentIndex(DUAL_WORKING, 1, IndexTag.OS, + opsOS, osIndexAPI, MappingHelper.getInstance()); + + // Then: the orphan was repaired — full settings + base mapping are present. + assertTrue("Bootstrap must report the recreated index as available", result); + assertTrue("Recreated index must exist in OS", + osIndexAPI.indexExists(physicalDualWorking)); + assertTrue("Recreated index must carry the dotCMS dynamic templates (base mapping restored)", + mappingOps.getMapping(DUAL_WORKING).contains("template_1")); + assertTrue("Recreated index must define the custom my_analyzer (settings restored)", + analyzers().containsKey("my_analyzer")); + + Logger.info(this, "✅ bootstrap bare-orphan Phase 1 — recreated with full settings + mapping"); + } + /** * Given Scenario: Phase 0. ES store is currently pointing at the pre-existing app index. * When : activateIndex(ES_WORKING) is called. @@ -915,6 +970,22 @@ private static void setPhase(final int ordinal) { Config.setProperty(FLAG_KEY, String.valueOf(ordinal)); } + /** + * Reads back the analyzer map configured on the {@link #physicalDualWorking} OS index. + * Returns an empty map when the index has no analysis settings (e.g. a bare orphan). + */ + private Map analyzers() throws IOException { + final OpenSearchClient client = clientProvider.getClient(); + final GetIndexResponse response = client.indices().get(b -> b.index(physicalDualWorking)); + final var indexState = response.result().get(physicalDualWorking); + if (indexState == null || indexState.settings() == null + || indexState.settings().index() == null + || indexState.settings().index().analysis() == null) { + return Map.of(); + } + return indexState.settings().index().analysis().analyzer(); + } + private static ContentletIndexAPI contentletIndexAPI() { return APILocator.getContentletIndexAPI(); } From 47b6899727f796dfef27caf5f35020340a4a4fdd Mon Sep 17 00:00:00 2001 From: fabrizzio-dotCMS Date: Mon, 29 Jun 2026 11:03:41 -0600 Subject: [PATCH 2/3] fix(opensearch): reuse empty orphan in place when its delete is not acknowledged (#36237) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses PR review: if the empty-orphan delete is not acknowledged the index may still exist, so the follow-up create would throw resource_already_exists — which bootstrapAndPointOS does not catch (it only handles IOException), aborting bootstrap (the original #36237 failure mode). Now, when the delete is not acknowledged, the empty orphan is reused in place instead of attempting a doomed create. The index is empty so nothing is lost, and a later clean restart recreates it properly once the cluster is healthy. Unit test updated accordingly. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../business/ContentletIndexAPIImpl.java | 9 +++++++-- .../ContentletIndexAPIImplBootstrapTest.java | 20 ++++++++++--------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java index 882eb3829c0..5221743a684 100644 --- a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java +++ b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java @@ -765,11 +765,16 @@ boolean createContentIndex(final String indexName, final int shards, final Index final boolean deleted = Try.of(() -> providerApi.delete(physicalName)) .onFailure(e -> Logger.warn(this, "Failed to delete empty orphaned index " + physicalName - + " before recreate; attempting create anyway: " + e.getMessage(), e)) + + ": " + e.getMessage(), e)) .getOrElse(false); if (!deleted) { + // The delete was not acknowledged, so the index may still exist. Recreating it + // would throw resource_already_exists and abort bootstrap — the very failure this + // guard prevents. Reuse it in place instead: it is empty, so nothing is lost, and a + // later clean restart recreates it properly once the cluster is healthy. Logger.warn(this, "Empty orphaned index " + physicalName - + " was not acknowledged as deleted; the create below may fail."); + + " could not be deleted; reusing in place to avoid aborting bootstrap."); + return true; } } diff --git a/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplBootstrapTest.java b/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplBootstrapTest.java index dc9648768bd..9185cb3d54e 100644 --- a/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplBootstrapTest.java +++ b/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplBootstrapTest.java @@ -40,7 +40,8 @@ *
  • a failed create does not apply a mapping;
  • *
  • a failing existence probe is treated as "does not exist", so bootstrap falls through to * the create path instead of aborting;
  • - *
  • a failed delete of an empty orphan does not abort bootstrap — the create is still attempted.
  • + *
  • a failed delete of an empty orphan does not abort bootstrap — the orphan is reused in + * place rather than attempting a create that would throw {@code resource_already_exists}.
  • * */ public class ContentletIndexAPIImplBootstrapTest { @@ -145,13 +146,15 @@ public void test_orphanDocCountProbeFails_treatedAsPopulated_reused() throws IOE } /** - * Given : an EMPTY orphan exists but its delete is not acknowledged (e.g. transient error). + * Given : an EMPTY orphan exists but its delete is not acknowledged (e.g. transient error), + * so the index may still be present in the cluster. * When : createContentIndex() runs during bootstrap. - * Then : the failure does not abort bootstrap — the create is still attempted and, on - * success here, the mapping is applied. + * Then : bootstrap is NOT aborted — rather than attempt a create that would throw + * {@code resource_already_exists}, the orphan is reused in place. No create is issued + * and no mapping is applied; the method returns true. */ @Test - public void test_emptyOrphanDeleteFails_stillAttemptsCreate() throws IOException { + public void test_emptyOrphanDeleteFails_reusedInPlace_notRecreated() throws IOException { final ContentletIndexOperations ops = mock(ContentletIndexOperations.class); final IndexAPI providerApi = mock(IndexAPI.class); final MappingHelper helper = mock(MappingHelper.class); @@ -161,14 +164,13 @@ public void test_emptyOrphanDeleteFails_stillAttemptsCreate() throws IOException when(ops.getIndexDocumentCount(PHYSICAL_NAME)).thenReturn(0L); when(providerApi.delete(PHYSICAL_NAME)) .thenThrow(new RuntimeException("delete not acknowledged")); - when(ops.createContentIndex(PHYSICAL_NAME, SHARDS)).thenReturn(true); final boolean result = newApi() .createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.ES, ops, providerApi, helper); - assertTrue("A failed orphan delete must fall through to a (successful) create", result); - verify(ops).createContentIndex(PHYSICAL_NAME, SHARDS); - verify(helper).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.ES); + assertTrue("A failed delete must not abort bootstrap — reuse in place instead", result); + verify(ops, never()).createContentIndex(anyString(), anyInt()); + verify(helper, never()).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.ES); } /** From 8b8da22e8c392eceffa3bbb59a3d1e3659221b5f Mon Sep 17 00:00:00 2001 From: fabrizzio-dotCMS Date: Mon, 29 Jun 2026 11:17:41 -0600 Subject: [PATCH 3/3] fix(opensearch): fail loud instead of registering a half-mapped orphan when delete is stuck (#36237) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the 🔴 Critical review finding: the previous revision returned true (reused the bare orphan) when its delete was not acknowledged, which would register an un-repairable half-mapped index in the store — bare orphans have no custom analyzer and their mapping cannot be repaired in place. Now, when the empty-orphan delete is not acknowledged, re-probe existence: - index gone (delete took effect without an ack) -> recreate cleanly; - index still present -> throw, failing bootstrap loudly with a clear message rather than recreating (would throw resource_already_exists) or registering a half-mapped index. This is an abnormal cluster state, not the orphan-name collision the method otherwise resolves. Unit tests: split the unacked-delete case into index-gone->recreate and index-still-present->fail-loud. 9/9 green. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../business/ContentletIndexAPIImpl.java | 28 +++++++--- .../ContentletIndexAPIImplBootstrapTest.java | 56 +++++++++++++++---- 2 files changed, 64 insertions(+), 20 deletions(-) diff --git a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java index 5221743a684..b456c46c059 100644 --- a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java +++ b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java @@ -671,7 +671,9 @@ public synchronized boolean createContentIndex(final String indexName, final int * {@code putMapping}-only re-assert against a bare orphan fails with {@code HTTP 400} * (analyzer not found) and leaves the index half-mapped (issue #36237, QA TC-003). An * empty index has no data and no reindex progress, so recreating it costs nothing - * operationally and yields a clean index with full settings + base mapping. + * operationally and yields a clean index with full settings + base mapping. If the + * delete cannot be confirmed and the index is still present, bootstrap fails loudly + * rather than register a half-mapped index. *
  • Populated orphan (> 0 docs), or count unknown — reused in place, untouched. * A populated orphan was created by dotCMS itself, so it already carries the full * settings + base mapping + custom mapping; nothing needs to be (re)applied. The index is @@ -768,13 +770,23 @@ boolean createContentIndex(final String indexName, final int shards, final Index + ": " + e.getMessage(), e)) .getOrElse(false); if (!deleted) { - // The delete was not acknowledged, so the index may still exist. Recreating it - // would throw resource_already_exists and abort bootstrap — the very failure this - // guard prevents. Reuse it in place instead: it is empty, so nothing is lost, and a - // later clean restart recreates it properly once the cluster is healthy. - Logger.warn(this, "Empty orphaned index " + physicalName - + " could not be deleted; reusing in place to avoid aborting bootstrap."); - return true; + // Delete not acknowledged. Re-probe: it may have taken effect without an ack, in + // which case we can still recreate cleanly. If the index is genuinely still there + // we must NOT proceed — recreating would throw resource_already_exists, and reusing + // it would register a bare orphan whose mapping cannot be repaired (the custom + // analyzer is a create-time-only setting). Fail loud instead of leaving a + // half-mapped index in the store. This is an abnormal cluster state, not the + // orphan-name collision this method otherwise resolves. + final boolean stillExists = Try.of(() -> providerApi.indexExists(physicalName)) + .getOrElse(true); + if (stillExists) { + throw new IOException("Empty orphaned " + tag + " index " + physicalName + + " could not be deleted and still exists; aborting bootstrap to avoid" + + " registering a half-mapped index. Check the search cluster health" + + " and restart."); + } + Logger.warn(this, "Empty orphaned index " + physicalName + " delete was not" + + " acknowledged, but the index is gone; proceeding to recreate."); } } diff --git a/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplBootstrapTest.java b/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplBootstrapTest.java index 9185cb3d54e..1de3a230cd4 100644 --- a/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplBootstrapTest.java +++ b/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplBootstrapTest.java @@ -2,6 +2,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.mock; @@ -40,8 +41,10 @@ *
  • a failed create does not apply a mapping;
  • *
  • a failing existence probe is treated as "does not exist", so bootstrap falls through to * the create path instead of aborting;
  • - *
  • a failed delete of an empty orphan does not abort bootstrap — the orphan is reused in - * place rather than attempting a create that would throw {@code resource_already_exists}.
  • + *
  • an empty orphan whose delete is unacknowledged but is actually gone recreates cleanly;
  • + *
  • an empty orphan whose delete fails while the index is still present fails loudly, rather + * than recreating (would throw {@code resource_already_exists}) or registering a bare, + * un-repairable index.
  • * */ public class ContentletIndexAPIImplBootstrapTest { @@ -146,29 +149,58 @@ public void test_orphanDocCountProbeFails_treatedAsPopulated_reused() throws IOE } /** - * Given : an EMPTY orphan exists but its delete is not acknowledged (e.g. transient error), - * so the index may still be present in the cluster. + * Given : an EMPTY orphan whose delete is not acknowledged, but a re-probe shows the index is + * actually gone (the delete took effect without an ack). * When : createContentIndex() runs during bootstrap. - * Then : bootstrap is NOT aborted — rather than attempt a create that would throw - * {@code resource_already_exists}, the orphan is reused in place. No create is issued - * and no mapping is applied; the method returns true. + * Then : it recreates cleanly — the create is issued and the mapping applied. */ @Test - public void test_emptyOrphanDeleteFails_reusedInPlace_notRecreated() throws IOException { + public void test_emptyOrphanDeleteUnacked_butIndexGone_recreates() throws IOException { final ContentletIndexOperations ops = mock(ContentletIndexOperations.class); final IndexAPI providerApi = mock(IndexAPI.class); final MappingHelper helper = mock(MappingHelper.class); when(ops.toPhysicalName(LOGICAL_NAME)).thenReturn(PHYSICAL_NAME); - when(providerApi.indexExists(PHYSICAL_NAME)).thenReturn(true); + // exists at first (orphan probe) → gone after the unacked delete (re-probe) + when(providerApi.indexExists(PHYSICAL_NAME)).thenReturn(true, false); when(ops.getIndexDocumentCount(PHYSICAL_NAME)).thenReturn(0L); - when(providerApi.delete(PHYSICAL_NAME)) - .thenThrow(new RuntimeException("delete not acknowledged")); + when(providerApi.delete(PHYSICAL_NAME)).thenReturn(false); + when(ops.createContentIndex(PHYSICAL_NAME, SHARDS)).thenReturn(true); final boolean result = newApi() .createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.ES, ops, providerApi, helper); - assertTrue("A failed delete must not abort bootstrap — reuse in place instead", result); + assertTrue("Unacked delete with the index gone must recreate cleanly", result); + verify(ops).createContentIndex(PHYSICAL_NAME, SHARDS); + verify(helper).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.ES); + } + + /** + * Given : an EMPTY orphan whose delete fails AND a re-probe shows the index is still present. + * When : createContentIndex() runs during bootstrap. + * Then : it fails loudly (throws) rather than recreating (which would throw + * {@code resource_already_exists}) or reusing a bare, un-repairable index. No create is + * issued and no mapping is applied. + */ + @Test + public void test_emptyOrphanDeleteFails_indexStillExists_failsLoud() throws IOException { + final ContentletIndexOperations ops = mock(ContentletIndexOperations.class); + final IndexAPI providerApi = mock(IndexAPI.class); + final MappingHelper helper = mock(MappingHelper.class); + + when(ops.toPhysicalName(LOGICAL_NAME)).thenReturn(PHYSICAL_NAME); + when(providerApi.indexExists(PHYSICAL_NAME)).thenReturn(true); // still there on re-probe + when(ops.getIndexDocumentCount(PHYSICAL_NAME)).thenReturn(0L); + when(providerApi.delete(PHYSICAL_NAME)) + .thenThrow(new RuntimeException("delete not acknowledged")); + + try { + newApi().createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.ES, ops, providerApi, helper); + fail("A stuck empty orphan (delete fails, index remains) must fail loudly"); + } catch (final IOException expected) { + // expected — bootstrap must not silently register a half-mapped index + } + verify(ops, never()).createContentIndex(anyString(), anyInt()); verify(helper, never()).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.ES); }