diff --git a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java index 936ec4883cf..b456c46c059 100644 --- a/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java +++ b/dotCMS/src/main/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImpl.java @@ -655,15 +655,35 @@ public synchronized boolean createContentIndex(final String indexName, final int * Create an index exclusively in one of the SE Providers. * *

Idempotent bootstrap. If the physical index already exists in the target - * cluster it is reused instead of issuing a create. This guards against an orphaned - * cluster index — present in the cluster but missing from the index store — left behind - * when a previous bootstrap created the index but never committed its store pointer - * (e.g. the OS {@code VersionedIndices} row, or after a partial/interrupted startup). - * Without this guard the restart re-derives the same logical name, the create fails with - * {@code resource_already_exists}, and {@code checkAndInitializeIndex()} aborts — leaving - * the instance half-initialised. The custom mapping is (re)applied either way - * ({@code putMapping} is additive/idempotent), so a previously unmapped orphan is repaired, - * and the caller's {@code point()} re-registers the index in the store.

+ * cluster it is an orphan — present in the cluster but missing from the index + * store — left behind when a previous bootstrap created the index but never committed its + * store pointer (e.g. the OS {@code VersionedIndices} row, or after a partial/interrupted + * startup). Without handling this, the restart re-derives the same logical name, the create + * fails with {@code resource_already_exists}, and {@code checkAndInitializeIndex()} aborts — + * leaving the instance half-initialised.

+ * + *

The orphan is handled by document count, so a populated index is never discarded:

+ * + * + *

The caller's {@code point()} then registers the index in the store.

* * @param indexName logical index name (no cluster prefix, no vendor tag) * @param shards number of shards to create with (ignored when the index already exists) @@ -714,11 +734,60 @@ boolean createContentIndex(final String indexName, final int shards, final Index + e.getMessage(), e)) .getOrElse(false); if (alreadyExists) { + // Orphan: exists in cluster, missing from store (see method javadoc). Decide by doc + // count so a populated index — including partial reindex progress — is never discarded. + // The count probe is best-effort: any failure is treated as "has data" (-1) so we err + // toward reuse and never delete on uncertainty. + final long docCount = Try.of(() -> ops.getIndexDocumentCount(physicalName)) + .onFailure(e -> Logger.warn(this, + "Orphan doc-count probe failed for " + physicalName + + " — treating as populated and reusing in place: " + + e.getMessage(), e)) + .getOrElse(-1L); + + if (docCount != 0L) { + // Populated (or unknown): reuse in place, untouched. A dotCMS-created index already + // carries the full settings + base mapping + custom mapping, so nothing needs to be + // (re)applied. Deleting it would force a full reindex (hours, degraded search) — + // not justified to clean up an orphan. + Logger.info(this, String.format( + "Bootstrap: orphaned %s index found with %s document(s); reusing in place" + + " (not deleting, not remapping): %s", + tag, docCount < 0 ? "an unknown number of" : docCount, physicalName)); + return true; + } + + // Empty orphan: delete so the create below rebuilds a clean index with full settings + + // base mapping. An empty index has no data and no reindex progress, so this is safe and + // costs nothing operationally (issue #36237 — repairs a bare orphan that reuse cannot). Logger.info(this, String.format( - "Bootstrap: %s index already exists, reusing and re-asserting mapping: %s", + "Bootstrap: empty orphaned %s index found (in cluster, missing from store);" + + " deleting and recreating with full settings + mapping: %s", tag, physicalName)); - helper.addCustomMapping(List.of(indexName), tag); - return true; + final boolean deleted = Try.of(() -> providerApi.delete(physicalName)) + .onFailure(e -> Logger.warn(this, + "Failed to delete empty orphaned index " + physicalName + + ": " + e.getMessage(), e)) + .getOrElse(false); + if (!deleted) { + // Delete not acknowledged. Re-probe: it may have taken effect without an ack, in + // which case we can still recreate cleanly. If the index is genuinely still there + // we must NOT proceed — recreating would throw resource_already_exists, and reusing + // it would register a bare orphan whose mapping cannot be repaired (the custom + // analyzer is a create-time-only setting). Fail loud instead of leaving a + // half-mapped index in the store. This is an abnormal cluster state, not the + // orphan-name collision this method otherwise resolves. + final boolean stillExists = Try.of(() -> providerApi.indexExists(physicalName)) + .getOrElse(true); + if (stillExists) { + throw new IOException("Empty orphaned " + tag + " index " + physicalName + + " could not be deleted and still exists; aborting bootstrap to avoid" + + " registering a half-mapped index. Check the search cluster health" + + " and restart."); + } + Logger.warn(this, "Empty orphaned index " + physicalName + " delete was not" + + " acknowledged, but the index is gone; proceeding to recreate."); + } } final boolean contentIndex = ops.createContentIndex(physicalName, shards); diff --git a/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplBootstrapTest.java b/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplBootstrapTest.java index 19202975428..1de3a230cd4 100644 --- a/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplBootstrapTest.java +++ b/dotCMS/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplBootstrapTest.java @@ -2,6 +2,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.mock; @@ -30,12 +31,20 @@ * *

The behaviour under test:

* */ public class ContentletIndexAPIImplBootstrapTest { @@ -60,29 +69,142 @@ private static ContentletIndexAPIImpl newApi() { } /** - * Given : the physical index already exists in the target cluster (an orphaned cluster index - * left behind by a previous bootstrap that never committed its store pointer). + * Given : an EMPTY orphaned index (0 docs) exists in the cluster but is missing from the store + * (left by a previous bootstrap that never committed its store pointer). * When : createContentIndex() runs during bootstrap. - * Then : the index is reused (no create is issued), the custom mapping is re-asserted to - * repair a possibly-unmapped orphan, and the method returns true. + * Then : the empty orphan is deleted and recreated from scratch (full settings + base mapping), + * the custom mapping is applied to the clean index, and the method returns true. */ @Test - public void test_orphanIndexExists_reusesAndReassertsMapping_skipsCreate() throws IOException { + public void test_emptyOrphan_deletedAndRecreated_withFullMapping() throws IOException { final ContentletIndexOperations ops = mock(ContentletIndexOperations.class); final IndexAPI providerApi = mock(IndexAPI.class); final MappingHelper helper = mock(MappingHelper.class); when(ops.toPhysicalName(LOGICAL_NAME)).thenReturn(PHYSICAL_NAME); when(providerApi.indexExists(PHYSICAL_NAME)).thenReturn(true); + when(ops.getIndexDocumentCount(PHYSICAL_NAME)).thenReturn(0L); + when(providerApi.delete(PHYSICAL_NAME)).thenReturn(true); + when(ops.createContentIndex(PHYSICAL_NAME, SHARDS)).thenReturn(true); final boolean result = newApi() .createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.ES, ops, providerApi, helper); - assertTrue("Existing (orphaned) index must be reused and reported as available", result); + assertTrue("Empty orphan must be recreated and reported as available", result); + verify(providerApi).delete(PHYSICAL_NAME); + verify(ops).createContentIndex(PHYSICAL_NAME, SHARDS); + verify(helper).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.ES); + } + + /** + * Given : a POPULATED orphaned index (> 0 docs) exists in the cluster but is missing from + * the store. + * When : createContentIndex() runs during bootstrap. + * Then : the orphan is reused in place, untouched — it is NOT deleted, NOT recreated, and its + * mapping is NOT re-applied (a dotCMS-created index already carries the full mapping). + * Discarding it would force a costly full reindex. The method returns true. + */ + @Test + public void test_populatedOrphan_reusedInPlace_notDeletedNotRemapped() throws IOException { + final ContentletIndexOperations ops = mock(ContentletIndexOperations.class); + final IndexAPI providerApi = mock(IndexAPI.class); + final MappingHelper helper = mock(MappingHelper.class); + + when(ops.toPhysicalName(LOGICAL_NAME)).thenReturn(PHYSICAL_NAME); + when(providerApi.indexExists(PHYSICAL_NAME)).thenReturn(true); + when(ops.getIndexDocumentCount(PHYSICAL_NAME)).thenReturn(42L); + + final boolean result = newApi() + .createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.ES, ops, providerApi, helper); + + assertTrue("Populated orphan must be reused and reported as available", result); + verify(providerApi, never()).delete(PHYSICAL_NAME); verify(ops, never()).createContentIndex(anyString(), anyInt()); + verify(helper, never()).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.ES); + } + + /** + * Given : an orphan exists but the document-count probe fails (e.g. transient cluster error). + * When : createContentIndex() runs during bootstrap. + * Then : the uncertainty is treated as "has data" — the orphan is reused in place and never + * deleted, so a possibly-populated index is never discarded on a flaky probe. + */ + @Test + public void test_orphanDocCountProbeFails_treatedAsPopulated_reused() throws IOException { + final ContentletIndexOperations ops = mock(ContentletIndexOperations.class); + final IndexAPI providerApi = mock(IndexAPI.class); + final MappingHelper helper = mock(MappingHelper.class); + + when(ops.toPhysicalName(LOGICAL_NAME)).thenReturn(PHYSICAL_NAME); + when(providerApi.indexExists(PHYSICAL_NAME)).thenReturn(true); + when(ops.getIndexDocumentCount(PHYSICAL_NAME)) + .thenThrow(new RuntimeException("count unavailable")); + + final boolean result = newApi() + .createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.ES, ops, providerApi, helper); + + assertTrue("Unknown doc count must be reused (never deleted)", result); + verify(providerApi, never()).delete(PHYSICAL_NAME); + verify(ops, never()).createContentIndex(anyString(), anyInt()); + } + + /** + * Given : an EMPTY orphan whose delete is not acknowledged, but a re-probe shows the index is + * actually gone (the delete took effect without an ack). + * When : createContentIndex() runs during bootstrap. + * Then : it recreates cleanly — the create is issued and the mapping applied. + */ + @Test + public void test_emptyOrphanDeleteUnacked_butIndexGone_recreates() throws IOException { + final ContentletIndexOperations ops = mock(ContentletIndexOperations.class); + final IndexAPI providerApi = mock(IndexAPI.class); + final MappingHelper helper = mock(MappingHelper.class); + + when(ops.toPhysicalName(LOGICAL_NAME)).thenReturn(PHYSICAL_NAME); + // exists at first (orphan probe) → gone after the unacked delete (re-probe) + when(providerApi.indexExists(PHYSICAL_NAME)).thenReturn(true, false); + when(ops.getIndexDocumentCount(PHYSICAL_NAME)).thenReturn(0L); + when(providerApi.delete(PHYSICAL_NAME)).thenReturn(false); + when(ops.createContentIndex(PHYSICAL_NAME, SHARDS)).thenReturn(true); + + final boolean result = newApi() + .createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.ES, ops, providerApi, helper); + + assertTrue("Unacked delete with the index gone must recreate cleanly", result); + verify(ops).createContentIndex(PHYSICAL_NAME, SHARDS); verify(helper).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.ES); } + /** + * Given : an EMPTY orphan whose delete fails AND a re-probe shows the index is still present. + * When : createContentIndex() runs during bootstrap. + * Then : it fails loudly (throws) rather than recreating (which would throw + * {@code resource_already_exists}) or reusing a bare, un-repairable index. No create is + * issued and no mapping is applied. + */ + @Test + public void test_emptyOrphanDeleteFails_indexStillExists_failsLoud() throws IOException { + final ContentletIndexOperations ops = mock(ContentletIndexOperations.class); + final IndexAPI providerApi = mock(IndexAPI.class); + final MappingHelper helper = mock(MappingHelper.class); + + when(ops.toPhysicalName(LOGICAL_NAME)).thenReturn(PHYSICAL_NAME); + when(providerApi.indexExists(PHYSICAL_NAME)).thenReturn(true); // still there on re-probe + when(ops.getIndexDocumentCount(PHYSICAL_NAME)).thenReturn(0L); + when(providerApi.delete(PHYSICAL_NAME)) + .thenThrow(new RuntimeException("delete not acknowledged")); + + try { + newApi().createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.ES, ops, providerApi, helper); + fail("A stuck empty orphan (delete fails, index remains) must fail loudly"); + } catch (final IOException expected) { + // expected — bootstrap must not silently register a half-mapped index + } + + verify(ops, never()).createContentIndex(anyString(), anyInt()); + verify(helper, never()).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.ES); + } + /** * Given : the physical index does not exist in the target cluster. * When : createContentIndex() runs and the create succeeds. @@ -156,13 +278,14 @@ public void test_existenceProbeThrows_treatedAsMissing_proceedsToCreate() throws } /** - * Given : an OS-tagged bootstrap of an already-existing index. + * Given : an OS-tagged bootstrap of an already-existing EMPTY (orphaned) index. * When : createContentIndex() runs with {@link IndexTag#OS}. - * Then : the mapping is re-asserted against the OS provider — the tag is propagated unchanged - * to the mapping helper so the correct vendor is targeted. + * Then : the empty orphan is deleted and recreated against the OS provider — the fully-tagged + * physical name is used for the delete, create and doc-count probe, and the OS tag is + * propagated unchanged to the mapping helper so the correct vendor is targeted. */ @Test - public void test_osTag_isPropagatedToMappingHelper() throws IOException { + public void test_osTag_emptyOrphanDeletedRecreated_andTagPropagated() throws IOException { final ContentletIndexOperations ops = mock(ContentletIndexOperations.class); final IndexAPI providerApi = mock(IndexAPI.class); final MappingHelper helper = mock(MappingHelper.class); @@ -170,11 +293,16 @@ public void test_osTag_isPropagatedToMappingHelper() throws IOException { final String osPhysical = PHYSICAL_NAME + ".os"; when(ops.toPhysicalName(LOGICAL_NAME)).thenReturn(osPhysical); when(providerApi.indexExists(osPhysical)).thenReturn(true); + when(ops.getIndexDocumentCount(osPhysical)).thenReturn(0L); + when(providerApi.delete(osPhysical)).thenReturn(true); + when(ops.createContentIndex(osPhysical, SHARDS)).thenReturn(true); final boolean result = newApi() .createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.OS, ops, providerApi, helper); assertTrue(result); + verify(providerApi).delete(osPhysical); + verify(ops).createContentIndex(osPhysical, SHARDS); verify(helper).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.OS); } } \ No newline at end of file diff --git a/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIntegrationTest.java b/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIntegrationTest.java index f7f01a2b03b..7ff153c4561 100644 --- a/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIntegrationTest.java +++ b/dotcms-integration/src/test/java/com/dotcms/content/elasticsearch/business/ContentletIndexAPIImplMigrationIntegrationTest.java @@ -13,9 +13,14 @@ import com.dotcms.content.index.IndexAPIImpl; import com.dotcms.content.index.IndexTag; import com.dotcms.content.index.VersionedIndices; +import com.dotcms.content.elasticsearch.util.MappingHelper; import com.dotcms.content.index.opensearch.ContentletIndexOperationsOS; +import com.dotcms.content.index.opensearch.MappingOperationsOS; +import com.dotcms.content.index.opensearch.OSClientProvider; import com.dotcms.content.index.opensearch.OSIndexAPIImpl; import com.dotcms.util.IntegrationTestInitService; +import org.opensearch.client.opensearch.OpenSearchClient; +import org.opensearch.client.opensearch.indices.GetIndexResponse; import com.dotmarketing.business.APILocator; import com.dotmarketing.common.db.DotConnect; import com.dotmarketing.exception.DotDataException; @@ -114,6 +119,13 @@ public class ContentletIndexAPIImplMigrationIntegrationTest extends IntegrationT @Inject private ContentletIndexOperationsOS opsOS; + // ── Used by the orphan-repair regression test to read back mapping + analysis settings ── + @Inject + private MappingOperationsOS mappingOps; + + @Inject + private OSClientProvider clientProvider; + /** * Physical (cluster-prefixed, tag-suffixed) form of {@link #DUAL_WORKING}; resolved once * per test in {@link #setUp()}. Only {@code DUAL_WORKING} needs a physical-name field — @@ -433,9 +445,52 @@ public void test_delete_phase1_removesFromBothClusters() throws IOException, Dot } // ========================================================================= - // activateIndex — pointer-store writes verified against real DB + // Orphan bootstrap repair — bare cluster index missing from the store (#36237) // ========================================================================= + /** + * Given Scenario: Phase 1. A bare OS index physically exists in the cluster (default + * settings, empty mapping — no custom {@code my_analyzer}, no dotCMS dynamic + * templates) but is absent from the index store. This is the orphan left by a + * prior bootstrap that created the index but never committed its store pointer, + * reproduced bare exactly as QA did for #36237 TC-003. + * When : the bootstrap seam {@code createContentIndex(name, shards, OS, ...)} runs. + * Then : the orphan is deleted and recreated from scratch, so the index now carries the FULL + * settings (custom {@code my_analyzer}) and base mapping (dotCMS dynamic templates). + * This is the regression guard for TC-003 — reusing the bare orphan in place left it + * half-mapped and triggered {@code putMapping HTTP 400} (analyzer not found). + */ + @Test + public void test_bootstrap_bareOrphan_phase1_isRecreatedWithFullSettingsAndMapping() + throws Exception { + setPhase(1); + + // Pre: create a BARE orphan — default settings, no mapping (QA recreated it as PUT {}). + osIndexAPI.createIndex(physicalDualWorking, 0); + assertTrue("Pre: bare orphan must exist in OS", + osIndexAPI.indexExists(physicalDualWorking)); + assertFalse("Pre: bare orphan must NOT yet carry the dotCMS dynamic templates", + mappingOps.getMapping(DUAL_WORKING).contains("template_1")); + assertFalse("Pre: bare orphan must NOT yet define the custom my_analyzer", + analyzers().containsKey("my_analyzer")); + + // When: run the orphan-aware bootstrap seam against the real OS collaborators. + final boolean result = ((ContentletIndexAPIImpl) contentletIndexAPI()) + .createContentIndex(DUAL_WORKING, 1, IndexTag.OS, + opsOS, osIndexAPI, MappingHelper.getInstance()); + + // Then: the orphan was repaired — full settings + base mapping are present. + assertTrue("Bootstrap must report the recreated index as available", result); + assertTrue("Recreated index must exist in OS", + osIndexAPI.indexExists(physicalDualWorking)); + assertTrue("Recreated index must carry the dotCMS dynamic templates (base mapping restored)", + mappingOps.getMapping(DUAL_WORKING).contains("template_1")); + assertTrue("Recreated index must define the custom my_analyzer (settings restored)", + analyzers().containsKey("my_analyzer")); + + Logger.info(this, "✅ bootstrap bare-orphan Phase 1 — recreated with full settings + mapping"); + } + /** * Given Scenario: Phase 0. ES store is currently pointing at the pre-existing app index. * When : activateIndex(ES_WORKING) is called. @@ -915,6 +970,22 @@ private static void setPhase(final int ordinal) { Config.setProperty(FLAG_KEY, String.valueOf(ordinal)); } + /** + * Reads back the analyzer map configured on the {@link #physicalDualWorking} OS index. + * Returns an empty map when the index has no analysis settings (e.g. a bare orphan). + */ + private Map analyzers() throws IOException { + final OpenSearchClient client = clientProvider.getClient(); + final GetIndexResponse response = client.indices().get(b -> b.index(physicalDualWorking)); + final var indexState = response.result().get(physicalDualWorking); + if (indexState == null || indexState.settings() == null + || indexState.settings().index() == null + || indexState.settings().index().analysis() == null) { + return Map.of(); + } + return indexState.settings().index().analysis().analyzer(); + } + private static ContentletIndexAPI contentletIndexAPI() { return APILocator.getContentletIndexAPI(); }