Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -655,15 +655,35 @@ public synchronized boolean createContentIndex(final String indexName, final int
* Create an index exclusively in one of the SE Providers.
*
* <p><b>Idempotent bootstrap.</b> If the physical index already exists in the target
* cluster it is reused instead of issuing a create. This guards against an orphaned
* cluster index — present in the cluster but missing from the index store — left behind
* when a previous bootstrap created the index but never committed its store pointer
* (e.g. the OS {@code VersionedIndices} row, or after a partial/interrupted startup).
* Without this guard the restart re-derives the same logical name, the create fails with
* {@code resource_already_exists}, and {@code checkAndInitializeIndex()} aborts — leaving
* the instance half-initialised. The custom mapping is (re)applied either way
* ({@code putMapping} is additive/idempotent), so a previously unmapped orphan is repaired,
* and the caller's {@code point()} re-registers the index in the store.</p>
* cluster it is an <em>orphan</em> — present in the cluster but missing from the index
* store — left behind when a previous bootstrap created the index but never committed its
* store pointer (e.g. the OS {@code VersionedIndices} row, or after a partial/interrupted
* startup). Without handling this, the restart re-derives the same logical name, the create
* fails with {@code resource_already_exists}, and {@code checkAndInitializeIndex()} aborts —
* leaving the instance half-initialised.</p>
*
* <p>The orphan is handled by document count, so a populated index is never discarded:</p>
* <ul>
* <li><b>Empty orphan (0 docs)</b> — deleted and recreated from scratch. In-place reuse
* cannot fully repair a bare orphan: the content mapping references a custom analyzer
* ({@code my_analyzer}) defined in the provider settings file, and analyzers are
* <em>static</em> index settings that can only be applied at creation time — so a
* {@code putMapping}-only re-assert against a bare orphan fails with {@code HTTP 400}
* (analyzer not found) and leaves the index half-mapped (issue #36237, QA TC-003). An
* empty index has no data and no reindex progress, so recreating it costs nothing
* operationally and yields a clean index with full settings + base mapping. If the
* delete cannot be confirmed and the index is still present, bootstrap fails loudly
* rather than register a half-mapped index.</li>
* <li><b>Populated orphan (&gt; 0 docs), or count unknown</b> — reused in place, untouched.
* A populated orphan was created by dotCMS itself, so it already carries the full
* settings + base mapping + custom mapping; nothing needs to be (re)applied. The index is
* never deleted here: discarding it would throw away its contents (including partial
* reindex progress) and force a full reindex, which can run for hours and degrade search
* consistency — not justified to clean up an orphan. On any uncertainty (the count probe
* fails) we err toward reuse for the same reason.</li>
* </ul>
*
* <p>The caller's {@code point()} then registers the index in the store.</p>
*
* @param indexName logical index name (no cluster prefix, no vendor tag)
* @param shards number of shards to create with (ignored when the index already exists)
Expand Down Expand Up @@ -714,11 +734,60 @@ boolean createContentIndex(final String indexName, final int shards, final Index
+ e.getMessage(), e))
.getOrElse(false);
if (alreadyExists) {
// Orphan: exists in cluster, missing from store (see method javadoc). Decide by doc
// count so a populated index — including partial reindex progress — is never discarded.
// The count probe is best-effort: any failure is treated as "has data" (-1) so we err
// toward reuse and never delete on uncertainty.
final long docCount = Try.of(() -> ops.getIndexDocumentCount(physicalName))
.onFailure(e -> Logger.warn(this,
"Orphan doc-count probe failed for " + physicalName
+ " — treating as populated and reusing in place: "
+ e.getMessage(), e))
.getOrElse(-1L);

if (docCount != 0L) {
// Populated (or unknown): reuse in place, untouched. A dotCMS-created index already
// carries the full settings + base mapping + custom mapping, so nothing needs to be
// (re)applied. Deleting it would force a full reindex (hours, degraded search) —
// not justified to clean up an orphan.
Logger.info(this, String.format(
"Bootstrap: orphaned %s index found with %s document(s); reusing in place"
+ " (not deleting, not remapping): %s",
tag, docCount < 0 ? "an unknown number of" : docCount, physicalName));
return true;
}

// Empty orphan: delete so the create below rebuilds a clean index with full settings +
// base mapping. An empty index has no data and no reindex progress, so this is safe and
// costs nothing operationally (issue #36237 — repairs a bare orphan that reuse cannot).
Logger.info(this, String.format(
"Bootstrap: %s index already exists, reusing and re-asserting mapping: %s",
"Bootstrap: empty orphaned %s index found (in cluster, missing from store);"
+ " deleting and recreating with full settings + mapping: %s",
tag, physicalName));
helper.addCustomMapping(List.of(indexName), tag);
return true;
final boolean deleted = Try.of(() -> providerApi.delete(physicalName))
.onFailure(e -> Logger.warn(this,
"Failed to delete empty orphaned index " + physicalName
+ ": " + e.getMessage(), e))
.getOrElse(false);
if (!deleted) {
// Delete not acknowledged. Re-probe: it may have taken effect without an ack, in
// which case we can still recreate cleanly. If the index is genuinely still there
// we must NOT proceed — recreating would throw resource_already_exists, and reusing
// it would register a bare orphan whose mapping cannot be repaired (the custom
// analyzer is a create-time-only setting). Fail loud instead of leaving a
// half-mapped index in the store. This is an abnormal cluster state, not the
// orphan-name collision this method otherwise resolves.
final boolean stillExists = Try.of(() -> providerApi.indexExists(physicalName))
.getOrElse(true);
if (stillExists) {
throw new IOException("Empty orphaned " + tag + " index " + physicalName
+ " could not be deleted and still exists; aborting bootstrap to avoid"
+ " registering a half-mapped index. Check the search cluster health"
+ " and restart.");
}
Logger.warn(this, "Empty orphaned index " + physicalName + " delete was not"
+ " acknowledged, but the index is gone; proceeding to recreate.");
}
}

final boolean contentIndex = ops.createContentIndex(physicalName, shards);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.anyInt;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.mock;
Expand Down Expand Up @@ -30,12 +31,20 @@
*
* <p>The behaviour under test:</p>
* <ul>
* <li>an index already present in the cluster is <b>reused</b> (no create) and its mapping is
* re-asserted — the orphaned-index repair path;</li>
* <li>an <b>empty</b> orphan (0 docs) is <b>deleted and recreated</b> from scratch (full settings
* + base mapping + custom mapping) — reusing in place cannot repair a bare orphan whose
* static custom analyzer can only be set at creation time (#36237);</li>
* <li>a <b>populated</b> orphan (&gt; 0 docs) is <b>reused untouched</b> — never deleted,
* recreated, or remapped, so its data (and any reindex progress) is preserved;</li>
* <li>a failing doc-count probe is treated as "populated" — the orphan is reused, never deleted;</li>
* <li>a missing index is created and, on success, mapped;</li>
* <li>a failed create does not apply a mapping;</li>
* <li>a failing existence probe is treated as "does not exist", so bootstrap falls through to
* the create path instead of aborting.</li>
* the create path instead of aborting;</li>
* <li>an empty orphan whose delete is unacknowledged but is actually gone recreates cleanly;</li>
* <li>an empty orphan whose delete fails while the index is still present fails loudly, rather
* than recreating (would throw {@code resource_already_exists}) or registering a bare,
* un-repairable index.</li>
* </ul>
*/
public class ContentletIndexAPIImplBootstrapTest {
Expand All @@ -60,29 +69,142 @@ private static ContentletIndexAPIImpl newApi() {
}

/**
* Given : the physical index already exists in the target cluster (an orphaned cluster index
* left behind by a previous bootstrap that never committed its store pointer).
* Given : an EMPTY orphaned index (0 docs) exists in the cluster but is missing from the store
* (left by a previous bootstrap that never committed its store pointer).
* When : createContentIndex() runs during bootstrap.
* Then : the index is reused (no create is issued), the custom mapping is re-asserted to
* repair a possibly-unmapped orphan, and the method returns true.
* Then : the empty orphan is deleted and recreated from scratch (full settings + base mapping),
* the custom mapping is applied to the clean index, and the method returns true.
*/
@Test
public void test_orphanIndexExists_reusesAndReassertsMapping_skipsCreate() throws IOException {
public void test_emptyOrphan_deletedAndRecreated_withFullMapping() throws IOException {
final ContentletIndexOperations ops = mock(ContentletIndexOperations.class);
final IndexAPI providerApi = mock(IndexAPI.class);
final MappingHelper helper = mock(MappingHelper.class);

when(ops.toPhysicalName(LOGICAL_NAME)).thenReturn(PHYSICAL_NAME);
when(providerApi.indexExists(PHYSICAL_NAME)).thenReturn(true);
when(ops.getIndexDocumentCount(PHYSICAL_NAME)).thenReturn(0L);
when(providerApi.delete(PHYSICAL_NAME)).thenReturn(true);
when(ops.createContentIndex(PHYSICAL_NAME, SHARDS)).thenReturn(true);

final boolean result = newApi()
.createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.ES, ops, providerApi, helper);

assertTrue("Existing (orphaned) index must be reused and reported as available", result);
assertTrue("Empty orphan must be recreated and reported as available", result);
verify(providerApi).delete(PHYSICAL_NAME);
verify(ops).createContentIndex(PHYSICAL_NAME, SHARDS);
verify(helper).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.ES);
}

/**
 * Given : the existence probe reports the index as present and the doc-count probe reports
 *         42 documents — the "populated orphan" scenario (&gt; 0 docs).
 * When  : createContentIndex() is invoked with the ES tag.
 * Then  : the call reports success and the index is left exactly as found: no delete is
 *         issued against the provider, no create is issued, and the custom mapping is not
 *         applied again.
 */
@Test
public void test_populatedOrphan_reusedInPlace_notDeletedNotRemapped() throws IOException {
    final MappingHelper mappingHelper = mock(MappingHelper.class);
    final IndexAPI indexApi = mock(IndexAPI.class);
    final ContentletIndexOperations indexOps = mock(ContentletIndexOperations.class);

    // Stubs are independent of one another: name resolution, existence probe, doc count.
    when(indexOps.getIndexDocumentCount(PHYSICAL_NAME)).thenReturn(42L);
    when(indexApi.indexExists(PHYSICAL_NAME)).thenReturn(true);
    when(indexOps.toPhysicalName(LOGICAL_NAME)).thenReturn(PHYSICAL_NAME);

    final ContentletIndexAPIImpl api = newApi();
    final boolean available = api.createContentIndex(
            LOGICAL_NAME, SHARDS, IndexTag.ES, indexOps, indexApi, mappingHelper);

    assertTrue("Populated orphan must be reused and reported as available", available);

    // None of the mutating operations may have been touched.
    verify(mappingHelper, never()).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.ES);
    verify(indexOps, never()).createContentIndex(anyString(), anyInt());
    verify(indexApi, never()).delete(PHYSICAL_NAME);
}

/**
 * Given : an orphan exists but the document-count probe fails (e.g. transient cluster error).
 * When  : createContentIndex() runs during bootstrap.
 * Then  : the uncertainty is treated as "has data" — the orphan is reused in place, exactly
 *         like a populated orphan: it is never deleted, never recreated, and its mapping is
 *         not re-applied, so a possibly-populated index is never discarded or altered on a
 *         flaky probe.
 */
@Test
public void test_orphanDocCountProbeFails_treatedAsPopulated_reused() throws IOException {
    final ContentletIndexOperations ops = mock(ContentletIndexOperations.class);
    final IndexAPI providerApi = mock(IndexAPI.class);
    final MappingHelper helper = mock(MappingHelper.class);

    when(ops.toPhysicalName(LOGICAL_NAME)).thenReturn(PHYSICAL_NAME);
    when(providerApi.indexExists(PHYSICAL_NAME)).thenReturn(true);
    // The probe itself blows up: the document count is unknown, not zero.
    when(ops.getIndexDocumentCount(PHYSICAL_NAME))
            .thenThrow(new RuntimeException("count unavailable"));

    final boolean result = newApi()
            .createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.ES, ops, providerApi, helper);

    assertTrue("Unknown doc count must be reused (never deleted)", result);
    verify(providerApi, never()).delete(PHYSICAL_NAME);
    verify(ops, never()).createContentIndex(anyString(), anyInt());
    // Same guarantee the populated-orphan test pins: reuse in place means no remapping either.
    verify(helper, never()).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.ES);
}

/**
 * Given : the doc-count probe reports 0 documents, the delete call returns {@code false}
 *         (not acknowledged), and the existence probe answers {@code true} on its first call
 *         and {@code false} on the next one — i.e. the index is gone by the time it is
 *         probed again.
 * When  : createContentIndex() is invoked with the ES tag.
 * Then  : the call reports success, a create is issued for the physical name, and the custom
 *         mapping is applied for the logical name.
 */
@Test
public void test_emptyOrphanDeleteUnacked_butIndexGone_recreates() throws IOException {
    final MappingHelper mappingHelper = mock(MappingHelper.class);
    final IndexAPI indexApi = mock(IndexAPI.class);
    final ContentletIndexOperations indexOps = mock(ContentletIndexOperations.class);

    when(indexOps.toPhysicalName(LOGICAL_NAME)).thenReturn(PHYSICAL_NAME);
    when(indexOps.getIndexDocumentCount(PHYSICAL_NAME)).thenReturn(0L);
    when(indexOps.createContentIndex(PHYSICAL_NAME, SHARDS)).thenReturn(true);
    when(indexApi.delete(PHYSICAL_NAME)).thenReturn(false);
    // First answer serves the initial orphan probe, second one the re-probe after the delete.
    when(indexApi.indexExists(PHYSICAL_NAME))
            .thenReturn(true)
            .thenReturn(false);

    final ContentletIndexAPIImpl api = newApi();
    final boolean recreated = api.createContentIndex(
            LOGICAL_NAME, SHARDS, IndexTag.ES, indexOps, indexApi, mappingHelper);

    assertTrue("Unacked delete with the index gone must recreate cleanly", recreated);
    verify(mappingHelper).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.ES);
    verify(indexOps).createContentIndex(PHYSICAL_NAME, SHARDS);
}

/**
 * Given : the doc-count probe reports 0 documents, the delete call throws, and the existence
 *         probe keeps answering {@code true} (the index is still present when re-probed).
 * When  : createContentIndex() is invoked with the ES tag.
 * Then  : an {@link IOException} is raised instead of a result being returned; no create is
 *         issued and the custom mapping is not applied.
 */
@Test
public void test_emptyOrphanDeleteFails_indexStillExists_failsLoud() throws IOException {
    final MappingHelper mappingHelper = mock(MappingHelper.class);
    final IndexAPI indexApi = mock(IndexAPI.class);
    final ContentletIndexOperations indexOps = mock(ContentletIndexOperations.class);

    when(indexOps.toPhysicalName(LOGICAL_NAME)).thenReturn(PHYSICAL_NAME);
    when(indexOps.getIndexDocumentCount(PHYSICAL_NAME)).thenReturn(0L);
    // Same answer for every probe: present before the delete attempt and still present after.
    when(indexApi.indexExists(PHYSICAL_NAME)).thenReturn(true);
    when(indexApi.delete(PHYSICAL_NAME))
            .thenThrow(new RuntimeException("delete not acknowledged"));

    boolean abortedLoudly = false;
    try {
        newApi().createContentIndex(
                LOGICAL_NAME, SHARDS, IndexTag.ES, indexOps, indexApi, mappingHelper);
    } catch (final IOException expected) {
        // expected — bootstrap must not silently register a half-mapped index
        abortedLoudly = true;
    }
    if (!abortedLoudly) {
        fail("A stuck empty orphan (delete fails, index remains) must fail loudly");
    }

    verify(mappingHelper, never()).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.ES);
    verify(indexOps, never()).createContentIndex(anyString(), anyInt());
}

/**
* Given : the physical index does not exist in the target cluster.
* When : createContentIndex() runs and the create succeeds.
Expand Down Expand Up @@ -156,25 +278,31 @@ public void test_existenceProbeThrows_treatedAsMissing_proceedsToCreate() throws
}

/**
* Given : an OS-tagged bootstrap of an already-existing index.
* Given : an OS-tagged bootstrap of an already-existing EMPTY (orphaned) index.
* When : createContentIndex() runs with {@link IndexTag#OS}.
* Then : the mapping is re-asserted against the OS provider — the tag is propagated unchanged
* to the mapping helper so the correct vendor is targeted.
* Then : the empty orphan is deleted and recreated against the OS provider — the fully-tagged
* physical name is used for the delete, create and doc-count probe, and the OS tag is
* propagated unchanged to the mapping helper so the correct vendor is targeted.
*/
@Test
public void test_osTag_isPropagatedToMappingHelper() throws IOException {
public void test_osTag_emptyOrphanDeletedRecreated_andTagPropagated() throws IOException {
final ContentletIndexOperations ops = mock(ContentletIndexOperations.class);
final IndexAPI providerApi = mock(IndexAPI.class);
final MappingHelper helper = mock(MappingHelper.class);

final String osPhysical = PHYSICAL_NAME + ".os";
when(ops.toPhysicalName(LOGICAL_NAME)).thenReturn(osPhysical);
when(providerApi.indexExists(osPhysical)).thenReturn(true);
when(ops.getIndexDocumentCount(osPhysical)).thenReturn(0L);
when(providerApi.delete(osPhysical)).thenReturn(true);
when(ops.createContentIndex(osPhysical, SHARDS)).thenReturn(true);

final boolean result = newApi()
.createContentIndex(LOGICAL_NAME, SHARDS, IndexTag.OS, ops, providerApi, helper);

assertTrue(result);
verify(providerApi).delete(osPhysical);
verify(ops).createContentIndex(osPhysical, SHARDS);
verify(helper).addCustomMapping(List.of(LOGICAL_NAME), IndexTag.OS);
}
}
Loading
Loading