Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,15 @@ static List<String> getSearchIndexFields(String entityType) {
if (TIME_SERIES_ENTITIES.contains(entityType)) {
return List.of();
}
return List.of("*");
org.openmetadata.service.search.SearchRepository repo =
org.openmetadata.service.Entity.getSearchRepository();
if (repo == null || repo.getSearchIndexFactory() == null) {
// Fallback for environments where the search subsystem isn't bootstrapped (e.g. unit
// tests that exercise the reader without the full Entity registry). Behaves the same
// as the pre-selective-fields code path.
return List.of("*");
}
return new ArrayList<>(repo.getSearchIndexFactory().getReindexFieldsFor(entityType));
}

static int calculateNumberOfReaders(int totalEntityRecords, int batchSize) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,28 @@
@Slf4j
public class SearchIndexFactory {

/**
* Returns the minimal set of fields the reindex path must request from
* {@code EntityRepository.setFields} for the given entity type. Probes the corresponding
* index class via {@link #buildIndex(String, Object)} with a {@code null} entity and calls
* {@link SearchIndex#getRequiredReindexFields()}. Index constructors must be safe with a null
* entity for this probe to work — they are today because field declarations are static.
*/
public java.util.Set<String> getReindexFieldsFor(String entityType) {
try {
SearchIndex probe = buildIndex(entityType, null);
if (probe != null) {
return probe.getRequiredReindexFields();
}
} catch (Exception e) {
LOG.warn(
"Failed to probe reindex fields for entity type {}; falling back to common set: {}",
entityType,
e.getMessage());
Comment on lines +125 to +127
Copy link

Copilot AI Apr 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The catch block logs only e.getMessage() and drops the exception/stack trace, which can make it hard to diagnose why probing failed (especially if getMessage() is null). Consider logging the exception itself (e.g., as the last argument) so failures in field probing are actionable.

Suggested change
"Failed to probe reindex fields for entity type {}; falling back to common set: {}",
entityType,
e.getMessage());
"Failed to probe reindex fields for entity type {}; falling back to common set",
entityType,
e);

Copilot uses AI. Check for mistakes.
}
return SearchIndex.COMMON_REINDEX_FIELDS;
Comment on lines +123 to +129
Copy link

Copilot AI Apr 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getReindexFieldsFor uses LOG.warn(...), but this class is annotated with Lombok @Slf4j (which generates a log field) and there is no LOG logger defined here. This will not compile; switch to log.warn(...) (or add an explicit private static final Logger LOG = ...).

Copilot uses AI. Check for mistakes.
}
Comment thread
mohityadav766 marked this conversation as resolved.
Comment on lines +110 to +130
Copy link

Copilot AI Apr 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Selective reindex fields are introduced via getReindexFieldsFor(...), but the primary single-server reindex path (SearchIndexExecutor) still hardcodes List.of("*") for entity reads. This means the performance/OOM mitigation won’t apply in the default reindex strategy unless that path is updated to use the factory-derived field set (similar to EntityReader).

Copilot uses AI. Check for mistakes.

public SearchIndex buildIndex(String entityType, Object entity) {
return switch (entityType) {
case Entity.TABLE -> new TableIndex((Table) entity);
Expand Down Expand Up @@ -177,7 +199,9 @@ public SearchIndex buildIndex(String entityType, Object entity) {
case Entity.PIPELINE_EXECUTION -> {
PipelineExecutionIndex.PipelineExecutionData data =
(PipelineExecutionIndex.PipelineExecutionData) entity;
yield new PipelineExecutionIndex(data.getPipeline(), data.getPipelineStatus());
yield data == null
? new PipelineExecutionIndex(null, null)
: new PipelineExecutionIndex(data.getPipeline(), data.getPipelineStatus());
}
default -> buildExternalIndexes(entityType, entity);
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import static org.openmetadata.service.search.EntityBuilderConstant.DATA_MODEL_COLUMNS_NAME_KEYWORD;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -35,6 +36,13 @@ public Set<String> getExcludedFields() {
return Set.of("children");
}

@Override
public Set<String> getRequiredReindexFields() {
Set<String> fields = new HashSet<>(DataAssetIndex.super.getRequiredReindexFields());
fields.add("dataModel");
return Collections.unmodifiableSet(fields);
}

public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
if (container.getDataModel() != null && container.getDataModel().getColumns() != null) {
List<FlattenColumn> cols = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ public Set<String> getExcludedFields() {
return Set.of("dataModels");
}

@Override
public Set<String> getRequiredReindexFields() {
Set<String> fields = new java.util.HashSet<>(DataAssetIndex.super.getRequiredReindexFields());
fields.add("charts");
return java.util.Collections.unmodifiableSet(fields);
}

public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
return doc;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ public Set<String> getExcludedFields() {
return Set.of("databaseSchemas");
}

@Override
public Set<String> getRequiredReindexFields() {
Set<String> fields = new java.util.HashSet<>(TaggableIndex.super.getRequiredReindexFields());
fields.add("usageSummary");
return java.util.Collections.unmodifiableSet(fields);
}

public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
return doc;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ public Set<String> getExcludedFields() {
return Set.of("children");
}

@Override
public Set<String> getRequiredReindexFields() {
Set<String> fields = new java.util.HashSet<>(TaggableIndex.super.getRequiredReindexFields());
fields.add("relatedTerms");
return java.util.Collections.unmodifiableSet(fields);
}

public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
if (doc.containsKey("glossary") && glossaryTerm.getGlossary() != null) {
@SuppressWarnings("unchecked")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,13 @@ public Set<String> getExcludedFields() {
return excludeFields;
}

@Override
public Set<String> getRequiredReindexFields() {
Set<String> fields = new java.util.HashSet<>(TaggableIndex.super.getRequiredReindexFields());
fields.add("pipelineStatuses");
return java.util.Collections.unmodifiableSet(fields);
}

public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
doc.put(
"name",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.openmetadata.service.search.indexes;

import java.util.Map;
import java.util.Set;
import org.openmetadata.schema.entity.data.Pipeline;
import org.openmetadata.service.Entity;

Expand All @@ -21,6 +22,13 @@ public String getEntityTypeName() {
return Entity.PIPELINE;
}

@Override
public Set<String> getRequiredReindexFields() {
Set<String> fields = new java.util.HashSet<>(DataAssetIndex.super.getRequiredReindexFields());
fields.add("tasks");
return java.util.Collections.unmodifiableSet(fields);
}

@Override
public Object getIndexServiceType() {
return pipeline.getServiceType();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,23 @@ public interface SearchIndex {
"connection",
"changeSummary");

/**
* Relationship/enrichment fields fetched by {@code EntityRepository.setFields} that every search
* document populates via {@link #populateCommonFields(Map, EntityInterface, String)}. Stored-JSON
* fields (name, displayName, description, service, entity-native counts) are NOT in this set —
* they live on the entity row and need no extra fetch.
*/
Set<String> COMMON_REINDEX_FIELDS =
Set.of(
"owners",
"domains",
"reviewers",
"followers",
"votes",
"extension",
"certification",
"dataProducts");
Comment thread
mohityadav766 marked this conversation as resolved.

SearchClient searchClient = Entity.getSearchRepository().getSearchClient();
Logger LOG = LoggerFactory.getLogger(SearchIndex.class);

Expand Down Expand Up @@ -114,6 +131,23 @@ default Set<String> getExcludedFields() {

Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> esDoc);

/**
* Returns the minimal set of fields the {@code SearchIndexApp} reindex path must ask
* {@code EntityRepository.setFields} to populate for this index to build a correct document.
*
* <p>Default is {@link #COMMON_REINDEX_FIELDS}, augmented with {@code "tags"} when the index
* implements {@link TaggableIndex}. Individual index classes override to add entity-specific
* relationships. Keep this method side-effect-free and safe to call on a probe instance whose
* entity is {@code null} — it is invoked without an entity to discover fields statically.
*/
default Set<String> getRequiredReindexFields() {
Set<String> fields = new java.util.HashSet<>(COMMON_REINDEX_FIELDS);
if (this instanceof TaggableIndex) {
fields.add("tags");
}
return java.util.Collections.unmodifiableSet(fields);
}

/**
* Populates common entity fields into the search index document. Called automatically by {@link
* #buildSearchIndexDoc()} for all EntityInterface-based entities. Individual index classes should
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ public Object getIndexServiceType() {
return spreadsheet.getServiceType();
}

@Override
public Set<String> getRequiredReindexFields() {
Set<String> fields = new java.util.HashSet<>(DataAssetIndex.super.getRequiredReindexFields());
fields.add("worksheets");
return java.util.Collections.unmodifiableSet(fields);
}

public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
doc.put("directory", getEntityWithDisplayName(spreadsheet.getDirectory()));
doc.put("mimeType", spreadsheet.getMimeType());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,12 @@ public Set<String> getExcludedFields() {
}

@Override
public Object getIndexServiceType() {
return table.getServiceType();
public Set<String> getRequiredReindexFields() {
Set<String> fields = new HashSet<>(DataAssetIndex.super.getRequiredReindexFields());
// "columns" is fields-gated in TableRepository; without it column-level tags are not
// hydrated, breaking tag merge in the search doc.
fields.add("columns");
return java.util.Collections.unmodifiableSet(fields);
}

Comment thread
mohityadav766 marked this conversation as resolved.
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.openmetadata.service.search.indexes;

import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.openmetadata.schema.entity.teams.Team;
Expand All @@ -13,6 +14,13 @@ public TeamIndex(Team team) {
this.team = team;
}

@Override
public Set<String> getRequiredReindexFields() {
Set<String> fields = new HashSet<>(SearchIndex.super.getRequiredReindexFields());
fields.add("parents");
return java.util.Collections.unmodifiableSet(fields);
}

@Override
public Object getEntity() {
return team;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,15 @@ public String getEntityTypeName() {
return Entity.TEST_CASE;
}

@Override
public Set<String> getRequiredReindexFields() {
Set<String> fields = new java.util.HashSet<>(TaggableIndex.super.getRequiredReindexFields());
fields.add("testSuite");
fields.add("testSuites");
fields.add("testDefinition");
return java.util.Collections.unmodifiableSet(fields);
}

@Override
public void removeNonIndexableFields(Map<String, Object> esDoc) {
TaggableIndex.super.removeNonIndexableFields(esDoc);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.openmetadata.service.search.indexes;

import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.openmetadata.schema.entity.teams.User;
Expand All @@ -13,6 +14,15 @@ public UserIndex(User user) {
this.user = user;
}

@Override
public Set<String> getRequiredReindexFields() {
Set<String> fields = new HashSet<>(SearchIndex.super.getRequiredReindexFields());
fields.add("teams");
fields.add("roles");
fields.add("inheritedRoles");
return java.util.Collections.unmodifiableSet(fields);
}

@Override
public Object getEntity() {
return user;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
package org.openmetadata.service.search;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.Set;
import java.util.function.Supplier;
import java.util.stream.Stream;
import org.junit.jupiter.api.AfterAll;
Expand Down Expand Up @@ -161,6 +164,76 @@ void buildIndexRejectsUnknownEntityTypes() {
org.junit.jupiter.api.Assertions.assertTrue(exception.getMessage().contains("unknownType"));
}

@ParameterizedTest
@MethodSource("supportedIndexMappings")
void reindexFieldsProbeSucceedsForEveryEntityType(
String entityType, Supplier<Object> entitySupplier, Class<? extends SearchIndex> indexClass) {
// The factory probes each Index with a null entity to read its static field declarations.
// This asserts every Index constructor is null-safe and that a non-empty field set is returned.
Set<String> fields = factory.getReindexFieldsFor(entityType);
assertFalse(
fields.isEmpty(),
() -> "Reindex fields for " + entityType + " must not be empty; got " + fields);
}
Comment thread
mohityadav766 marked this conversation as resolved.

@ParameterizedTest
@MethodSource("supportedIndexMappings")
void commonReindexFieldsPresentForEveryEntityType(
String entityType, Supplier<Object> entitySupplier, Class<? extends SearchIndex> indexClass) {
Set<String> fields = factory.getReindexFieldsFor(entityType);
for (String common : SearchIndex.COMMON_REINDEX_FIELDS) {
assertTrue(
fields.contains(common),
() -> entityType + " reindex fields missing common field '" + common + "': " + fields);
Comment thread
mohityadav766 marked this conversation as resolved.
}
}

@Test
void reindexFieldsIncludeKnownOverrides() {
// Regression guard: every Index class that adds its own fields via getRequiredReindexFields
// must continue to surface those fields through the factory probe.
assertTrue(factory.getReindexFieldsFor(Entity.TABLE).contains("columns"));
assertTrue(factory.getReindexFieldsFor(Entity.CONTAINER).contains("dataModel"));
assertTrue(factory.getReindexFieldsFor(Entity.SPREADSHEET).contains("worksheets"));
assertTrue(factory.getReindexFieldsFor(Entity.INGESTION_PIPELINE).contains("pipelineStatuses"));
assertTrue(factory.getReindexFieldsFor(Entity.DATABASE).contains("usageSummary"));
assertTrue(factory.getReindexFieldsFor(Entity.DASHBOARD).contains("charts"));
assertTrue(factory.getReindexFieldsFor(Entity.PIPELINE).contains("tasks"));
assertTrue(factory.getReindexFieldsFor(Entity.GLOSSARY_TERM).contains("relatedTerms"));
assertTrue(factory.getReindexFieldsFor(Entity.TEAM).contains("parents"));
Set<String> userFields = factory.getReindexFieldsFor(Entity.USER);
assertTrue(userFields.contains("teams"));
assertTrue(userFields.contains("roles"));
assertTrue(userFields.contains("inheritedRoles"));
Set<String> testCaseFields = factory.getReindexFieldsFor(Entity.TEST_CASE);
assertTrue(testCaseFields.contains("testSuite"));
assertTrue(testCaseFields.contains("testSuites"));
assertTrue(testCaseFields.contains("testDefinition"));
}

@Test
void reindexFieldsOmitKnownFanOutFields() {
// These are the "blow up the heap" relationships we explicitly do NOT want fetched during
// reindex. They either live in the Index's getExcludedFields() (stripped post-hoc) or
// aren't read by buildSearchIndexDocInternal. Either way, asking setFields to load them
// would be wasted work and risks OOM on large parents.
assertFalse(factory.getReindexFieldsFor(Entity.DATABASE_SCHEMA).contains("tables"));
assertFalse(factory.getReindexFieldsFor(Entity.DATABASE).contains("databaseSchemas"));
assertFalse(factory.getReindexFieldsFor(Entity.TEAM).contains("users"));
assertFalse(factory.getReindexFieldsFor(Entity.CONTAINER).contains("children"));
assertFalse(factory.getReindexFieldsFor(Entity.API_COLLECTION).contains("apiEndpoints"));
assertFalse(factory.getReindexFieldsFor(Entity.DASHBOARD).contains("dataModels"));
assertFalse(factory.getReindexFieldsFor(Entity.GLOSSARY_TERM).contains("children"));
}

@Test
void reindexFieldsUnknownEntityTypeFallsBackToCommon() {
// Graceful degradation: if a new entity type is added and the factory can't probe it,
// the reindex path still works with the common set rather than throwing.
Set<String> fields = factory.getReindexFieldsFor("nonExistentEntityType");
org.junit.jupiter.api.Assertions.assertEquals(SearchIndex.COMMON_REINDEX_FIELDS, fields);
}

private static Stream<Arguments> supportedIndexMappings() {
return Stream.of(
Arguments.of(Entity.TABLE, (Supplier<Object>) Table::new, TableIndex.class),
Expand Down
Loading
Loading