From 2c86134d20ad01795ee16de088eecf5515d02727 Mon Sep 17 00:00:00 2001 From: mohitdeuex Date: Fri, 24 Apr 2026 23:34:28 +0530 Subject: [PATCH 1/3] Make Selective Field Query --- .../bundles/searchIndex/EntityReader.java | 6 +- .../service/search/SearchIndexFactory.java | 26 +- .../search/indexes/ContainerIndex.java | 7 + .../search/indexes/DashboardIndex.java | 7 + .../service/search/indexes/DatabaseIndex.java | 7 + .../search/indexes/GlossaryTermIndex.java | 7 + .../indexes/IngestionPipelineIndex.java | 7 + .../service/search/indexes/PipelineIndex.java | 8 + .../service/search/indexes/SearchIndex.java | 34 +++ .../search/indexes/SpreadsheetIndex.java | 7 + .../service/search/indexes/TableIndex.java | 8 +- .../service/search/indexes/TeamIndex.java | 8 + .../service/search/indexes/TestCaseIndex.java | 9 + .../service/search/indexes/UserIndex.java | 10 + .../search/SearchIndexFactoryTest.java | 73 ++++++ .../SearchIndexReindexFieldsParityTest.java | 245 ++++++++++++++++++ 16 files changed, 465 insertions(+), 4 deletions(-) create mode 100644 openmetadata-service/src/test/java/org/openmetadata/service/search/SearchIndexReindexFieldsParityTest.java diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReader.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReader.java index c77b78d5b226..eaf9ceefc442 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReader.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReader.java @@ -326,7 +326,11 @@ static List getSearchIndexFields(String entityType) { if (TIME_SERIES_ENTITIES.contains(entityType)) { return List.of(); } - return List.of("*"); + Set required = + org.openmetadata.service.Entity.getSearchRepository() + .getSearchIndexFactory() + .getReindexFieldsFor(entityType); + return new ArrayList<>(required); } static int 
calculateNumberOfReaders(int totalEntityRecords, int batchSize) { diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchIndexFactory.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchIndexFactory.java index 1af43d6f4ca6..33d3908ad22e 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchIndexFactory.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchIndexFactory.java @@ -107,6 +107,28 @@ @Slf4j public class SearchIndexFactory { + /** + * Returns the minimal set of fields the reindex path must request from + * {@code EntityRepository.setFields} for the given entity type. Probes the corresponding + * index class via {@link #buildIndex(String, Object)} with a {@code null} entity and calls + * {@link SearchIndex#getRequiredReindexFields()}. Index constructors must be safe with a null + * entity for this probe to work — they are today because field declarations are static. + */ + public java.util.Set getReindexFieldsFor(String entityType) { + try { + SearchIndex probe = buildIndex(entityType, null); + if (probe != null) { + return probe.getRequiredReindexFields(); + } + } catch (Exception e) { + LOG.warn( + "Failed to probe reindex fields for entity type {}; falling back to common set: {}", + entityType, + e.getMessage()); + } + return SearchIndex.COMMON_REINDEX_FIELDS; + } + public SearchIndex buildIndex(String entityType, Object entity) { return switch (entityType) { case Entity.TABLE -> new TableIndex((Table) entity); @@ -177,7 +199,9 @@ public SearchIndex buildIndex(String entityType, Object entity) { case Entity.PIPELINE_EXECUTION -> { PipelineExecutionIndex.PipelineExecutionData data = (PipelineExecutionIndex.PipelineExecutionData) entity; - yield new PipelineExecutionIndex(data.getPipeline(), data.getPipelineStatus()); + yield data == null + ? 
new PipelineExecutionIndex(null, null) + : new PipelineExecutionIndex(data.getPipeline(), data.getPipelineStatus()); } default -> buildExternalIndexes(entityType, entity); }; diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ContainerIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ContainerIndex.java index 5d6f7aa2f6c3..4ef7934fa223 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ContainerIndex.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ContainerIndex.java @@ -35,6 +35,13 @@ public Set getExcludedFields() { return Set.of("children"); } + @Override + public Set getRequiredReindexFields() { + Set fields = new HashSet<>(DataAssetIndex.super.getRequiredReindexFields()); + fields.add("dataModel"); + return java.util.Collections.unmodifiableSet(fields); + } + public Map buildSearchIndexDocInternal(Map doc) { if (container.getDataModel() != null && container.getDataModel().getColumns() != null) { List cols = new ArrayList<>(); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardIndex.java index c9b0bdab7f0b..a59e0f3ffab6 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardIndex.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardIndex.java @@ -32,6 +32,13 @@ public Set getExcludedFields() { return Set.of("dataModels"); } + @Override + public Set getRequiredReindexFields() { + Set fields = new java.util.HashSet<>(DataAssetIndex.super.getRequiredReindexFields()); + fields.add("charts"); + return java.util.Collections.unmodifiableSet(fields); + } + public Map buildSearchIndexDocInternal(Map doc) { return doc; } diff --git 
a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseIndex.java index 655dd00a51bc..ffbb3a96d5a5 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseIndex.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseIndex.java @@ -22,6 +22,13 @@ public Set getExcludedFields() { return Set.of("databaseSchemas"); } + @Override + public Set getRequiredReindexFields() { + Set fields = new java.util.HashSet<>(TaggableIndex.super.getRequiredReindexFields()); + fields.add("usageSummary"); + return java.util.Collections.unmodifiableSet(fields); + } + public Map buildSearchIndexDocInternal(Map doc) { return doc; } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/GlossaryTermIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/GlossaryTermIndex.java index 50ca62faac17..e4c7a54718ab 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/GlossaryTermIndex.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/GlossaryTermIndex.java @@ -29,6 +29,13 @@ public Set getExcludedFields() { return Set.of("children"); } + @Override + public Set getRequiredReindexFields() { + Set fields = new java.util.HashSet<>(TaggableIndex.super.getRequiredReindexFields()); + fields.add("relatedTerms"); + return java.util.Collections.unmodifiableSet(fields); + } + public Map buildSearchIndexDocInternal(Map doc) { if (doc.containsKey("glossary") && glossaryTerm.getGlossary() != null) { @SuppressWarnings("unchecked") diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/IngestionPipelineIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/IngestionPipelineIndex.java index c8f70229bc06..d3f9a59f6874 
100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/IngestionPipelineIndex.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/IngestionPipelineIndex.java @@ -33,6 +33,13 @@ public Set getExcludedFields() { return excludeFields; } + @Override + public Set getRequiredReindexFields() { + Set fields = new java.util.HashSet<>(TaggableIndex.super.getRequiredReindexFields()); + fields.add("pipelineStatuses"); + return java.util.Collections.unmodifiableSet(fields); + } + public Map buildSearchIndexDocInternal(Map doc) { doc.put( "name", diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/PipelineIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/PipelineIndex.java index 70959e0922a1..bdb698bd89f6 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/PipelineIndex.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/PipelineIndex.java @@ -1,6 +1,7 @@ package org.openmetadata.service.search.indexes; import java.util.Map; +import java.util.Set; import org.openmetadata.schema.entity.data.Pipeline; import org.openmetadata.service.Entity; @@ -21,6 +22,13 @@ public String getEntityTypeName() { return Entity.PIPELINE; } + @Override + public Set getRequiredReindexFields() { + Set fields = new java.util.HashSet<>(DataAssetIndex.super.getRequiredReindexFields()); + fields.add("tasks"); + return java.util.Collections.unmodifiableSet(fields); + } + @Override public Object getIndexServiceType() { return pipeline.getServiceType(); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/SearchIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/SearchIndex.java index a61a8ba9b3a1..9760feeb256b 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/SearchIndex.java +++ 
b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/SearchIndex.java @@ -53,6 +53,23 @@ public interface SearchIndex { "connection", "changeSummary"); + /** + * Relationship/enrichment fields fetched by {@code EntityRepository.setFields} that every search + * document populates via {@link #populateCommonFields(Map, EntityInterface, String)}. Stored-JSON + * fields (name, displayName, description, service, entity-native counts) are NOT in this set — + * they live on the entity row and need no extra fetch. + */ + Set COMMON_REINDEX_FIELDS = + Set.of( + "owners", + "domains", + "reviewers", + "followers", + "votes", + "extension", + "certification", + "dataProducts"); + SearchClient searchClient = Entity.getSearchRepository().getSearchClient(); Logger LOG = LoggerFactory.getLogger(SearchIndex.class); @@ -114,6 +131,23 @@ default Set getExcludedFields() { Map buildSearchIndexDocInternal(Map esDoc); + /** + * Returns the minimal set of fields the {@code SearchIndexApp} reindex path must ask + * {@code EntityRepository.setFields} to populate for this index to build a correct document. + * + *

Default is {@link #COMMON_REINDEX_FIELDS}, augmented with {@code "tags"} when the index + * implements {@link TaggableIndex}. Individual index classes override to add entity-specific + * relationships. Keep this method side-effect-free and safe to call on a probe instance whose + * entity is {@code null} — it is invoked without an entity to discover fields statically. + */ + default Set getRequiredReindexFields() { + Set fields = new java.util.HashSet<>(COMMON_REINDEX_FIELDS); + if (this instanceof TaggableIndex) { + fields.add("tags"); + } + return java.util.Collections.unmodifiableSet(fields); + } + /** * Populates common entity fields into the search index document. Called automatically by {@link * #buildSearchIndexDoc()} for all EntityInterface-based entities. Individual index classes should diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/SpreadsheetIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/SpreadsheetIndex.java index b7d27484f4fe..a55274a09b47 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/SpreadsheetIndex.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/SpreadsheetIndex.java @@ -36,6 +36,13 @@ public Object getIndexServiceType() { return spreadsheet.getServiceType(); } + @Override + public Set getRequiredReindexFields() { + Set fields = new java.util.HashSet<>(DataAssetIndex.super.getRequiredReindexFields()); + fields.add("worksheets"); + return java.util.Collections.unmodifiableSet(fields); + } + public Map buildSearchIndexDocInternal(Map doc) { doc.put("directory", getEntityWithDisplayName(spreadsheet.getDirectory())); doc.put("mimeType", spreadsheet.getMimeType()); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/TableIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/TableIndex.java index f70565519410..126a513216af 
100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/TableIndex.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/TableIndex.java @@ -44,8 +44,12 @@ public Set getExcludedFields() { } @Override - public Object getIndexServiceType() { - return table.getServiceType(); + public Set getRequiredReindexFields() { + Set fields = new HashSet<>(DataAssetIndex.super.getRequiredReindexFields()); + // "columns" is fields-gated in TableRepository; without it column-level tags are not + // hydrated, breaking tag merge in the search doc. + fields.add("columns"); + return java.util.Collections.unmodifiableSet(fields); } public Map buildSearchIndexDocInternal(Map doc) { diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/TeamIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/TeamIndex.java index 58fad179c7e6..e8e98d9cec4b 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/TeamIndex.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/TeamIndex.java @@ -1,5 +1,6 @@ package org.openmetadata.service.search.indexes; +import java.util.HashSet; import java.util.Map; import java.util.Set; import org.openmetadata.schema.entity.teams.Team; @@ -13,6 +14,13 @@ public TeamIndex(Team team) { this.team = team; } + @Override + public Set getRequiredReindexFields() { + Set fields = new HashSet<>(SearchIndex.super.getRequiredReindexFields()); + fields.add("parents"); + return java.util.Collections.unmodifiableSet(fields); + } + @Override public Object getEntity() { return team; diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/TestCaseIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/TestCaseIndex.java index ceb5711ac9e3..e19a891024a0 100644 --- 
a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/TestCaseIndex.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/TestCaseIndex.java @@ -31,6 +31,15 @@ public String getEntityTypeName() { return Entity.TEST_CASE; } + @Override + public Set getRequiredReindexFields() { + Set fields = new java.util.HashSet<>(TaggableIndex.super.getRequiredReindexFields()); + fields.add("testSuite"); + fields.add("testSuites"); + fields.add("testDefinition"); + return java.util.Collections.unmodifiableSet(fields); + } + @Override public void removeNonIndexableFields(Map esDoc) { TaggableIndex.super.removeNonIndexableFields(esDoc); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/UserIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/UserIndex.java index f22538d4a6b3..a557c9201657 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/UserIndex.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/UserIndex.java @@ -1,5 +1,6 @@ package org.openmetadata.service.search.indexes; +import java.util.HashSet; import java.util.Map; import java.util.Set; import org.openmetadata.schema.entity.teams.User; @@ -13,6 +14,15 @@ public UserIndex(User user) { this.user = user; } + @Override + public Set getRequiredReindexFields() { + Set fields = new HashSet<>(SearchIndex.super.getRequiredReindexFields()); + fields.add("teams"); + fields.add("roles"); + fields.add("inheritedRoles"); + return java.util.Collections.unmodifiableSet(fields); + } + @Override public Object getEntity() { return user; diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/search/SearchIndexFactoryTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/search/SearchIndexFactoryTest.java index 706252be3152..7138bcc5ad32 100644 --- 
a/openmetadata-service/src/test/java/org/openmetadata/service/search/SearchIndexFactoryTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/search/SearchIndexFactoryTest.java @@ -1,10 +1,13 @@ package org.openmetadata.service.search; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertInstanceOf; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; +import java.util.Set; import java.util.function.Supplier; import java.util.stream.Stream; import org.junit.jupiter.api.AfterAll; @@ -161,6 +164,76 @@ void buildIndexRejectsUnknownEntityTypes() { org.junit.jupiter.api.Assertions.assertTrue(exception.getMessage().contains("unknownType")); } + @ParameterizedTest + @MethodSource("supportedIndexMappings") + void reindexFieldsProbeSucceedsForEveryEntityType( + String entityType, Supplier entitySupplier, Class indexClass) { + // The factory probes each Index with a null entity to read its static field declarations. + // This asserts every Index constructor is null-safe and that a non-empty field set is returned. 
+ Set fields = factory.getReindexFieldsFor(entityType); + assertFalse( + fields.isEmpty(), + () -> "Reindex fields for " + entityType + " must not be empty; got " + fields); + } + + @ParameterizedTest + @MethodSource("supportedIndexMappings") + void commonReindexFieldsPresentForEveryEntityType( + String entityType, Supplier entitySupplier, Class indexClass) { + Set fields = factory.getReindexFieldsFor(entityType); + for (String common : SearchIndex.COMMON_REINDEX_FIELDS) { + assertTrue( + fields.contains(common), + () -> entityType + " reindex fields missing common field '" + common + "': " + fields); + } + } + + @Test + void reindexFieldsIncludeKnownOverrides() { + // Regression guard: every Index class that adds its own fields via getRequiredReindexFields + // must continue to surface those fields through the factory probe. + assertTrue(factory.getReindexFieldsFor(Entity.TABLE).contains("columns")); + assertTrue(factory.getReindexFieldsFor(Entity.CONTAINER).contains("dataModel")); + assertTrue(factory.getReindexFieldsFor(Entity.SPREADSHEET).contains("worksheets")); + assertTrue(factory.getReindexFieldsFor(Entity.INGESTION_PIPELINE).contains("pipelineStatuses")); + assertTrue(factory.getReindexFieldsFor(Entity.DATABASE).contains("usageSummary")); + assertTrue(factory.getReindexFieldsFor(Entity.DASHBOARD).contains("charts")); + assertTrue(factory.getReindexFieldsFor(Entity.PIPELINE).contains("tasks")); + assertTrue(factory.getReindexFieldsFor(Entity.GLOSSARY_TERM).contains("relatedTerms")); + assertTrue(factory.getReindexFieldsFor(Entity.TEAM).contains("parents")); + Set userFields = factory.getReindexFieldsFor(Entity.USER); + assertTrue(userFields.contains("teams")); + assertTrue(userFields.contains("roles")); + assertTrue(userFields.contains("inheritedRoles")); + Set testCaseFields = factory.getReindexFieldsFor(Entity.TEST_CASE); + assertTrue(testCaseFields.contains("testSuite")); + assertTrue(testCaseFields.contains("testSuites")); + 
assertTrue(testCaseFields.contains("testDefinition")); + } + + @Test + void reindexFieldsOmitKnownFanOutFields() { + // These are the "blow up the heap" relationships we explicitly do NOT want fetched during + // reindex. They either live in the Index's getExcludedFields() (stripped post-hoc) or + // aren't read by buildSearchIndexDocInternal. Either way, asking setFields to load them + // would be wasted work and risks OOM on large parents. + assertFalse(factory.getReindexFieldsFor(Entity.DATABASE_SCHEMA).contains("tables")); + assertFalse(factory.getReindexFieldsFor(Entity.DATABASE).contains("databaseSchemas")); + assertFalse(factory.getReindexFieldsFor(Entity.TEAM).contains("users")); + assertFalse(factory.getReindexFieldsFor(Entity.CONTAINER).contains("children")); + assertFalse(factory.getReindexFieldsFor(Entity.API_COLLECTION).contains("apiEndpoints")); + assertFalse(factory.getReindexFieldsFor(Entity.DASHBOARD).contains("dataModels")); + assertFalse(factory.getReindexFieldsFor(Entity.GLOSSARY_TERM).contains("children")); + } + + @Test + void reindexFieldsUnknownEntityTypeFallsBackToCommon() { + // Graceful degradation: if a new entity type is added and the factory can't probe it, + // the reindex path still works with the common set rather than throwing. 
+ Set fields = factory.getReindexFieldsFor("nonExistentEntityType"); + org.junit.jupiter.api.Assertions.assertEquals(SearchIndex.COMMON_REINDEX_FIELDS, fields); + } + private static Stream supportedIndexMappings() { return Stream.of( Arguments.of(Entity.TABLE, (Supplier) Table::new, TableIndex.class), diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/search/SearchIndexReindexFieldsParityTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/search/SearchIndexReindexFieldsParityTest.java new file mode 100644 index 000000000000..98c5364b9e27 --- /dev/null +++ b/openmetadata-service/src/test/java/org/openmetadata/service/search/SearchIndexReindexFieldsParityTest.java @@ -0,0 +1,245 @@ +package org.openmetadata.service.search; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.openmetadata.schema.entity.data.Dashboard; +import org.openmetadata.schema.entity.data.Database; +import org.openmetadata.schema.entity.data.DatabaseSchema; +import org.openmetadata.schema.entity.teams.Team; +import org.openmetadata.schema.entity.teams.User; +import org.openmetadata.schema.type.EntityReference; +import org.openmetadata.schema.utils.JsonUtils; +import org.openmetadata.service.Entity; +import org.openmetadata.service.search.indexes.DashboardIndex; +import org.openmetadata.service.search.indexes.DatabaseIndex; +import org.openmetadata.service.search.indexes.DatabaseSchemaIndex; +import org.openmetadata.service.search.indexes.SearchIndex; +import org.openmetadata.service.search.indexes.TeamIndex; +import 
org.openmetadata.service.search.indexes.UserIndex; + +/** + * Static contract guards for the selective-reindex refactor. + * + *

<p>Models the silent-drop risk chain without booting the Entity registry: + * + * <ol>
+ * <li>{@code EntityRepository.setFields} with a pruned field list sets fan-out fields to null on + * the entity.
+ * <li>{@code JsonUtils.getMap(entity)} serializes the entity; null collections drop out.
+ * <li>{@code Index.removeNonIndexableFields} strips anything in {@code getExcludedFields}.
+ * <li>What is left goes into the ES document.
+ * </ol> + * + * <p>
If an Index class reads a fan-out field in its {@code buildSearchIndexDocInternal}, the field + * WOULD flow into the doc — that Index must declare the field in {@code getRequiredReindexFields}. + * These tests verify for each known fan-out that the end-state doc omits it regardless of whether + * {@code setFields} populated it, matching the refactor intent. + */ +class SearchIndexReindexFieldsParityTest { + + @BeforeAll + static void setUpSearchRepository() { + SearchRepository repository = mock(SearchRepository.class); + when(repository.getSearchClient()).thenReturn(mock(SearchClient.class)); + Entity.setSearchRepository(repository); + } + + @AfterAll + static void clearSearchRepository() { + Entity.setSearchRepository(null); + } + + // --- excluded-field contract ---------------------------------------------------- + + /** {@code DatabaseSchema.tables} is the OOM trigger — must stay stripped. */ + @Test + void databaseSchemaIndexStripsTablesField() { + DatabaseSchema withTables = basicSchema().withTables(fakeEntityRefs(1_000, "table")); + Map doc = simulatePostSerialization(withTables); + applyExcludedFields(doc, new DatabaseSchemaIndex(withTables).getExcludedFields()); + + assertFalse( + doc.containsKey("tables"), + "DatabaseSchemaIndex.getExcludedFields() must continue to strip 'tables'"); + } + + /** Modeling: when we don't fetch tables, the JSON has no tables key at all. 
*/ + @Test + void databaseSchemaWithoutTablesProducesSameDoc() { + DatabaseSchema withoutTables = basicSchema(); + DatabaseSchema withTables = basicSchema().withTables(fakeEntityRefs(100, "table")); + + Map docA = simulatePostSerialization(withoutTables); + Map docB = simulatePostSerialization(withTables); + Set strip = new DatabaseSchemaIndex(withoutTables).getExcludedFields(); + applyExcludedFields(docA, strip); + applyExcludedFields(docB, strip); + + assertFalse(docA.containsKey("tables")); + assertFalse(docB.containsKey("tables")); + // The docs should be byte-identical for fields we care about. tables is stripped; + // any other observable field difference would indicate the Index accidentally reads tables. + assertDocsEqual(docA, docB, Set.of()); + } + + /** Database.databaseSchemas — same pattern. */ + @Test + void databaseIndexStripsDatabaseSchemasField() { + Database withSchemas = + basicDatabase().withDatabaseSchemas(fakeEntityRefs(200, "databaseSchema")); + Map doc = simulatePostSerialization(withSchemas); + applyExcludedFields(doc, new DatabaseIndex(withSchemas).getExcludedFields()); + + assertFalse(doc.containsKey("databaseSchemas")); + } + + /** Team.users — potentially huge, explicitly excluded. */ + @Test + void teamIndexStripsFanOutFields() { + Team team = + basicTeam() + .withUsers(fakeEntityRefs(5_000, "user")) + .withDefaultRoles(fakeEntityRefs(20, "role")) + .withInheritedRoles(fakeEntityRefs(20, "role")); + + Map doc = simulatePostSerialization(team); + applyExcludedFields(doc, new TeamIndex(team).getExcludedFields()); + + assertFalse(doc.containsKey("users")); + assertFalse(doc.containsKey("defaultRoles")); + assertFalse(doc.containsKey("inheritedRoles")); + assertFalse(doc.containsKey("owns")); + } + + /** User.owns, User.follows — power-user fan-out, excluded. 
*/ + @Test + void userIndexStripsFanOutFields() { + User u = + basicUser() + .withOwns(fakeEntityRefs(5_000, "table")) + .withFollows(fakeEntityRefs(1_000, "topic")); + + Map doc = simulatePostSerialization(u); + applyExcludedFields(doc, new UserIndex(u).getExcludedFields()); + + assertFalse(doc.containsKey("owns")); + assertFalse(doc.containsKey("follows")); + assertFalse(doc.containsKey("authenticationMechanism")); + } + + /** Dashboard.dataModels — excluded (charts is NOT excluded — see positive test below). */ + @Test + void dashboardIndexStripsDataModelsButKeepsCharts() { + Dashboard dash = + basicDashboard() + .withCharts(fakeEntityRefs(10, "chart")) + .withDataModels(fakeEntityRefs(10, "dashboardDataModel")); + + Map doc = simulatePostSerialization(dash); + applyExcludedFields(doc, new DashboardIndex(dash).getExcludedFields()); + + assertFalse(doc.containsKey("dataModels"), "dataModels must be stripped from dashboard doc"); + assertTrue( + doc.containsKey("charts"), + "charts must NOT be stripped — the dashboard_search_index indexes them"); + } + + // --- common-field contract guard ------------------------------------------------ + + @Test + void commonReindexFieldsMatchDocumentedSet() { + org.junit.jupiter.api.Assertions.assertEquals( + Set.of( + "owners", + "domains", + "reviewers", + "followers", + "votes", + "extension", + "certification", + "dataProducts"), + SearchIndex.COMMON_REINDEX_FIELDS); + } + + // --- helpers -------------------------------------------------------------------- + + /** + * Serializes the entity to a Map the way {@code SearchIndex.buildSearchIndexDoc()} does on its + * first line: {@code esDoc = JsonUtils.getMap(entity)}. This captures exactly what would land in + * the doc before any Index-specific enrichment. 
+ */ + @SuppressWarnings("unchecked") + private static Map simulatePostSerialization(Object entity) { + Map raw = JsonUtils.getMap(entity); + // getMap may return an immutable map depending on the codec; copy so we can strip. + return new HashMap<>(raw); + } + + private static void applyExcludedFields(Map doc, Set excluded) { + // Models SearchIndexUtils.removeNonIndexableFields — deep path notation isn't exercised + // by these entities; top-level removal is sufficient here. + Set stripKeys = new HashSet<>(excluded); + stripKeys.retainAll(doc.keySet()); + stripKeys.forEach(doc::remove); + } + + private static void assertDocsEqual( + Map a, Map b, Set ignoreKeys) { + Set keysA = new HashSet<>(a.keySet()); + Set keysB = new HashSet<>(b.keySet()); + keysA.removeAll(ignoreKeys); + keysB.removeAll(ignoreKeys); + org.junit.jupiter.api.Assertions.assertEquals(keysA, keysB, "doc keys must match"); + } + + private static DatabaseSchema basicSchema() { + return new DatabaseSchema() + .withId(UUID.randomUUID()) + .withName("s") + .withFullyQualifiedName("svc.db.s"); + } + + private static Database basicDatabase() { + return new Database().withId(UUID.randomUUID()).withName("db").withFullyQualifiedName("svc.db"); + } + + private static Team basicTeam() { + return new Team().withId(UUID.randomUUID()).withName("team").withFullyQualifiedName("team"); + } + + private static User basicUser() { + return new User() + .withId(UUID.randomUUID()) + .withName("alice") + .withFullyQualifiedName("alice") + .withIsBot(false); + } + + private static Dashboard basicDashboard() { + return new Dashboard().withId(UUID.randomUUID()).withName("d").withFullyQualifiedName("svc.d"); + } + + private static List fakeEntityRefs(int count, String type) { + return java.util.stream.IntStream.range(0, count) + .mapToObj( + i -> + new EntityReference() + .withId(UUID.randomUUID()) + .withType(type) + .withName(type + "_" + i) + .withFullyQualifiedName(type + "_" + i)) + .toList(); + } +} From 
c4351c8306cc25418f211e977b4d3c5592584538 Mon Sep 17 00:00:00 2001 From: mohitdeuex Date: Mon, 27 Apr 2026 14:19:53 +0530 Subject: [PATCH 2/3] Minor nit --- .../openmetadata/service/search/indexes/ContainerIndex.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ContainerIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ContainerIndex.java index 4ef7934fa223..3978da2d3e6f 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ContainerIndex.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ContainerIndex.java @@ -3,6 +3,7 @@ import static org.openmetadata.service.search.EntityBuilderConstant.DATA_MODEL_COLUMNS_NAME_KEYWORD; import java.util.ArrayList; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -39,7 +40,7 @@ public Set getExcludedFields() { public Set getRequiredReindexFields() { Set fields = new HashSet<>(DataAssetIndex.super.getRequiredReindexFields()); fields.add("dataModel"); - return java.util.Collections.unmodifiableSet(fields); + return Collections.unmodifiableSet(fields); } public Map buildSearchIndexDocInternal(Map doc) { From ef4cc5820eb491adc5720d23adc53564e62f5315 Mon Sep 17 00:00:00 2001 From: mohitdeuex Date: Mon, 27 Apr 2026 22:30:30 +0530 Subject: [PATCH 3/3] Fix Failing Tests --- .../apps/bundles/searchIndex/EntityReader.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReader.java b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReader.java index eaf9ceefc442..a2ab5750cd50 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReader.java +++ 
b/openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReader.java @@ -326,11 +326,15 @@ static List getSearchIndexFields(String entityType) { if (TIME_SERIES_ENTITIES.contains(entityType)) { return List.of(); } - Set required = - org.openmetadata.service.Entity.getSearchRepository() - .getSearchIndexFactory() - .getReindexFieldsFor(entityType); - return new ArrayList<>(required); + org.openmetadata.service.search.SearchRepository repo = + org.openmetadata.service.Entity.getSearchRepository(); + if (repo == null || repo.getSearchIndexFactory() == null) { + // Fallback for environments where the search subsystem isn't bootstrapped (e.g. unit + // tests that exercise the reader without the full Entity registry). Behaves the same + // as the pre-selective-fields code path. + return List.of("*"); + } + return new ArrayList<>(repo.getSearchIndexFactory().getReindexFieldsFor(entityType)); } static int calculateNumberOfReaders(int totalEntityRecords, int batchSize) {