Skip to content

Commit d121367

Browse files
mohityadav766 and claude committed
Make Selective Field Query during Reindexing (#27723)
* Make Selective Field Query
* Minor nit
* Fix Failing Tests

(cherry picked from commit 385d589)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 4ace688 commit d121367

15 files changed

Lines changed: 400 additions & 3 deletions

File tree

openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReader.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,15 @@ static List<String> getSearchIndexFields(String entityType) {
326326
if (TIME_SERIES_ENTITIES.contains(entityType)) {
327327
return List.of();
328328
}
329-
return List.of("*");
329+
org.openmetadata.service.search.SearchRepository repo =
330+
org.openmetadata.service.Entity.getSearchRepository();
331+
if (repo == null || repo.getSearchIndexFactory() == null) {
332+
// Fallback for environments where the search subsystem isn't bootstrapped (e.g. unit
333+
// tests that exercise the reader without the full Entity registry). Behaves the same
334+
// as the pre-selective-fields code path.
335+
return List.of("*");
336+
}
337+
return new ArrayList<>(repo.getSearchIndexFactory().getReindexFieldsFor(entityType));
330338
}
331339

332340
static int calculateNumberOfReaders(int totalEntityRecords, int batchSize) {

openmetadata-service/src/main/java/org/openmetadata/service/search/SearchIndexFactory.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,28 @@
104104
@Slf4j
105105
public class SearchIndexFactory {
106106

107+
/**
108+
* Returns the minimal set of fields the reindex path must request from
109+
* {@code EntityRepository.setFields} for the given entity type. Probes the corresponding
110+
* index class via {@link #buildIndex(String, Object)} with a {@code null} entity and calls
111+
* {@link SearchIndex#getRequiredReindexFields()}. Index constructors must be safe with a null
112+
* entity for this probe to work — they are today because field declarations are static.
113+
*/
114+
public java.util.Set<String> getReindexFieldsFor(String entityType) {
115+
try {
116+
SearchIndex probe = buildIndex(entityType, null);
117+
if (probe != null) {
118+
return probe.getRequiredReindexFields();
119+
}
120+
} catch (Exception e) {
121+
LOG.warn(
122+
"Failed to probe reindex fields for entity type {}; falling back to common set: {}",
123+
entityType,
124+
e.getMessage());
125+
}
126+
return SearchIndex.COMMON_REINDEX_FIELDS;
127+
}
128+
107129
public SearchIndex buildIndex(String entityType, Object entity) {
108130
return switch (entityType) {
109131
case Entity.TABLE -> new TableIndex((Table) entity);
@@ -171,7 +193,9 @@ public SearchIndex buildIndex(String entityType, Object entity) {
171193
case Entity.PIPELINE_EXECUTION -> {
172194
PipelineExecutionIndex.PipelineExecutionData data =
173195
(PipelineExecutionIndex.PipelineExecutionData) entity;
174-
yield new PipelineExecutionIndex(data.getPipeline(), data.getPipelineStatus());
196+
yield data == null
197+
? new PipelineExecutionIndex(null, null)
198+
: new PipelineExecutionIndex(data.getPipeline(), data.getPipelineStatus());
175199
}
176200
default -> buildExternalIndexes(entityType, entity);
177201
};

openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ContainerIndex.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import static org.openmetadata.service.search.EntityBuilderConstant.DATA_MODEL_COLUMNS_NAME_KEYWORD;
44

55
import java.util.ArrayList;
6+
import java.util.Collections;
67
import java.util.HashSet;
78
import java.util.List;
89
import java.util.Map;
@@ -26,6 +27,13 @@ public Set<String> getExcludedFields() {
2627
return Set.of("children");
2728
}
2829

30+
@Override
31+
public Set<String> getRequiredReindexFields() {
32+
Set<String> fields = new HashSet<>(ColumnIndex.super.getRequiredReindexFields());
33+
fields.add("dataModel");
34+
return Collections.unmodifiableSet(fields);
35+
}
36+
2937
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
3038
Set<List<TagLabel>> tagsWithChildren = new HashSet<>();
3139
List<String> columnsWithChildrenName = new ArrayList<>();

openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardIndex.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@ public Set<String> getExcludedFields() {
2323
return Set.of("dataModels");
2424
}
2525

26+
@Override
27+
public Set<String> getRequiredReindexFields() {
28+
Set<String> fields = new java.util.HashSet<>(SearchIndex.super.getRequiredReindexFields());
29+
fields.add("charts");
30+
return java.util.Collections.unmodifiableSet(fields);
31+
}
32+
2633
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
2734
ParseTags parseTags = new ParseTags(Entity.getEntityTags(Entity.DASHBOARD, dashboard));
2835
Map<String, Object> commonAttributes = getCommonAttributesMap(dashboard, Entity.DASHBOARD);

openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseIndex.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,13 @@ public Set<String> getExcludedFields() {
1717
return Set.of("databaseSchemas");
1818
}
1919

20+
@Override
21+
public Set<String> getRequiredReindexFields() {
22+
Set<String> fields = new java.util.HashSet<>(SearchIndex.super.getRequiredReindexFields());
23+
fields.add("usageSummary");
24+
return java.util.Collections.unmodifiableSet(fields);
25+
}
26+
2027
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
2128
Map<String, Object> commonAttributes = getCommonAttributesMap(database, Entity.DATABASE);
2229
doc.putAll(commonAttributes);

openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/GlossaryTermIndex.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,13 @@ public Set<String> getExcludedFields() {
2222
return Set.of("children");
2323
}
2424

25+
@Override
26+
public Set<String> getRequiredReindexFields() {
27+
Set<String> fields = new java.util.HashSet<>(SearchIndex.super.getRequiredReindexFields());
28+
fields.add("relatedTerms");
29+
return java.util.Collections.unmodifiableSet(fields);
30+
}
31+
2532
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
2633
Map<String, Object> commonAttributes =
2734
getCommonAttributesMap(glossaryTerm, Entity.GLOSSARY_TERM);

openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/IngestionPipelineIndex.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,13 @@ public Set<String> getExcludedFields() {
2929
return excludeFields;
3030
}
3131

32+
@Override
33+
public Set<String> getRequiredReindexFields() {
34+
Set<String> fields = new java.util.HashSet<>(SearchIndex.super.getRequiredReindexFields());
35+
fields.add("pipelineStatuses");
36+
return java.util.Collections.unmodifiableSet(fields);
37+
}
38+
3239
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
3340
ParseTags parseTags =
3441
new ParseTags(Entity.getEntityTags(Entity.INGESTION_PIPELINE, ingestionPipeline));

openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/PipelineIndex.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.openmetadata.service.search.indexes;
22

33
import java.util.Map;
4+
import java.util.Set;
45
import org.openmetadata.schema.entity.data.Pipeline;
56
import org.openmetadata.service.Entity;
67
import org.openmetadata.service.search.ParseTags;
@@ -17,6 +18,14 @@ public Object getEntity() {
1718
return pipeline;
1819
}
1920

21+
@Override
22+
public Set<String> getRequiredReindexFields() {
23+
Set<String> fields = new java.util.HashSet<>(SearchIndex.super.getRequiredReindexFields());
24+
fields.add("tasks");
25+
return java.util.Collections.unmodifiableSet(fields);
26+
}
27+
28+
2029
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
2130
ParseTags parseTags = new ParseTags(Entity.getEntityTags(Entity.PIPELINE, pipeline));
2231
Map<String, Object> commonAttributes = getCommonAttributesMap(pipeline, Entity.PIPELINE);

openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/SearchIndex.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,22 @@ public interface SearchIndex {
5656
"connection",
5757
"changeSummary");
5858

59+
/**
60+
* Relationship/enrichment fields fetched by {@code EntityRepository.setFields} that every search
61+
* document populates. Stored-JSON fields (name, displayName, description, service, entity-native
62+
* counts) are NOT in this set — they live on the entity row and need no extra fetch.
63+
*/
64+
Set<String> COMMON_REINDEX_FIELDS =
65+
Set.of(
66+
"owners",
67+
"domains",
68+
"reviewers",
69+
"followers",
70+
"votes",
71+
"extension",
72+
"certification",
73+
"dataProducts");
74+
5975
public static final SearchClient searchClient = Entity.getSearchRepository().getSearchClient();
6076
static final Logger LOG = LoggerFactory.getLogger(SearchIndex.class);
6177

@@ -92,6 +108,21 @@ default Set<String> getExcludedFields() {
92108

93109
Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> esDoc);
94110

111+
/**
112+
* Returns the minimal set of fields the {@code SearchIndexApp} reindex path must ask
113+
* {@code EntityRepository.setFields} to populate for this index to build a correct document.
114+
*
115+
* <p>Default is {@link #COMMON_REINDEX_FIELDS} plus {@code "tags"}. Individual index classes
116+
* override to add entity-specific relationships. Keep this method side-effect-free and safe to
117+
* call on a probe instance whose entity is {@code null} — it is invoked without an entity to
118+
* discover fields statically.
119+
*/
120+
default Set<String> getRequiredReindexFields() {
121+
Set<String> fields = new java.util.HashSet<>(COMMON_REINDEX_FIELDS);
122+
fields.add("tags");
123+
return java.util.Collections.unmodifiableSet(fields);
124+
}
125+
95126
default Map<String, Object> getCommonAttributesMap(EntityInterface entity, String entityType) {
96127
Map<String, Object> map = new HashMap<>();
97128
map.put(

openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/SpreadsheetIndex.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ public Set<String> getExcludedFields() {
2828
return excludeSpreadsheetFields;
2929
}
3030

31+
@Override
32+
public Set<String> getRequiredReindexFields() {
33+
Set<String> fields = new java.util.HashSet<>(SearchIndex.super.getRequiredReindexFields());
34+
fields.add("worksheets");
35+
return java.util.Collections.unmodifiableSet(fields);
36+
}
37+
38+
3139
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
3240
ParseTags parseTags = new ParseTags(Entity.getEntityTags(Entity.SPREADSHEET, spreadsheet));
3341
List<TagLabel> tags = new ArrayList<>();

0 commit comments

Comments
 (0)