Skip to content

Commit 385d589

Browse files
Make Selective Field Query during Reindexing (#27723)
* Make Selective Field Query * Minor nit * Fix Failing Tests
1 parent f9eb03b commit 385d589

16 files changed

Lines changed: 470 additions & 4 deletions

openmetadata-service/src/main/java/org/openmetadata/service/apps/bundles/searchIndex/EntityReader.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,15 @@ static List<String> getSearchIndexFields(String entityType) {
326326
if (TIME_SERIES_ENTITIES.contains(entityType)) {
327327
return List.of();
328328
}
329-
return List.of("*");
329+
org.openmetadata.service.search.SearchRepository repo =
330+
org.openmetadata.service.Entity.getSearchRepository();
331+
if (repo == null || repo.getSearchIndexFactory() == null) {
332+
// Fallback for environments where the search subsystem isn't bootstrapped (e.g. unit
333+
// tests that exercise the reader without the full Entity registry). Behaves the same
334+
// as the pre-selective-fields code path.
335+
return List.of("*");
336+
}
337+
return new ArrayList<>(repo.getSearchIndexFactory().getReindexFieldsFor(entityType));
330338
}
331339

332340
static int calculateNumberOfReaders(int totalEntityRecords, int batchSize) {

openmetadata-service/src/main/java/org/openmetadata/service/search/SearchIndexFactory.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,28 @@
107107
@Slf4j
108108
public class SearchIndexFactory {
109109

110+
/**
111+
* Returns the minimal set of fields the reindex path must request from
112+
* {@code EntityRepository.setFields} for the given entity type. Probes the corresponding
113+
* index class via {@link #buildIndex(String, Object)} with a {@code null} entity and calls
114+
* {@link SearchIndex#getRequiredReindexFields()}. Index constructors must be safe with a null
115+
* entity for this probe to work — they are today because field declarations are static.
116+
*/
117+
public java.util.Set<String> getReindexFieldsFor(String entityType) {
118+
try {
119+
SearchIndex probe = buildIndex(entityType, null);
120+
if (probe != null) {
121+
return probe.getRequiredReindexFields();
122+
}
123+
} catch (Exception e) {
124+
LOG.warn(
125+
"Failed to probe reindex fields for entity type {}; falling back to common set: {}",
126+
entityType,
127+
e.getMessage());
128+
}
129+
return SearchIndex.COMMON_REINDEX_FIELDS;
130+
}
131+
110132
public SearchIndex buildIndex(String entityType, Object entity) {
111133
return switch (entityType) {
112134
case Entity.TABLE -> new TableIndex((Table) entity);
@@ -177,7 +199,9 @@ public SearchIndex buildIndex(String entityType, Object entity) {
177199
case Entity.PIPELINE_EXECUTION -> {
178200
PipelineExecutionIndex.PipelineExecutionData data =
179201
(PipelineExecutionIndex.PipelineExecutionData) entity;
180-
yield new PipelineExecutionIndex(data.getPipeline(), data.getPipelineStatus());
202+
yield data == null
203+
? new PipelineExecutionIndex(null, null)
204+
: new PipelineExecutionIndex(data.getPipeline(), data.getPipelineStatus());
181205
}
182206
default -> buildExternalIndexes(entityType, entity);
183207
};

openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/ContainerIndex.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import static org.openmetadata.service.search.EntityBuilderConstant.DATA_MODEL_COLUMNS_NAME_KEYWORD;
44

55
import java.util.ArrayList;
6+
import java.util.Collections;
67
import java.util.HashSet;
78
import java.util.List;
89
import java.util.Map;
@@ -35,6 +36,13 @@ public Set<String> getExcludedFields() {
3536
return Set.of("children");
3637
}
3738

39+
@Override
40+
public Set<String> getRequiredReindexFields() {
41+
Set<String> fields = new HashSet<>(DataAssetIndex.super.getRequiredReindexFields());
42+
fields.add("dataModel");
43+
return Collections.unmodifiableSet(fields);
44+
}
45+
3846
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
3947
if (container.getDataModel() != null && container.getDataModel().getColumns() != null) {
4048
List<FlattenColumn> cols = new ArrayList<>();

openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DashboardIndex.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,13 @@ public Set<String> getExcludedFields() {
3232
return Set.of("dataModels");
3333
}
3434

35+
@Override
36+
public Set<String> getRequiredReindexFields() {
37+
Set<String> fields = new java.util.HashSet<>(DataAssetIndex.super.getRequiredReindexFields());
38+
fields.add("charts");
39+
return java.util.Collections.unmodifiableSet(fields);
40+
}
41+
3542
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
3643
return doc;
3744
}

openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/DatabaseIndex.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,13 @@ public Set<String> getExcludedFields() {
2222
return Set.of("databaseSchemas");
2323
}
2424

25+
@Override
26+
public Set<String> getRequiredReindexFields() {
27+
Set<String> fields = new java.util.HashSet<>(TaggableIndex.super.getRequiredReindexFields());
28+
fields.add("usageSummary");
29+
return java.util.Collections.unmodifiableSet(fields);
30+
}
31+
2532
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
2633
return doc;
2734
}

openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/GlossaryTermIndex.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,13 @@ public Set<String> getExcludedFields() {
2929
return Set.of("children");
3030
}
3131

32+
@Override
33+
public Set<String> getRequiredReindexFields() {
34+
Set<String> fields = new java.util.HashSet<>(TaggableIndex.super.getRequiredReindexFields());
35+
fields.add("relatedTerms");
36+
return java.util.Collections.unmodifiableSet(fields);
37+
}
38+
3239
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
3340
if (doc.containsKey("glossary") && glossaryTerm.getGlossary() != null) {
3441
@SuppressWarnings("unchecked")

openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/IngestionPipelineIndex.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ public Set<String> getExcludedFields() {
3333
return excludeFields;
3434
}
3535

36+
@Override
37+
public Set<String> getRequiredReindexFields() {
38+
Set<String> fields = new java.util.HashSet<>(TaggableIndex.super.getRequiredReindexFields());
39+
fields.add("pipelineStatuses");
40+
return java.util.Collections.unmodifiableSet(fields);
41+
}
42+
3643
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
3744
doc.put(
3845
"name",

openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/PipelineIndex.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package org.openmetadata.service.search.indexes;
22

33
import java.util.Map;
4+
import java.util.Set;
45
import org.openmetadata.schema.entity.data.Pipeline;
56
import org.openmetadata.service.Entity;
67

@@ -21,6 +22,13 @@ public String getEntityTypeName() {
2122
return Entity.PIPELINE;
2223
}
2324

25+
@Override
26+
public Set<String> getRequiredReindexFields() {
27+
Set<String> fields = new java.util.HashSet<>(DataAssetIndex.super.getRequiredReindexFields());
28+
fields.add("tasks");
29+
return java.util.Collections.unmodifiableSet(fields);
30+
}
31+
2432
@Override
2533
public Object getIndexServiceType() {
2634
return pipeline.getServiceType();

openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/SearchIndex.java

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,23 @@ public interface SearchIndex {
5353
"connection",
5454
"changeSummary");
5555

56+
/**
57+
* Relationship/enrichment fields fetched by {@code EntityRepository.setFields} that every search
58+
* document populates via {@link #populateCommonFields(Map, EntityInterface, String)}. Stored-JSON
59+
* fields (name, displayName, description, service, entity-native counts) are NOT in this set —
60+
* they live on the entity row and need no extra fetch.
61+
*/
62+
Set<String> COMMON_REINDEX_FIELDS =
63+
Set.of(
64+
"owners",
65+
"domains",
66+
"reviewers",
67+
"followers",
68+
"votes",
69+
"extension",
70+
"certification",
71+
"dataProducts");
72+
5673
SearchClient searchClient = Entity.getSearchRepository().getSearchClient();
5774
Logger LOG = LoggerFactory.getLogger(SearchIndex.class);
5875

@@ -114,6 +131,23 @@ default Set<String> getExcludedFields() {
114131

115132
Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> esDoc);
116133

134+
/**
135+
* Returns the minimal set of fields the {@code SearchIndexApp} reindex path must ask
136+
* {@code EntityRepository.setFields} to populate for this index to build a correct document.
137+
*
138+
* <p>Default is {@link #COMMON_REINDEX_FIELDS}, augmented with {@code "tags"} when the index
139+
* implements {@link TaggableIndex}. Individual index classes override to add entity-specific
140+
* relationships. Keep this method side-effect-free and safe to call on a probe instance whose
141+
* entity is {@code null} — it is invoked without an entity to discover fields statically.
142+
*/
143+
default Set<String> getRequiredReindexFields() {
144+
Set<String> fields = new java.util.HashSet<>(COMMON_REINDEX_FIELDS);
145+
if (this instanceof TaggableIndex) {
146+
fields.add("tags");
147+
}
148+
return java.util.Collections.unmodifiableSet(fields);
149+
}
150+
117151
/**
118152
* Populates common entity fields into the search index document. Called automatically by {@link
119153
* #buildSearchIndexDoc()} for all EntityInterface-based entities. Individual index classes should

openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/SpreadsheetIndex.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,13 @@ public Object getIndexServiceType() {
3636
return spreadsheet.getServiceType();
3737
}
3838

39+
@Override
40+
public Set<String> getRequiredReindexFields() {
41+
Set<String> fields = new java.util.HashSet<>(DataAssetIndex.super.getRequiredReindexFields());
42+
fields.add("worksheets");
43+
return java.util.Collections.unmodifiableSet(fields);
44+
}
45+
3946
public Map<String, Object> buildSearchIndexDocInternal(Map<String, Object> doc) {
4047
doc.put("directory", getEntityWithDisplayName(spreadsheet.getDirectory()));
4148
doc.put("mimeType", spreadsheet.getMimeType());

0 commit comments

Comments
 (0)