From c2a502b189f438d970b35929553b2c5b45297522 Mon Sep 17 00:00:00 2001 From: lautel Date: Thu, 16 Apr 2026 15:49:02 +0200 Subject: [PATCH 1/3] Add new "textToEmbedSemantic" field --- .../search/vector/VectorDocBuilder.java | 278 +++++++++++++++++- .../vector/utils/AvailableEntityTypes.java | 3 +- .../service/search/vector/utils/DTOs.java | 3 + .../search/vector/VectorDocBuilderTest.java | 148 ++++++++++ .../en/api_collection_index_mapping.json | 3 + .../en/api_endpoint_index_mapping.json | 3 + .../elasticsearch/en/chart_index_mapping.json | 3 + .../en/container_index_mapping.json | 3 + .../dashboard_data_model_index_mapping.json | 3 + .../en/dashboard_index_mapping.json | 3 + .../en/data_products_index_mapping.json | 3 + .../en/database_index_mapping.json | 3 + .../en/database_schema_index_mapping.json | 3 + .../en/directory_index_mapping.json | 3 + .../elasticsearch/en/file_index_mapping.json | 3 + .../en/glossary_index_mapping.json | 3 + .../en/glossary_term_index_mapping.json | 3 + .../en/metric_index_mapping.json | 3 + .../en/mlmodel_index_mapping.json | 3 + .../en/pipeline_index_mapping.json | 3 + .../en/search_entity_index_mapping.json | 3 + .../en/spreadsheet_index_mapping.json | 3 + .../en/stored_procedure_index_mapping.json | 3 + .../elasticsearch/en/table_index_mapping.json | 3 + .../elasticsearch/en/tag_index_mapping.json | 3 + .../elasticsearch/en/topic_index_mapping.json | 3 + .../en/worksheet_index_mapping.json | 3 + .../jp/api_collection_index_mapping.json | 3 + .../jp/api_endpoint_index_mapping.json | 3 + .../elasticsearch/jp/chart_index_mapping.json | 3 + .../jp/container_index_mapping.json | 3 + .../dashboard_data_model_index_mapping.json | 3 + .../jp/dashboard_index_mapping.json | 3 + .../jp/data_products_index_mapping.json | 3 + .../jp/database_index_mapping.json | 3 + .../jp/database_schema_index_mapping.json | 3 + .../jp/directory_index_mapping.json | 3 + .../elasticsearch/jp/file_index_mapping.json | 3 + .../jp/glossary_index_mapping.json | 3 
+ .../jp/glossary_term_index_mapping.json | 3 + .../jp/metric_index_mapping.json | 3 + .../jp/mlmodel_index_mapping.json | 3 + .../jp/pipeline_index_mapping.json | 3 + .../jp/search_entity_index_mapping.json | 3 + .../jp/spreadsheet_index_mapping.json | 3 + .../jp/stored_procedure_index_mapping.json | 3 + .../elasticsearch/jp/table_index_mapping.json | 3 + .../elasticsearch/jp/tag_index_mapping.json | 3 + .../elasticsearch/jp/topic_index_mapping.json | 3 + .../jp/worksheet_index_mapping.json | 3 + .../ru/api_collection_index_mapping.json | 3 + .../ru/api_endpoint_index_mapping.json | 3 + .../elasticsearch/ru/chart_index_mapping.json | 3 + .../ru/container_index_mapping.json | 3 + .../dashboard_data_model_index_mapping.json | 3 + .../ru/dashboard_index_mapping.json | 3 + .../ru/data_products_index_mapping.json | 3 + .../ru/database_index_mapping.json | 3 + .../ru/database_schema_index_mapping.json | 3 + .../ru/directory_index_mapping.json | 3 + .../elasticsearch/ru/file_index_mapping.json | 3 + .../ru/glossary_index_mapping.json | 3 + .../ru/glossary_term_index_mapping.json | 3 + .../ru/metric_index_mapping.json | 3 + .../ru/mlmodel_index_mapping.json | 3 + .../ru/pipeline_index_mapping.json | 3 + .../ru/search_entity_index_mapping.json | 3 + .../ru/spreadsheet_index_mapping.json | 3 + .../ru/stored_procedure_index_mapping.json | 3 + .../elasticsearch/ru/table_index_mapping.json | 3 + .../elasticsearch/ru/tag_index_mapping.json | 3 + .../elasticsearch/ru/topic_index_mapping.json | 3 + .../ru/worksheet_index_mapping.json | 3 + .../zh/api_collection_index_mapping.json | 3 + .../zh/api_endpoint_index_mapping.json | 3 + .../elasticsearch/zh/chart_index_mapping.json | 3 + .../zh/container_index_mapping.json | 3 + .../dashboard_data_model_index_mapping.json | 3 + .../zh/dashboard_index_mapping.json | 3 + .../zh/data_products_index_mapping.json | 3 + .../zh/database_index_mapping.json | 3 + .../zh/database_schema_index_mapping.json | 3 + .../zh/directory_index_mapping.json 
| 3 + .../elasticsearch/zh/file_index_mapping.json | 3 + .../zh/glossary_index_mapping.json | 3 + .../zh/glossary_term_index_mapping.json | 3 + .../zh/metric_index_mapping.json | 3 + .../zh/mlmodel_index_mapping.json | 3 + .../zh/pipeline_index_mapping.json | 3 + .../zh/search_entity_index_mapping.json | 3 + .../zh/spreadsheet_index_mapping.json | 3 + .../zh/stored_procedure_index_mapping.json | 3 + .../elasticsearch/zh/table_index_mapping.json | 3 + .../elasticsearch/zh/tag_index_mapping.json | 3 + .../elasticsearch/zh/topic_index_mapping.json | 3 + .../zh/worksheet_index_mapping.json | 3 + 96 files changed, 704 insertions(+), 4 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/VectorDocBuilder.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/VectorDocBuilder.java index 1de4dd39e5a3..dbef38dc5f13 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/VectorDocBuilder.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/VectorDocBuilder.java @@ -51,6 +51,24 @@ public interface BodyTextExtractor { private static final Map BODY_TEXT_EXTRACTORS = new ConcurrentHashMap<>(); + private static final int MAX_CHILD_NAMES_IN_CONTEXT = 20; + + /** + * Child-entity enumeration spec for container-like types. When an entity has children on the + * object (populated during reindexing via {@code fields=*}), their names are joined into a + * short natural-language phrase and appended to the semantic body, so queries match against + * what a container actually contains. 
+ */ + private record SemanticChildrenSpec(String getterName, String phrasePrefix) {} + + private static final Map SEMANTIC_CHILDREN_SPECS = + Map.of( + "database", new SemanticChildrenSpec("getDatabaseSchemas", "Contains schemas"), + "databaseSchema", new SemanticChildrenSpec("getTables", "Contains tables"), + "apiCollection", new SemanticChildrenSpec("getApiEndpoints", "Contains endpoints"), + "container", new SemanticChildrenSpec("getChildren", "Contains"), + "dataProduct", new SemanticChildrenSpec("getAssets", "Contains assets")); + /** * Register a custom {@link BodyTextExtractor} for an entity type. The registry is consulted by * {@link #buildBodyText(EntityInterface, String)} before the default description-based logic, @@ -92,7 +110,12 @@ public static List> fromEntity( /** * Generate embedding fields to merge into an entity's search index document. Returns a map with: - * embedding, textToEmbed, chunkIndex, chunkCount, parentId, fingerprint. + * embedding, textToEmbed, textToEmbedSemantic, chunkIndex, chunkCount, parentId, fingerprint. + * + *
<p>
{@code textToEmbed} preserves the legacy rich-context format (empty fields rendered as + * {@code []}) and is consumed by agent tooling as LLM context. {@code textToEmbedSemantic} is + * the compact variant that omits empty fields and is the actual input fed to the embedding + * model. */ public static Map buildEmbeddingFields( EntityInterface entity, EmbeddingClient embeddingClient) { @@ -101,21 +124,26 @@ public static Map buildEmbeddingFields( String metaLight = buildMetaLightText(entity, entityType); String body = buildBodyText(entity, entityType); + String semanticMetaLight = buildSemanticMetaLightText(entity, entityType); + String semanticBody = buildSemanticBodyText(entity, entityType); String fingerprint = computeFingerprintForEntity(entity); List chunks = TextChunkManager.chunk(body); int chunkCount = chunks.size(); + List semanticChunks = TextChunkManager.chunk(semanticBody); - // Use the first chunk for the entity's embedding String contTag = ""; String textToEmbed = String.format("%s%s%s | chunk %d/%d", metaLight, contTag, chunks.get(0), 1, chunkCount); + String semanticBodyChunk = semanticChunks.get(0); + String textToEmbedSemantic = joinSemanticParts(semanticMetaLight, semanticBodyChunk); - float[] embedding = embeddingClient.embed(textToEmbed); + float[] embedding = embeddingClient.embed(textToEmbedSemantic); Map fields = new HashMap<>(); fields.put("embedding", embedding); fields.put("textToEmbed", textToEmbed); + fields.put("textToEmbedSemantic", textToEmbedSemantic); fields.put("chunkIndex", 0); fields.put("chunkCount", chunkCount); fields.put("parentId", parentId); @@ -280,6 +308,250 @@ static String buildBodyText(EntityInterface entity, String entityType) { return String.join("; ", bodyParts); } + /** + * Natural-language metadata for the semantic embedding input. 
Emits content as sentence-like + * phrases without {@code key: value;} label scaffolding, and drops high-noise/low-signal fields + * (FQN, entityType, serviceType, owners, customProperties, chunk marker) so the pooled vector + * isn't dominated by structural tokens that appear in every document. + */ + static String buildSemanticMetaLightText(EntityInterface entity, String entityType) { + boolean isGlossary = entity instanceof Glossary; + boolean isGlossaryTerm = entity instanceof GlossaryTerm; + boolean isMetric = entity instanceof Metric; + + List phrases = new ArrayList<>(); + + String name = entity.getName(); + String displayName = entity.getDisplayName(); + String subject = null; + if (displayName != null && !displayName.isBlank() && !displayName.equals(name)) { + subject = (name == null || name.isBlank()) ? displayName : displayName + " (" + name + ")"; + } else if (name != null && !name.isBlank()) { + subject = name; + } + String typeLabel = humanizeEntityType(entityType); + if (!typeLabel.isEmpty() && subject != null) { + phrases.add(typeLabel + " " + subject); + } else if (!typeLabel.isEmpty()) { + phrases.add(typeLabel); + } else if (subject != null) { + phrases.add(subject); + } + + if (isGlossaryTerm) { + appendGlossaryTermPhrases(phrases, (GlossaryTerm) entity); + } + if (isMetric) { + appendMetricPhrases(phrases, (Metric) entity); + } + + appendTagPhrases(phrases, entity, isGlossary, isGlossaryTerm); + appendDomainPhrase(phrases, entity); + + if (!isGlossary && !isGlossaryTerm) { + String tier = extractTierLabel(entity); + if (tier != null) { + phrases.add(tier.replace('.', ' ')); + } + String cert = extractCertificationLabel(entity); + if (cert != null) { + phrases.add(cert.replace('.', ' ')); + } + } + + return String.join(". 
", phrases); + } + + private static void appendGlossaryTermPhrases(List phrases, GlossaryTerm term) { + List synonyms = term.getSynonyms(); + if (synonyms != null && !synonyms.isEmpty()) { + phrases.add("Also known as " + String.join(", ", synonyms)); + } + List relatedTerms = term.getRelatedTerms(); + if (relatedTerms != null && !relatedTerms.isEmpty()) { + List relatedNames = + relatedTerms.stream() + .map(tr -> tr.getTerm() == null ? null : tr.getTerm().getName()) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + if (!relatedNames.isEmpty()) { + phrases.add("Related to " + String.join(", ", relatedNames)); + } + } + } + + private static void appendMetricPhrases(List phrases, Metric metric) { + List parts = new ArrayList<>(); + if (metric.getMetricType() != null) { + parts.add(metric.getMetricType().value() + " metric"); + } + if (metric.getUnitOfMeasurement() != null) { + String unit = metric.getUnitOfMeasurement().value(); + String value = + "OTHER".equals(unit) && metric.getCustomUnitOfMeasurement() != null + ? metric.getCustomUnitOfMeasurement() + : unit; + parts.add("measured in " + value); + } + if (metric.getGranularity() != null) { + parts.add("granularity " + metric.getGranularity()); + } + if (!parts.isEmpty()) { + phrases.add(String.join(", ", parts)); + } + MetricExpression expr = metric.getMetricExpression(); + if (expr != null && expr.getCode() != null) { + phrases.add(expr.getCode()); + } + } + + private static void appendTagPhrases( + List phrases, EntityInterface entity, boolean isGlossary, boolean isGlossaryTerm) { + List tagsPojo = entity.getTags() != null ? 
entity.getTags() : Collections.emptyList(); + List classificationTagNames = + tagsPojo.stream() + .filter(tag -> tag.getSource() == null || !"Glossary".equals(tag.getSource().value())) + .filter(tag -> !tag.getTagFQN().startsWith("Tier.")) + .map(tag -> tag.getTagFQN().replace('.', ' ')) + .collect(Collectors.toList()); + if (!classificationTagNames.isEmpty()) { + phrases.add("Tagged as " + String.join(", ", classificationTagNames)); + } + if (!isGlossary && !isGlossaryTerm) { + List glossaryTermNames = + tagsPojo.stream() + .filter(tag -> tag.getSource() != null && "Glossary".equals(tag.getSource().value())) + .map(tag -> tag.getName() != null ? tag.getName() : tag.getTagFQN()) + .collect(Collectors.toList()); + if (!glossaryTermNames.isEmpty()) { + phrases.add("Related glossary terms " + String.join(", ", glossaryTermNames)); + } + } + } + + private static void appendDomainPhrase(List phrases, EntityInterface entity) { + List domainsPojo = + entity.getDomains() != null ? entity.getDomains() : Collections.emptyList(); + List domainNames = + domainsPojo.stream() + .map(d -> d.getDisplayName() != null ? d.getDisplayName() : d.getName()) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + if (!domainNames.isEmpty()) { + phrases.add("In domain " + String.join(", ", domainNames)); + } + } + + private static String joinSemanticParts(String metaLight, String body) { + if (metaLight.isEmpty()) { + return body; + } + if (body.isEmpty()) { + return metaLight; + } + return metaLight + ". 
" + body; + } + + static String buildSemanticBodyText(EntityInterface entity, String entityType) { + if (entityType != null) { + BodyTextExtractor customExtractor = BODY_TEXT_EXTRACTORS.get(entityType); + if (customExtractor != null) { + try { + String custom = customExtractor.extract(entity); + if (custom != null) { + return custom; + } + } catch (Exception e) { + LOG.warn( + "Custom BodyTextExtractor failed for [{}], falling back to default", entityType, e); + } + } + } + + List bodyParts = new ArrayList<>(); + String description = removeHtml(entity.getDescription() == null ? "" : entity.getDescription()); + if (!description.isEmpty()) { + bodyParts.add(description); + } + + if (entity instanceof Table table) { + List columns = table.getColumns(); + if (columns != null && !columns.isEmpty()) { + bodyParts.add("Columns include " + columnsToString(columns)); + } + } + + String childContext = buildChildContextPhrase(entity, entityType); + if (childContext != null) { + bodyParts.add(childContext); + } + + return String.join(". ", bodyParts); + } + + /** + * Convert an entity type identifier into a natural-language label by inserting spaces at every + * lowercase→uppercase boundary. {@code dataProduct} becomes {@code "data Product"}, + * {@code databaseSchema} becomes {@code "database Schema"}, {@code table} stays {@code "table"}. + * Returns an empty string for null or blank input so callers can trivially skip the prefix. + */ + static String humanizeEntityType(String entityType) { + if (entityType == null || entityType.isBlank()) { + return ""; + } + return entityType.replaceAll("([a-z])([A-Z])", "$1 $2"); + } + + /** + * Produce a "Contains X, Y, Z" phrase listing the names of a container entity's direct + * children (database schemas, tables, endpoints, charts, etc.). Children are read via + * reflection using the getter name in {@link #SEMANTIC_CHILDREN_SPECS}, so this does not + * introduce compile-time coupling to every container type. 
Returns null when the entity is + * not a known container, when the getter is missing, or when the child list is empty. + */ + static String buildChildContextPhrase(EntityInterface entity, String entityType) { + if (entityType == null) { + return null; + } + SemanticChildrenSpec spec = SEMANTIC_CHILDREN_SPECS.get(entityType); + if (spec == null) { + return null; + } + List childNames = readChildNames(entity, spec.getterName()); + if (childNames.isEmpty()) { + return null; + } + List limited = + childNames.size() > MAX_CHILD_NAMES_IN_CONTEXT + ? childNames.subList(0, MAX_CHILD_NAMES_IN_CONTEXT) + : childNames; + return spec.phrasePrefix() + " " + String.join(", ", limited); + } + + private static List readChildNames(EntityInterface entity, String getterName) { + try { + Method method = entity.getClass().getMethod(getterName); + Object result = method.invoke(entity); + if (!(result instanceof List refs) || refs.isEmpty()) { + return Collections.emptyList(); + } + List names = new ArrayList<>(refs.size()); + for (Object ref : refs) { + if (ref instanceof EntityReference entityRef) { + String displayName = entityRef.getDisplayName(); + String name = + displayName != null && !displayName.isBlank() ? 
displayName : entityRef.getName(); + if (name != null && !name.isBlank()) { + names.add(name); + } + } + } + return names; + } catch (Exception e) { + return Collections.emptyList(); + } + } + static String extractServiceType(EntityInterface entity) { try { Method method = entity.getClass().getMethod("getServiceType"); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/utils/AvailableEntityTypes.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/utils/AvailableEntityTypes.java index e838053c9f48..10c7a27560ae 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/utils/AvailableEntityTypes.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/utils/AvailableEntityTypes.java @@ -28,7 +28,8 @@ private AvailableEntityTypes() {} "storedProcedure", "searchIndex", "topic", - "contextMemory"); + "contextMemory", + "container"); public static final Set SET = LIST.stream().map(s -> s.toLowerCase(Locale.ROOT)).collect(Collectors.toUnmodifiableSet()); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/utils/DTOs.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/utils/DTOs.java index 6c7d60887aa5..050e7056eacf 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/utils/DTOs.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/utils/DTOs.java @@ -1,5 +1,6 @@ package org.openmetadata.service.search.vector.utils; +import com.fasterxml.jackson.annotation.JsonAlias; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import java.util.List; import java.util.Map; @@ -20,6 +21,8 @@ public static class VectorSearchRequest { public int size = 10; public Integer from = 0; public int k = 1_000; + + @JsonAlias("min_score") public double threshold = 0.0; } diff --git 
a/openmetadata-service/src/test/java/org/openmetadata/service/search/vector/VectorDocBuilderTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/search/vector/VectorDocBuilderTest.java index 5f3b153aaaee..468329e66465 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/search/vector/VectorDocBuilderTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/search/vector/VectorDocBuilderTest.java @@ -12,6 +12,8 @@ import java.util.Map; import java.util.UUID; import org.junit.jupiter.api.Test; +import org.openmetadata.schema.EntityInterface; +import org.openmetadata.schema.entity.data.Database; import org.openmetadata.schema.entity.data.GlossaryTerm; import org.openmetadata.schema.entity.data.Table; import org.openmetadata.schema.type.Column; @@ -26,6 +28,12 @@ class VectorDocBuilderTest { private static final EmbeddingClient MOCK_CLIENT = new EmbeddingClientTest.MockEmbeddingClient(384); + static { + EntityInterface.CANONICAL_ENTITY_NAME_MAP.put("table", "table"); + EntityInterface.CANONICAL_ENTITY_NAME_MAP.put("database", "database"); + EntityInterface.CANONICAL_ENTITY_NAME_MAP.put("glossaryterm", "glossaryTerm"); + } + @Test void testBuildEmbeddingFieldsBasic() { Table table = createTestTable("test_table", "Test Table", "A test table for unit testing"); @@ -36,11 +44,151 @@ void testBuildEmbeddingFieldsBasic() { assertEquals(table.getId().toString(), fields.get("parentId")); assertNotNull(fields.get("embedding")); assertNotNull(fields.get("textToEmbed")); + assertNotNull(fields.get("textToEmbedSemantic")); assertNotNull(fields.get("fingerprint")); assertEquals(0, fields.get("chunkIndex")); assertTrue((int) fields.get("chunkCount") >= 1); } + @Test + void testSemanticTextDropsStructuralScaffolding() { + Table table = createTestTable("orders", null, "Order table"); + table.setFullyQualifiedName("postgres.jaffle_shop.public.orders"); + + String semantic = VectorDocBuilder.buildSemanticMetaLightText(table, 
"table"); + + assertTrue(semantic.contains("orders")); + assertFalse(semantic.contains("name:")); + assertFalse(semantic.contains("displayName:")); + assertFalse(semantic.contains("entityType:")); + assertFalse(semantic.contains("serviceType:")); + assertFalse(semantic.contains("fullyQualifiedName:")); + assertFalse(semantic.contains("postgres.jaffle_shop.public.orders")); + assertFalse(semantic.contains("[]")); + assertFalse(semantic.contains(" | ")); + } + + @Test + void testSemanticTextIncludesPopulatedFieldsAsPhrases() { + Table table = createTestTable("orders", "Orders Display", "desc"); + TagLabel tag = new TagLabel(); + tag.setTagFQN("PII.Sensitive"); + tag.setName("Sensitive"); + table.setTags(List.of(tag)); + + String semantic = VectorDocBuilder.buildSemanticMetaLightText(table, "table"); + + assertTrue(semantic.contains("Orders Display")); + assertTrue(semantic.contains("orders")); + assertTrue(semantic.contains("Tagged as PII Sensitive")); + assertFalse(semantic.contains("owners")); + assertFalse(semantic.contains("user.")); + } + + @Test + void testSemanticBodyTextSkipsEmptyDescriptionAndColumns() { + Table table = createTestTable("empty", null, null); + table.setColumns(null); + + String semanticBody = VectorDocBuilder.buildSemanticBodyText(table, "table"); + + assertEquals("", semanticBody); + } + + @Test + void testSemanticTextPrependsTypeLabelWhenContentIsEmpty() { + Table table = new Table(); + table.setId(UUID.randomUUID()); + table.setName("lonely"); + table.setDeleted(false); + + Map fields = VectorDocBuilder.buildEmbeddingFields(table, MOCK_CLIENT); + String semantic = (String) fields.get("textToEmbedSemantic"); + + assertEquals("table lonely", semantic); + } + + @Test + void testSemanticTextJoinsMetaAndBodyWithPeriod() { + Table table = createTestTable("customers", "Customers dashboard", "A sample dashboard"); + + Map fields = VectorDocBuilder.buildEmbeddingFields(table, MOCK_CLIENT); + String semantic = (String) 
fields.get("textToEmbedSemantic"); + + assertTrue(semantic.startsWith("table Customers dashboard (customers)")); + assertTrue(semantic.contains(". A sample dashboard")); + assertFalse(semantic.contains("chunk")); + } + + @Test + void testSemanticBodyIncludesChildContextForContainers() { + Database database = new Database(); + database.setId(UUID.randomUUID()); + database.setName("customers"); + database.setDeleted(false); + + EntityReference ethereum = new EntityReference(); + ethereum.setId(UUID.randomUUID()); + ethereum.setType("databaseSchema"); + ethereum.setName("CRYPTO_ETHEREUM"); + EntityReference bitcoin = new EntityReference(); + bitcoin.setId(UUID.randomUUID()); + bitcoin.setType("databaseSchema"); + bitcoin.setName("CRYPTO_BITCOIN"); + database.setDatabaseSchemas(List.of(ethereum, bitcoin)); + + String body = VectorDocBuilder.buildSemanticBodyText(database, "database"); + + assertTrue(body.contains("Contains schemas CRYPTO_ETHEREUM, CRYPTO_BITCOIN")); + } + + @Test + void testSemanticBodySkipsChildContextForNonContainers() { + Table table = createTestTable("orders", null, "Order table"); + + String body = VectorDocBuilder.buildSemanticBodyText(table, "table"); + + assertFalse(body.contains("Contains")); + } + + @Test + void testSemanticMetaLightUsesTypeLabelForContainerWithoutName() { + Database database = new Database(); + database.setId(UUID.randomUUID()); + database.setDeleted(false); + + String metaLight = VectorDocBuilder.buildSemanticMetaLightText(database, "database"); + + assertEquals("database", metaLight); + } + + @Test + void testHumanizeEntityTypeSplitsCamelCase() { + assertEquals("", VectorDocBuilder.humanizeEntityType(null)); + assertEquals("", VectorDocBuilder.humanizeEntityType("")); + assertEquals("table", VectorDocBuilder.humanizeEntityType("table")); + assertEquals("database Schema", VectorDocBuilder.humanizeEntityType("databaseSchema")); + assertEquals("data Product", VectorDocBuilder.humanizeEntityType("dataProduct")); + 
assertEquals("api Collection", VectorDocBuilder.humanizeEntityType("apiCollection")); + assertEquals("glossary Term", VectorDocBuilder.humanizeEntityType("glossaryTerm")); + } + + @Test + void testTextToEmbedRemainsLegacyFormat() { + Table table = createTestTable("orders", null, "Order table"); + + Map fields = VectorDocBuilder.buildEmbeddingFields(table, MOCK_CLIENT); + String legacy = (String) fields.get("textToEmbed"); + String semantic = (String) fields.get("textToEmbedSemantic"); + + assertTrue(legacy.contains("displayName: []"), "legacy textToEmbed keeps empty placeholders"); + assertTrue(legacy.contains(" | chunk 1/")); + assertFalse(semantic.contains("[]")); + assertFalse(semantic.contains("name:")); + assertTrue(semantic.contains("orders")); + assertTrue(semantic.contains("Order table")); + } + @Test void testBuildEmbeddingFieldsContainsEmbeddingVector() { Table table = createTestTable("vec_table", null, "A table with embedding"); diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/api_collection_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/api_collection_index_mapping.json index dcbbad55ba0b..455fa88e5d48 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/api_collection_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/api_collection_index_mapping.json @@ -715,6 +715,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/api_endpoint_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/api_endpoint_index_mapping.json index a6abb39cde7b..ad9f3ff301ce 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/api_endpoint_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/api_endpoint_index_mapping.json @@ -939,6 +939,9 @@ "textToEmbed": { "type": "text" }, + 
"textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/chart_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/chart_index_mapping.json index c0919bea5c2b..3e773d36164f 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/chart_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/chart_index_mapping.json @@ -666,6 +666,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/container_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/container_index_mapping.json index 3486f458db27..142cc2a4a33d 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/container_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/container_index_mapping.json @@ -920,6 +920,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/dashboard_data_model_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/dashboard_data_model_index_mapping.json index 548a060be9fe..8573642f4735 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/dashboard_data_model_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/dashboard_data_model_index_mapping.json @@ -774,6 +774,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/dashboard_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/dashboard_index_mapping.json index 511234db039e..a6a6573b39e4 100644 --- 
a/openmetadata-spec/src/main/resources/elasticsearch/en/dashboard_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/dashboard_index_mapping.json @@ -807,6 +807,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/data_products_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/data_products_index_mapping.json index 38bd845522d4..4436a30a9b1d 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/data_products_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/data_products_index_mapping.json @@ -618,6 +618,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/database_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/database_index_mapping.json index 650abc2db7ba..76784d9a3845 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/database_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/database_index_mapping.json @@ -693,6 +693,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/database_schema_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/database_schema_index_mapping.json index 30bf5aab242f..4d7c1e80d436 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/database_schema_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/database_schema_index_mapping.json @@ -657,6 +657,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git 
a/openmetadata-spec/src/main/resources/elasticsearch/en/directory_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/directory_index_mapping.json index 863cefd2b847..540f98892693 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/directory_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/directory_index_mapping.json @@ -756,6 +756,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/file_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/file_index_mapping.json index dff0c9df151e..79b2f387d6f2 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/file_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/file_index_mapping.json @@ -808,6 +808,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/glossary_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/glossary_index_mapping.json index 0910f440a1e0..dd51101c637d 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/glossary_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/glossary_index_mapping.json @@ -398,6 +398,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/glossary_term_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/glossary_term_index_mapping.json index 394d51dff52f..7d913da5f17b 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/glossary_term_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/glossary_term_index_mapping.json 
@@ -551,6 +551,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/metric_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/metric_index_mapping.json index 1cf0ce13957d..4b1ccc638c8d 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/metric_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/metric_index_mapping.json @@ -681,6 +681,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/mlmodel_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/mlmodel_index_mapping.json index 784d30c9b53b..4b4c6d81a986 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/mlmodel_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/mlmodel_index_mapping.json @@ -815,6 +815,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/pipeline_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/pipeline_index_mapping.json index 5cf766b022ea..a9b6e1892f7d 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/pipeline_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/pipeline_index_mapping.json @@ -722,6 +722,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/search_entity_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/search_entity_index_mapping.json index 2d60398fd39b..8d3137f3643e 100644 --- 
a/openmetadata-spec/src/main/resources/elasticsearch/en/search_entity_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/search_entity_index_mapping.json @@ -777,6 +777,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/spreadsheet_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/spreadsheet_index_mapping.json index ae5471f5a550..84dc64fa7dfa 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/spreadsheet_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/spreadsheet_index_mapping.json @@ -797,6 +797,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/stored_procedure_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/stored_procedure_index_mapping.json index 18c91897a437..2912f61ddac3 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/stored_procedure_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/stored_procedure_index_mapping.json @@ -795,6 +795,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/table_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/table_index_mapping.json index 0c9972b2987a..f079d844478a 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/table_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/table_index_mapping.json @@ -1039,6 +1039,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git 
a/openmetadata-spec/src/main/resources/elasticsearch/en/tag_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/tag_index_mapping.json index 995662d9b0c6..f9bd6fa3b80d 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/tag_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/tag_index_mapping.json @@ -382,6 +382,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/topic_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/topic_index_mapping.json index 73ddd11bf64e..9b6bb36bc654 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/topic_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/topic_index_mapping.json @@ -802,6 +802,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/worksheet_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/worksheet_index_mapping.json index 795154504b8f..3d665a60f771 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/worksheet_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/worksheet_index_mapping.json @@ -921,6 +921,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/api_collection_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/api_collection_index_mapping.json index b95e36924021..460fae6b97cf 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/api_collection_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/api_collection_index_mapping.json @@ -688,6 
+688,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/api_endpoint_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/api_endpoint_index_mapping.json index 17e874e2042c..061b70f7cb51 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/api_endpoint_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/api_endpoint_index_mapping.json @@ -902,6 +902,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/chart_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/chart_index_mapping.json index f9c66f90f4a4..310f46ba1cf9 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/chart_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/chart_index_mapping.json @@ -694,6 +694,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/container_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/container_index_mapping.json index c38914b0ed9b..cd460a4f3ecf 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/container_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/container_index_mapping.json @@ -842,6 +842,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/dashboard_data_model_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/dashboard_data_model_index_mapping.json index 55852422ee29..8f75cd9e5c7f 100644 --- 
a/openmetadata-spec/src/main/resources/elasticsearch/jp/dashboard_data_model_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/dashboard_data_model_index_mapping.json @@ -727,6 +727,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/dashboard_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/dashboard_index_mapping.json index 8a9c30f008fd..e56017e87a1d 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/dashboard_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/dashboard_index_mapping.json @@ -789,6 +789,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/data_products_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/data_products_index_mapping.json index 9d8042d4af36..ccb3198fc56a 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/data_products_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/data_products_index_mapping.json @@ -621,6 +621,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/database_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/database_index_mapping.json index 45b07b4732d4..6e5d1cf6f1b0 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/database_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/database_index_mapping.json @@ -684,6 +684,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git 
a/openmetadata-spec/src/main/resources/elasticsearch/jp/database_schema_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/database_schema_index_mapping.json index 2a7dbdf916c4..3fbff97aa6f0 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/database_schema_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/database_schema_index_mapping.json @@ -648,6 +648,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/directory_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/directory_index_mapping.json index 90f8ee00b574..7cccf8acb452 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/directory_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/directory_index_mapping.json @@ -738,6 +738,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/file_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/file_index_mapping.json index a181da9dc367..6cc284e8cb12 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/file_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/file_index_mapping.json @@ -753,6 +753,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/glossary_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/glossary_index_mapping.json index e578a542ea67..6fe55702a1a4 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/glossary_index_mapping.json +++ 
b/openmetadata-spec/src/main/resources/elasticsearch/jp/glossary_index_mapping.json @@ -394,6 +394,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/glossary_term_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/glossary_term_index_mapping.json index 3af30efa8e63..7f40c9790fd1 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/glossary_term_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/glossary_term_index_mapping.json @@ -548,6 +548,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/metric_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/metric_index_mapping.json index afc4d8246465..8a91bddf48ca 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/metric_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/metric_index_mapping.json @@ -665,6 +665,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/mlmodel_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/mlmodel_index_mapping.json index 3a7606a39cc8..976bf0e339bc 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/mlmodel_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/mlmodel_index_mapping.json @@ -791,6 +791,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/pipeline_index_mapping.json 
b/openmetadata-spec/src/main/resources/elasticsearch/jp/pipeline_index_mapping.json index 58147475a001..64957669a103 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/pipeline_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/pipeline_index_mapping.json @@ -679,6 +679,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/search_entity_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/search_entity_index_mapping.json index 7bc97c2105e6..2ee10f61ca38 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/search_entity_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/search_entity_index_mapping.json @@ -764,6 +764,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/spreadsheet_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/spreadsheet_index_mapping.json index 0d31c42e550e..24c0caa286f6 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/spreadsheet_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/spreadsheet_index_mapping.json @@ -738,6 +738,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/stored_procedure_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/stored_procedure_index_mapping.json index 025a13cfec7a..bffb75f7cc08 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/stored_procedure_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/stored_procedure_index_mapping.json @@ -871,6 +871,9 @@ 
"textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/table_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/table_index_mapping.json index 55bb2ea9becd..a977ba0f108a 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/table_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/table_index_mapping.json @@ -1020,6 +1020,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/tag_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/tag_index_mapping.json index 992bcbfbf995..c5407048dfd2 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/tag_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/tag_index_mapping.json @@ -331,6 +331,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/topic_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/topic_index_mapping.json index ca4235accfbe..f057d540f694 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/topic_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/topic_index_mapping.json @@ -774,6 +774,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/worksheet_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/worksheet_index_mapping.json index e66809c9f907..9d958fb41794 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/worksheet_index_mapping.json 
+++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/worksheet_index_mapping.json @@ -811,6 +811,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/api_collection_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/api_collection_index_mapping.json index dd619e7ff12f..1fabb8176374 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/api_collection_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/api_collection_index_mapping.json @@ -732,6 +732,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/api_endpoint_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/api_endpoint_index_mapping.json index 33db1f08346b..3c827f10dedd 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/api_endpoint_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/api_endpoint_index_mapping.json @@ -956,6 +956,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/chart_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/chart_index_mapping.json index c66db89d2de0..50e9660d0bb7 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/chart_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/chart_index_mapping.json @@ -683,6 +683,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/container_index_mapping.json 
b/openmetadata-spec/src/main/resources/elasticsearch/ru/container_index_mapping.json index cc0fe1165d69..9680f8226187 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/container_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/container_index_mapping.json @@ -893,6 +893,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/dashboard_data_model_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/dashboard_data_model_index_mapping.json index 2e267e133752..7adcdad1e831 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/dashboard_data_model_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/dashboard_data_model_index_mapping.json @@ -746,6 +746,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/dashboard_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/dashboard_index_mapping.json index 827140bf35ea..7ac4dbce337b 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/dashboard_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/dashboard_index_mapping.json @@ -824,6 +824,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/data_products_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/data_products_index_mapping.json index bb5d64fe8769..b70117273f53 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/data_products_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/data_products_index_mapping.json @@ -630,6 +630,9 
@@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/database_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/database_index_mapping.json index 07e431ddfe44..38cb7c480e47 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/database_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/database_index_mapping.json @@ -710,6 +710,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/database_schema_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/database_schema_index_mapping.json index 9647947a2990..b99ed27b008e 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/database_schema_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/database_schema_index_mapping.json @@ -674,6 +674,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/directory_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/directory_index_mapping.json index 94b8de1c780b..0e0b272614ab 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/directory_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/directory_index_mapping.json @@ -639,6 +639,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/file_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/file_index_mapping.json index 2a64fe42baeb..d68256a3200b 100644 --- 
a/openmetadata-spec/src/main/resources/elasticsearch/ru/file_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/file_index_mapping.json @@ -694,6 +694,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/glossary_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/glossary_index_mapping.json index 33b393fd4752..daa2c2d1a457 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/glossary_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/glossary_index_mapping.json @@ -416,6 +416,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/glossary_term_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/glossary_term_index_mapping.json index c2b44ba71e3d..8f5c9b99865f 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/glossary_term_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/glossary_term_index_mapping.json @@ -569,6 +569,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/metric_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/metric_index_mapping.json index 29a215b2fcd6..0ad3ded35019 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/metric_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/metric_index_mapping.json @@ -651,6 +651,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git 
a/openmetadata-spec/src/main/resources/elasticsearch/ru/mlmodel_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/mlmodel_index_mapping.json index 9c721a279416..d92ace04ad6d 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/mlmodel_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/mlmodel_index_mapping.json @@ -832,6 +832,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/pipeline_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/pipeline_index_mapping.json index 8fa47edcd062..d85254398500 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/pipeline_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/pipeline_index_mapping.json @@ -739,6 +739,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/search_entity_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/search_entity_index_mapping.json index 5b278c1abfac..b205b19bd460 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/search_entity_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/search_entity_index_mapping.json @@ -794,6 +794,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/spreadsheet_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/spreadsheet_index_mapping.json index 93fe96dcbbe6..93eb62b1fe51 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/spreadsheet_index_mapping.json +++ 
b/openmetadata-spec/src/main/resources/elasticsearch/ru/spreadsheet_index_mapping.json @@ -693,6 +693,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/stored_procedure_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/stored_procedure_index_mapping.json index 88e24a4cf76a..5f6d97d42e6c 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/stored_procedure_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/stored_procedure_index_mapping.json @@ -812,6 +812,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/table_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/table_index_mapping.json index 43d146229443..3df7e91ed98d 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/table_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/table_index_mapping.json @@ -1026,6 +1026,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/tag_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/tag_index_mapping.json index f5a848c0f6f0..a57e59b03608 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/tag_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/tag_index_mapping.json @@ -395,6 +395,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/topic_index_mapping.json 
b/openmetadata-spec/src/main/resources/elasticsearch/ru/topic_index_mapping.json index 915427b8bb2a..80c0f9dd5bea 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/topic_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/topic_index_mapping.json @@ -819,6 +819,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/worksheet_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/worksheet_index_mapping.json index 6ed09a90f999..afc1c0badf68 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/worksheet_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/worksheet_index_mapping.json @@ -753,6 +753,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/api_collection_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/api_collection_index_mapping.json index 5c3c172b4754..1e1a3227bba0 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/api_collection_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/api_collection_index_mapping.json @@ -688,6 +688,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/api_endpoint_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/api_endpoint_index_mapping.json index 6dd7f54700af..609648d4dade 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/api_endpoint_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/api_endpoint_index_mapping.json @@ -903,6 +903,9 @@ "textToEmbed": { "type": "text" }, + 
"textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/chart_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/chart_index_mapping.json index 364055c50e75..a260491329df 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/chart_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/chart_index_mapping.json @@ -680,6 +680,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/container_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/container_index_mapping.json index 8dd00025225f..43cafd875a9c 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/container_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/container_index_mapping.json @@ -844,6 +844,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/dashboard_data_model_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/dashboard_data_model_index_mapping.json index cc23ca872cd5..83b51aad81a1 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/dashboard_data_model_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/dashboard_data_model_index_mapping.json @@ -725,6 +725,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/dashboard_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/dashboard_index_mapping.json index 6cf145762784..d248d1941bcb 100644 --- 
a/openmetadata-spec/src/main/resources/elasticsearch/zh/dashboard_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/dashboard_index_mapping.json @@ -746,6 +746,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/data_products_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/data_products_index_mapping.json index d3a0ae59ebfa..57607b57e94b 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/data_products_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/data_products_index_mapping.json @@ -614,6 +614,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/database_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/database_index_mapping.json index 87edee06ea7e..a7ab80bdf26c 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/database_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/database_index_mapping.json @@ -667,6 +667,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/database_schema_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/database_schema_index_mapping.json index 88dc8fd35372..a22fa2607d19 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/database_schema_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/database_schema_index_mapping.json @@ -627,6 +627,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git 
a/openmetadata-spec/src/main/resources/elasticsearch/zh/directory_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/directory_index_mapping.json index 57b846f2e6ff..eeb1343abad5 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/directory_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/directory_index_mapping.json @@ -711,6 +711,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/file_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/file_index_mapping.json index cce32121196d..e3970fb14f94 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/file_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/file_index_mapping.json @@ -726,6 +726,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/glossary_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/glossary_index_mapping.json index 9fd014b9f885..aefb277fa932 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/glossary_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/glossary_index_mapping.json @@ -337,6 +337,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/glossary_term_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/glossary_term_index_mapping.json index 70990dc9ceeb..0922707af05e 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/glossary_term_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/glossary_term_index_mapping.json 
@@ -511,6 +511,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/metric_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/metric_index_mapping.json index fdfc7e342329..49ff97736c90 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/metric_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/metric_index_mapping.json @@ -657,6 +657,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/mlmodel_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/mlmodel_index_mapping.json index 7e181cac2615..aad3dc6ca4cb 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/mlmodel_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/mlmodel_index_mapping.json @@ -785,6 +785,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/pipeline_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/pipeline_index_mapping.json index 9b86f1c7d28e..3bbe8cf41ac2 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/pipeline_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/pipeline_index_mapping.json @@ -682,6 +682,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/search_entity_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/search_entity_index_mapping.json index f7dc94c44089..22a4befa5a75 100644 --- 
a/openmetadata-spec/src/main/resources/elasticsearch/zh/search_entity_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/search_entity_index_mapping.json @@ -748,6 +748,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/spreadsheet_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/spreadsheet_index_mapping.json index b5039b2f1716..cf72763e6088 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/spreadsheet_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/spreadsheet_index_mapping.json @@ -711,6 +711,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/stored_procedure_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/stored_procedure_index_mapping.json index bbc0829bd210..04e7b62a5e87 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/stored_procedure_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/stored_procedure_index_mapping.json @@ -871,6 +871,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/table_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/table_index_mapping.json index 227e47d76779..09ad09794a08 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/table_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/table_index_mapping.json @@ -1011,6 +1011,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git 
a/openmetadata-spec/src/main/resources/elasticsearch/zh/tag_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/tag_index_mapping.json index e5dffd841d1f..416616be0a7f 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/tag_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/tag_index_mapping.json @@ -326,6 +326,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/topic_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/topic_index_mapping.json index 6b38f31bd482..b3a1c96ef540 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/topic_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/topic_index_mapping.json @@ -725,6 +725,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/worksheet_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/worksheet_index_mapping.json index c82e9c871640..97b819a7e4ea 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/worksheet_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/worksheet_index_mapping.json @@ -786,6 +786,9 @@ "textToEmbed": { "type": "text" }, + "textToEmbedSemantic": { + "type": "text" + }, "chunkIndex": { "type": "integer" }, From 1ef331a2f9f1a37ac16626ca0e760ca07a4d1ece Mon Sep 17 00:00:00 2001 From: lautel Date: Mon, 20 Apr 2026 10:56:44 +0200 Subject: [PATCH 2/3] Rename textToEmbed > textToLLMContext and textToEmbedSemantic > textToEmbed --- .../it/tests/PatchTableEmbeddingIT.java | 7 ++-- .../tests/VectorEmbeddingIntegrationIT.java | 13 ++++--- .../mcp/tools/SemanticSearchToolTest.java | 5 ++- .../search/vector/VectorDocBuilder.java | 14 
+++---- .../search/vector/VectorDocBuilderTest.java | 39 ++++++++++--------- .../en/api_collection_index_mapping.json | 4 +- .../en/api_endpoint_index_mapping.json | 4 +- .../elasticsearch/en/chart_index_mapping.json | 4 +- .../en/container_index_mapping.json | 4 +- .../dashboard_data_model_index_mapping.json | 4 +- .../en/dashboard_index_mapping.json | 4 +- .../en/data_products_index_mapping.json | 4 +- .../en/database_index_mapping.json | 4 +- .../en/database_schema_index_mapping.json | 4 +- .../en/directory_index_mapping.json | 4 +- .../elasticsearch/en/file_index_mapping.json | 4 +- .../en/glossary_index_mapping.json | 4 +- .../en/glossary_term_index_mapping.json | 4 +- .../en/metric_index_mapping.json | 4 +- .../en/mlmodel_index_mapping.json | 4 +- .../en/pipeline_index_mapping.json | 4 +- .../en/search_entity_index_mapping.json | 4 +- .../en/spreadsheet_index_mapping.json | 4 +- .../en/stored_procedure_index_mapping.json | 4 +- .../elasticsearch/en/table_index_mapping.json | 4 +- .../elasticsearch/en/tag_index_mapping.json | 4 +- .../elasticsearch/en/topic_index_mapping.json | 4 +- .../en/worksheet_index_mapping.json | 4 +- .../jp/api_collection_index_mapping.json | 4 +- .../jp/api_endpoint_index_mapping.json | 4 +- .../elasticsearch/jp/chart_index_mapping.json | 4 +- .../jp/container_index_mapping.json | 4 +- .../dashboard_data_model_index_mapping.json | 4 +- .../jp/dashboard_index_mapping.json | 4 +- .../jp/data_products_index_mapping.json | 4 +- .../jp/database_index_mapping.json | 4 +- .../jp/database_schema_index_mapping.json | 4 +- .../jp/directory_index_mapping.json | 4 +- .../elasticsearch/jp/file_index_mapping.json | 4 +- .../jp/glossary_index_mapping.json | 4 +- .../jp/glossary_term_index_mapping.json | 4 +- .../jp/metric_index_mapping.json | 4 +- .../jp/mlmodel_index_mapping.json | 4 +- .../jp/pipeline_index_mapping.json | 4 +- .../jp/search_entity_index_mapping.json | 4 +- .../jp/spreadsheet_index_mapping.json | 4 +- 
.../jp/stored_procedure_index_mapping.json | 4 +- .../elasticsearch/jp/table_index_mapping.json | 4 +- .../elasticsearch/jp/tag_index_mapping.json | 4 +- .../elasticsearch/jp/topic_index_mapping.json | 4 +- .../jp/worksheet_index_mapping.json | 4 +- .../ru/api_collection_index_mapping.json | 4 +- .../ru/api_endpoint_index_mapping.json | 4 +- .../elasticsearch/ru/chart_index_mapping.json | 4 +- .../ru/container_index_mapping.json | 4 +- .../dashboard_data_model_index_mapping.json | 4 +- .../ru/dashboard_index_mapping.json | 4 +- .../ru/data_products_index_mapping.json | 4 +- .../ru/database_index_mapping.json | 4 +- .../ru/database_schema_index_mapping.json | 4 +- .../ru/directory_index_mapping.json | 4 +- .../elasticsearch/ru/file_index_mapping.json | 4 +- .../ru/glossary_index_mapping.json | 4 +- .../ru/glossary_term_index_mapping.json | 4 +- .../ru/metric_index_mapping.json | 4 +- .../ru/mlmodel_index_mapping.json | 4 +- .../ru/pipeline_index_mapping.json | 4 +- .../ru/search_entity_index_mapping.json | 4 +- .../ru/spreadsheet_index_mapping.json | 4 +- .../ru/stored_procedure_index_mapping.json | 4 +- .../elasticsearch/ru/table_index_mapping.json | 4 +- .../elasticsearch/ru/tag_index_mapping.json | 4 +- .../elasticsearch/ru/topic_index_mapping.json | 4 +- .../ru/worksheet_index_mapping.json | 4 +- .../zh/api_collection_index_mapping.json | 4 +- .../zh/api_endpoint_index_mapping.json | 4 +- .../elasticsearch/zh/chart_index_mapping.json | 4 +- .../zh/container_index_mapping.json | 4 +- .../dashboard_data_model_index_mapping.json | 4 +- .../zh/dashboard_index_mapping.json | 4 +- .../zh/data_products_index_mapping.json | 4 +- .../zh/database_index_mapping.json | 4 +- .../zh/database_schema_index_mapping.json | 4 +- .../zh/directory_index_mapping.json | 4 +- .../elasticsearch/zh/file_index_mapping.json | 4 +- .../zh/glossary_index_mapping.json | 4 +- .../zh/glossary_term_index_mapping.json | 4 +- .../zh/metric_index_mapping.json | 4 +- 
.../zh/mlmodel_index_mapping.json | 4 +- .../zh/pipeline_index_mapping.json | 4 +- .../zh/search_entity_index_mapping.json | 4 +- .../zh/spreadsheet_index_mapping.json | 4 +- .../zh/stored_procedure_index_mapping.json | 4 +- .../elasticsearch/zh/table_index_mapping.json | 4 +- .../elasticsearch/zh/tag_index_mapping.json | 4 +- .../elasticsearch/zh/topic_index_mapping.json | 4 +- .../zh/worksheet_index_mapping.json | 4 +- 97 files changed, 225 insertions(+), 221 deletions(-) diff --git a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/PatchTableEmbeddingIT.java b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/PatchTableEmbeddingIT.java index e6b265c6f8f8..1044ef576b76 100644 --- a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/PatchTableEmbeddingIT.java +++ b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/PatchTableEmbeddingIT.java @@ -113,10 +113,11 @@ private void runEmbeddingTest(TestNamespace ns, SearchRepository searchRepo) thr updatedFingerprint, "Fingerprint should change after description update"); - String textToEmbed = getFieldFromDoc(searchClient, entityIndexName, tableId, "textToEmbed"); + String textToLLMContext = + getFieldFromDoc(searchClient, entityIndexName, tableId, "textToLLMContext"); assertTrue( - textToEmbed.contains("Revenue metrics"), - "textToEmbed should reflect the patched description"); + textToLLMContext.contains("Revenue metrics"), + "textToLLMContext should reflect the patched description"); String embeddingJson = getFieldFromDoc(searchClient, entityIndexName, tableId, "embedding"); assertNotNull(embeddingJson, "Embedding vector should exist after PATCH"); diff --git a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/VectorEmbeddingIntegrationIT.java b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/VectorEmbeddingIntegrationIT.java index 7f4e16749928..3057cc6c41fb 100644 --- 
a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/VectorEmbeddingIntegrationIT.java +++ b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/VectorEmbeddingIntegrationIT.java @@ -128,7 +128,7 @@ void testEntityEmbeddingCreationViaPartialUpdate() throws Exception { Map doc = getDocumentById(testTable.getId().toString()); assertNotNull(doc, "Entity document should exist"); - assertNotNull(doc.get("textToEmbed"), "Document should have text_to_embed"); + assertNotNull(doc.get("textToLLMContext"), "Document should have textToLLMContext"); assertNotNull(doc.get("embedding"), "Document should have embedding"); assertNotNull(doc.get("fingerprint"), "Document should have fingerprint"); assertEquals( @@ -323,7 +323,7 @@ void testGenerateEmbeddingFields() { assertNotNull(fields); assertNotNull(fields.get("embedding")); - assertNotNull(fields.get("textToEmbed")); + assertNotNull(fields.get("textToLLMContext")); assertNotNull(fields.get("fingerprint")); assertEquals(testTable.getId().toString(), fields.get("parentId")); assertEquals(0, fields.get("chunkIndex")); @@ -347,7 +347,7 @@ void testPatchTableDescriptionUpdatesEmbeddingForSemanticSearch() throws Excepti Map initialDoc = getDocumentById(testTable.getId().toString()); String initialFingerprint = (String) initialDoc.get("fingerprint"); - String initialTextToEmbed = (String) initialDoc.get("textToEmbed"); + String initialTextToEmbed = (String) initialDoc.get("textToLLMContext"); String patchedDescription = "Revenue metrics for quarterly financial reporting analysis"; testTable.setDescription(patchedDescription); @@ -358,15 +358,16 @@ void testPatchTableDescriptionUpdatesEmbeddingForSemanticSearch() throws Excepti Map updatedDoc = getDocumentById(testTable.getId().toString()); String updatedFingerprint = (String) updatedDoc.get("fingerprint"); - String updatedTextToEmbed = (String) updatedDoc.get("textToEmbed"); + String updatedTextToEmbed = (String) 
updatedDoc.get("textToLLMContext"); assertFalse( initialFingerprint.equals(updatedFingerprint), "Fingerprint should change after PATCH"); assertFalse( - initialTextToEmbed.equals(updatedTextToEmbed), "textToEmbed should change after PATCH"); + initialTextToEmbed.equals(updatedTextToEmbed), + "textToLLMContext should change after PATCH"); assertTrue( updatedTextToEmbed.contains("Revenue metrics"), - "Updated textToEmbed should reflect patched description"); + "Updated textToLLMContext should reflect patched description"); List> results = executeKnnSearch("quarterly financial revenue reporting", 10); diff --git a/openmetadata-mcp/src/test/java/org/openmetadata/mcp/tools/SemanticSearchToolTest.java b/openmetadata-mcp/src/test/java/org/openmetadata/mcp/tools/SemanticSearchToolTest.java index c486c5728705..d2021de4c727 100644 --- a/openmetadata-mcp/src/test/java/org/openmetadata/mcp/tools/SemanticSearchToolTest.java +++ b/openmetadata-mcp/src/test/java/org/openmetadata/mcp/tools/SemanticSearchToolTest.java @@ -203,7 +203,8 @@ void testHitFieldsCleaned() throws Exception { hit.put("columns", List.of(Map.of("name", "id", "dataType", "INT"))); hit.put("embedding", new float[] {0.1f, 0.2f}); hit.put("fingerprint", "abc123"); - hit.put("textToEmbed", "name: users; entityType: table | description: A short description"); + hit.put( + "textToLLMContext", "name: users; entityType: table | description: A short description"); VectorSearchResponse response = new VectorSearchResponse(10L, List.of(hit)); @@ -233,7 +234,7 @@ void testHitFieldsCleaned() throws Exception { assertTrue(!cleaned.containsKey("_score")); assertTrue(!cleaned.containsKey("embedding")); assertTrue(!cleaned.containsKey("fingerprint")); - assertTrue(!cleaned.containsKey("textToEmbed")); + assertTrue(!cleaned.containsKey("textToLLMContext")); } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/VectorDocBuilder.java 
b/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/VectorDocBuilder.java index dbef38dc5f13..96d5c241a086 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/VectorDocBuilder.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/VectorDocBuilder.java @@ -110,10 +110,10 @@ public static List> fromEntity( /** * Generate embedding fields to merge into an entity's search index document. Returns a map with: - * embedding, textToEmbed, textToEmbedSemantic, chunkIndex, chunkCount, parentId, fingerprint. + * embedding, textToLLMContext, textToEmbed, chunkIndex, chunkCount, parentId, fingerprint. * - *

<p>{@code textToEmbed} preserves the legacy rich-context format (empty fields rendered as - * {@code []}) and is consumed by agent tooling as LLM context. {@code textToEmbedSemantic} is + *

{@code textToLLMContext} preserves the legacy rich-context format (empty fields rendered as + * {@code []}) and is consumed by agent tooling as LLM context. {@code textToEmbed} is * the compact variant that omits empty fields and is the actual input fed to the embedding * model. */ @@ -133,17 +133,17 @@ public static Map buildEmbeddingFields( List semanticChunks = TextChunkManager.chunk(semanticBody); String contTag = ""; - String textToEmbed = + String textToLLMContext = String.format("%s%s%s | chunk %d/%d", metaLight, contTag, chunks.get(0), 1, chunkCount); String semanticBodyChunk = semanticChunks.get(0); - String textToEmbedSemantic = joinSemanticParts(semanticMetaLight, semanticBodyChunk); + String textToEmbed = joinSemanticParts(semanticMetaLight, semanticBodyChunk); - float[] embedding = embeddingClient.embed(textToEmbedSemantic); + float[] embedding = embeddingClient.embed(textToEmbed); Map fields = new HashMap<>(); fields.put("embedding", embedding); + fields.put("textToLLMContext", textToLLMContext); fields.put("textToEmbed", textToEmbed); - fields.put("textToEmbedSemantic", textToEmbedSemantic); fields.put("chunkIndex", 0); fields.put("chunkCount", chunkCount); fields.put("parentId", parentId); diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/search/vector/VectorDocBuilderTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/search/vector/VectorDocBuilderTest.java index 468329e66465..0c881b8d2012 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/search/vector/VectorDocBuilderTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/search/vector/VectorDocBuilderTest.java @@ -43,8 +43,8 @@ void testBuildEmbeddingFieldsBasic() { assertNotNull(fields); assertEquals(table.getId().toString(), fields.get("parentId")); assertNotNull(fields.get("embedding")); + assertNotNull(fields.get("textToLLMContext")); assertNotNull(fields.get("textToEmbed")); - 
assertNotNull(fields.get("textToEmbedSemantic")); assertNotNull(fields.get("fingerprint")); assertEquals(0, fields.get("chunkIndex")); assertTrue((int) fields.get("chunkCount") >= 1); @@ -103,7 +103,7 @@ void testSemanticTextPrependsTypeLabelWhenContentIsEmpty() { table.setDeleted(false); Map fields = VectorDocBuilder.buildEmbeddingFields(table, MOCK_CLIENT); - String semantic = (String) fields.get("textToEmbedSemantic"); + String semantic = (String) fields.get("textToEmbed"); assertEquals("table lonely", semantic); } @@ -113,7 +113,7 @@ void testSemanticTextJoinsMetaAndBodyWithPeriod() { Table table = createTestTable("customers", "Customers dashboard", "A sample dashboard"); Map fields = VectorDocBuilder.buildEmbeddingFields(table, MOCK_CLIENT); - String semantic = (String) fields.get("textToEmbedSemantic"); + String semantic = (String) fields.get("textToEmbed"); assertTrue(semantic.startsWith("table Customers dashboard (customers)")); assertTrue(semantic.contains(". A sample dashboard")); @@ -178,10 +178,11 @@ void testTextToEmbedRemainsLegacyFormat() { Table table = createTestTable("orders", null, "Order table"); Map fields = VectorDocBuilder.buildEmbeddingFields(table, MOCK_CLIENT); - String legacy = (String) fields.get("textToEmbed"); - String semantic = (String) fields.get("textToEmbedSemantic"); + String legacy = (String) fields.get("textToLLMContext"); + String semantic = (String) fields.get("textToEmbed"); - assertTrue(legacy.contains("displayName: []"), "legacy textToEmbed keeps empty placeholders"); + assertTrue( + legacy.contains("displayName: []"), "legacy textToLLMContext keeps empty placeholders"); assertTrue(legacy.contains(" | chunk 1/")); assertFalse(semantic.contains("[]")); assertFalse(semantic.contains("name:")); @@ -206,10 +207,10 @@ void testBuildEmbeddingFieldsTextToEmbedContainsEntityInfo() { Map fields = VectorDocBuilder.buildEmbeddingFields(table, MOCK_CLIENT); - String textToEmbed = (String) fields.get("textToEmbed"); - 
assertNotNull(textToEmbed); - assertTrue(textToEmbed.contains("info_table")); - assertTrue(textToEmbed.contains("Important description")); + String textToLLMContext = (String) fields.get("textToLLMContext"); + assertNotNull(textToLLMContext); + assertTrue(textToLLMContext.contains("info_table")); + assertTrue(textToLLMContext.contains("Important description")); } @Test @@ -397,11 +398,11 @@ void testBuildEmbeddingFieldsWithGlossaryTermRelations() { assertNotNull(fields); assertNotNull(fields.get("embedding")); - assertNotNull(fields.get("textToEmbed")); - String textToEmbed = (String) fields.get("textToEmbed"); - assertTrue(textToEmbed.contains("finance.profit")); - assertTrue(textToEmbed.contains("finance.cost")); - assertTrue(textToEmbed.contains("relatedTerms:")); + assertNotNull(fields.get("textToLLMContext")); + String textToLLMContext = (String) fields.get("textToLLMContext"); + assertTrue(textToLLMContext.contains("finance.profit")); + assertTrue(textToLLMContext.contains("finance.cost")); + assertTrue(textToLLMContext.contains("relatedTerms:")); } @Test @@ -412,10 +413,10 @@ void testBuildEmbeddingFieldsWithGlossaryTermNoRelatedTerms() { Map fields = VectorDocBuilder.buildEmbeddingFields(term, MOCK_CLIENT); assertNotNull(fields); - assertNotNull(fields.get("textToEmbed")); - String textToEmbed = (String) fields.get("textToEmbed"); - assertTrue(textToEmbed.contains("relatedTerms:")); - assertFalse(textToEmbed.contains("finance.")); + assertNotNull(fields.get("textToLLMContext")); + String textToLLMContext = (String) fields.get("textToLLMContext"); + assertTrue(textToLLMContext.contains("relatedTerms:")); + assertFalse(textToLLMContext.contains("finance.")); } @Test diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/api_collection_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/api_collection_index_mapping.json index 04b0a259b51e..c36fa4d03d72 100644 --- 
a/openmetadata-spec/src/main/resources/elasticsearch/en/api_collection_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/api_collection_index_mapping.json @@ -715,10 +715,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/api_endpoint_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/api_endpoint_index_mapping.json index 4b16ca605348..172cfb6b955b 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/api_endpoint_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/api_endpoint_index_mapping.json @@ -939,10 +939,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/chart_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/chart_index_mapping.json index b4f970e09bdc..d01450a51146 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/chart_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/chart_index_mapping.json @@ -666,10 +666,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/container_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/container_index_mapping.json index c62ac62e64e8..e43a56c4874b 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/container_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/container_index_mapping.json @@ -920,10 +920,10 
@@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/dashboard_data_model_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/dashboard_data_model_index_mapping.json index 45704a4e24ee..89106e3b1d2d 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/dashboard_data_model_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/dashboard_data_model_index_mapping.json @@ -774,10 +774,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/dashboard_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/dashboard_index_mapping.json index a6e4a739754e..b54e245c00ad 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/dashboard_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/dashboard_index_mapping.json @@ -807,10 +807,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/data_products_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/data_products_index_mapping.json index 4436a30a9b1d..7c34faaa9d5c 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/data_products_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/data_products_index_mapping.json @@ -615,10 +615,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + 
"textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/database_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/database_index_mapping.json index 76784d9a3845..403cc26438c6 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/database_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/database_index_mapping.json @@ -690,10 +690,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/database_schema_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/database_schema_index_mapping.json index 4d7c1e80d436..6e51ed087e7b 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/database_schema_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/database_schema_index_mapping.json @@ -654,10 +654,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/directory_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/directory_index_mapping.json index 22b271f77335..1c167e1b8d37 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/directory_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/directory_index_mapping.json @@ -756,10 +756,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/file_index_mapping.json 
b/openmetadata-spec/src/main/resources/elasticsearch/en/file_index_mapping.json index 6806197583b2..d8684473b2f9 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/file_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/file_index_mapping.json @@ -808,10 +808,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/glossary_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/glossary_index_mapping.json index dd51101c637d..d985b9e70ac2 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/glossary_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/glossary_index_mapping.json @@ -395,10 +395,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/glossary_term_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/glossary_term_index_mapping.json index 7d913da5f17b..0376916225fc 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/glossary_term_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/glossary_term_index_mapping.json @@ -548,10 +548,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/metric_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/metric_index_mapping.json index d6b263c41320..08a1713c3823 100644 --- 
a/openmetadata-spec/src/main/resources/elasticsearch/en/metric_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/metric_index_mapping.json @@ -681,10 +681,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/mlmodel_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/mlmodel_index_mapping.json index f2f4b67c30da..2d4f2f6a9af9 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/mlmodel_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/mlmodel_index_mapping.json @@ -815,10 +815,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/pipeline_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/pipeline_index_mapping.json index 6873215cafc2..ea5227c5ce7f 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/pipeline_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/pipeline_index_mapping.json @@ -722,10 +722,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/search_entity_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/search_entity_index_mapping.json index 4d0186ab3a76..c33454f4587e 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/search_entity_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/search_entity_index_mapping.json @@ -777,10 +777,10 @@ 
"fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/spreadsheet_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/spreadsheet_index_mapping.json index b61ba8968dca..67337f1755c5 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/spreadsheet_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/spreadsheet_index_mapping.json @@ -797,10 +797,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/stored_procedure_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/stored_procedure_index_mapping.json index a7d7673bcdce..3bcc49fff2d9 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/stored_procedure_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/stored_procedure_index_mapping.json @@ -795,10 +795,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/table_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/table_index_mapping.json index 8dab63c02ae1..98bdec5352e6 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/table_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/table_index_mapping.json @@ -1039,10 +1039,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": 
{ diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/tag_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/tag_index_mapping.json index f9bd6fa3b80d..d42f307ae37e 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/tag_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/tag_index_mapping.json @@ -379,10 +379,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/topic_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/topic_index_mapping.json index deb6565448f6..1265f2beb7cc 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/topic_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/topic_index_mapping.json @@ -802,10 +802,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/worksheet_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/worksheet_index_mapping.json index 794bd2445e15..70703bb99111 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/worksheet_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/worksheet_index_mapping.json @@ -921,10 +921,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/api_collection_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/api_collection_index_mapping.json index 1ba391617a19..e125214ddc98 100644 --- 
a/openmetadata-spec/src/main/resources/elasticsearch/jp/api_collection_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/api_collection_index_mapping.json @@ -688,10 +688,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/api_endpoint_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/api_endpoint_index_mapping.json index 29891b2cc9db..1b81b53e5165 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/api_endpoint_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/api_endpoint_index_mapping.json @@ -902,10 +902,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/chart_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/chart_index_mapping.json index 495132a7260b..8d30a8b09c3c 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/chart_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/chart_index_mapping.json @@ -694,10 +694,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/container_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/container_index_mapping.json index e4966d924cae..abddb5364546 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/container_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/container_index_mapping.json @@ -842,10 +842,10 
@@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/dashboard_data_model_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/dashboard_data_model_index_mapping.json index 1f5ad13e20a6..4298af04f18a 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/dashboard_data_model_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/dashboard_data_model_index_mapping.json @@ -727,10 +727,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/dashboard_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/dashboard_index_mapping.json index cc24cb35ffa0..c6bbeefe036c 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/dashboard_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/dashboard_index_mapping.json @@ -789,10 +789,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/data_products_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/data_products_index_mapping.json index ccb3198fc56a..39b9818ce14b 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/data_products_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/data_products_index_mapping.json @@ -618,10 +618,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + 
"textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/database_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/database_index_mapping.json index 6e5d1cf6f1b0..4e8543cfc103 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/database_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/database_index_mapping.json @@ -681,10 +681,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/database_schema_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/database_schema_index_mapping.json index 3fbff97aa6f0..cbf44c10cd80 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/database_schema_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/database_schema_index_mapping.json @@ -645,10 +645,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/directory_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/directory_index_mapping.json index 775fc4627570..9a377c7b800a 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/directory_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/directory_index_mapping.json @@ -738,10 +738,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/file_index_mapping.json 
b/openmetadata-spec/src/main/resources/elasticsearch/jp/file_index_mapping.json index 040a397f1add..e3d1746cd9ca 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/file_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/file_index_mapping.json @@ -753,10 +753,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/glossary_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/glossary_index_mapping.json index 6fe55702a1a4..b9687fb48ef8 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/glossary_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/glossary_index_mapping.json @@ -391,10 +391,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/glossary_term_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/glossary_term_index_mapping.json index 7f40c9790fd1..5e52a1f95c08 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/glossary_term_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/glossary_term_index_mapping.json @@ -545,10 +545,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/metric_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/metric_index_mapping.json index f1c31da3230c..082fa1201e6e 100644 --- 
a/openmetadata-spec/src/main/resources/elasticsearch/jp/metric_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/metric_index_mapping.json @@ -665,10 +665,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/mlmodel_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/mlmodel_index_mapping.json index f4a6c3c2d1de..b98478bf0eea 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/mlmodel_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/mlmodel_index_mapping.json @@ -791,10 +791,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/pipeline_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/pipeline_index_mapping.json index 4ee939a0d537..ac62855eb46b 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/pipeline_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/pipeline_index_mapping.json @@ -679,10 +679,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/search_entity_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/search_entity_index_mapping.json index a4a8f0cb187f..6ac15cffbb89 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/search_entity_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/search_entity_index_mapping.json @@ -764,10 +764,10 @@ 
"fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/spreadsheet_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/spreadsheet_index_mapping.json index e5cb99f96c16..da38cc093cde 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/spreadsheet_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/spreadsheet_index_mapping.json @@ -738,10 +738,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/stored_procedure_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/stored_procedure_index_mapping.json index 3b465d55d9db..731bbed20021 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/stored_procedure_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/stored_procedure_index_mapping.json @@ -871,10 +871,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/table_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/table_index_mapping.json index 612939e0c0f2..705fba021c3d 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/table_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/table_index_mapping.json @@ -1020,10 +1020,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": 
{ diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/tag_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/tag_index_mapping.json index c5407048dfd2..e9d180584a2e 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/tag_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/tag_index_mapping.json @@ -328,10 +328,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/topic_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/topic_index_mapping.json index 56d09a736f48..3ea0e1b164d6 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/topic_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/topic_index_mapping.json @@ -774,10 +774,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/worksheet_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/worksheet_index_mapping.json index 97aee56edc2e..996c4f730ecd 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/worksheet_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/worksheet_index_mapping.json @@ -811,10 +811,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/api_collection_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/api_collection_index_mapping.json index a65b9f503caa..3a6b4292272e 100644 --- 
a/openmetadata-spec/src/main/resources/elasticsearch/ru/api_collection_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/api_collection_index_mapping.json @@ -732,10 +732,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/api_endpoint_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/api_endpoint_index_mapping.json index b239b938d836..57c5b1d1031f 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/api_endpoint_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/api_endpoint_index_mapping.json @@ -956,10 +956,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/chart_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/chart_index_mapping.json index 322b85d8579b..c753bae4eabd 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/chart_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/chart_index_mapping.json @@ -683,10 +683,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/container_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/container_index_mapping.json index d29862eb1e9e..22a1d78cbbda 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/container_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/container_index_mapping.json @@ -893,10 +893,10 
@@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/dashboard_data_model_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/dashboard_data_model_index_mapping.json index 598904407578..f303c44687c0 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/dashboard_data_model_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/dashboard_data_model_index_mapping.json @@ -746,10 +746,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/dashboard_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/dashboard_index_mapping.json index f118c2c70739..a98e727084e8 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/dashboard_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/dashboard_index_mapping.json @@ -824,10 +824,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/data_products_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/data_products_index_mapping.json index b70117273f53..33a36fccd183 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/data_products_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/data_products_index_mapping.json @@ -627,10 +627,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + 
"textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/database_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/database_index_mapping.json index 38cb7c480e47..28ddf6277ff6 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/database_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/database_index_mapping.json @@ -707,10 +707,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/database_schema_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/database_schema_index_mapping.json index b99ed27b008e..1c6dc4fd99dd 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/database_schema_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/database_schema_index_mapping.json @@ -671,10 +671,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/directory_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/directory_index_mapping.json index 86766e51d223..be12a419f4e1 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/directory_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/directory_index_mapping.json @@ -639,10 +639,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/file_index_mapping.json 
b/openmetadata-spec/src/main/resources/elasticsearch/ru/file_index_mapping.json index d28d84646c33..7ba08002a671 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/file_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/file_index_mapping.json @@ -694,10 +694,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/glossary_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/glossary_index_mapping.json index daa2c2d1a457..08931778c2ae 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/glossary_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/glossary_index_mapping.json @@ -413,10 +413,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/glossary_term_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/glossary_term_index_mapping.json index 8f5c9b99865f..6779763ead55 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/glossary_term_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/glossary_term_index_mapping.json @@ -566,10 +566,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/metric_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/metric_index_mapping.json index 684364dbb4c8..d255839eb1e8 100644 --- 
a/openmetadata-spec/src/main/resources/elasticsearch/ru/metric_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/metric_index_mapping.json @@ -651,10 +651,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/mlmodel_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/mlmodel_index_mapping.json index dbcb8f57d8b6..1e0a0a13f43e 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/mlmodel_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/mlmodel_index_mapping.json @@ -832,10 +832,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/pipeline_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/pipeline_index_mapping.json index 652316cdb0a6..3fbe642d2e72 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/pipeline_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/pipeline_index_mapping.json @@ -739,10 +739,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/search_entity_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/search_entity_index_mapping.json index 758a63142c4f..42be6ab4ebff 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/search_entity_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/search_entity_index_mapping.json @@ -794,10 +794,10 @@ 
"fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/spreadsheet_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/spreadsheet_index_mapping.json index c8a72689ae9c..307909569845 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/spreadsheet_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/spreadsheet_index_mapping.json @@ -693,10 +693,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/stored_procedure_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/stored_procedure_index_mapping.json index 756996260db0..37b25ea16527 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/stored_procedure_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/stored_procedure_index_mapping.json @@ -812,10 +812,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/table_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/table_index_mapping.json index 0145d45c896a..a4da935bad97 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/table_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/table_index_mapping.json @@ -1026,10 +1026,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": 
{ diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/tag_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/tag_index_mapping.json index a57e59b03608..877ee0777cb2 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/tag_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/tag_index_mapping.json @@ -392,10 +392,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/topic_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/topic_index_mapping.json index abc0d275cf71..f765d3ea65cf 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/topic_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/topic_index_mapping.json @@ -819,10 +819,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/worksheet_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/worksheet_index_mapping.json index 674578dab391..68bf9a877479 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/worksheet_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/worksheet_index_mapping.json @@ -753,10 +753,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/api_collection_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/api_collection_index_mapping.json index 9a3410e48be0..e86da4b52861 100644 --- 
a/openmetadata-spec/src/main/resources/elasticsearch/zh/api_collection_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/api_collection_index_mapping.json @@ -688,10 +688,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/api_endpoint_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/api_endpoint_index_mapping.json index 66e424efd8ce..bf1f64a7634a 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/api_endpoint_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/api_endpoint_index_mapping.json @@ -903,10 +903,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/chart_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/chart_index_mapping.json index a69fbe6bab96..c250ee0c43d2 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/chart_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/chart_index_mapping.json @@ -680,10 +680,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/container_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/container_index_mapping.json index cb409a760e9c..bf07cb257abc 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/container_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/container_index_mapping.json @@ -844,10 +844,10 
@@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/dashboard_data_model_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/dashboard_data_model_index_mapping.json index 4a316fb4f4c6..7a4874e25f2f 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/dashboard_data_model_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/dashboard_data_model_index_mapping.json @@ -725,10 +725,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/dashboard_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/dashboard_index_mapping.json index 4e3a6d0e0234..2a0bf0b4ca0a 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/dashboard_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/dashboard_index_mapping.json @@ -746,10 +746,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/data_products_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/data_products_index_mapping.json index 57607b57e94b..57b7f0c8cf59 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/data_products_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/data_products_index_mapping.json @@ -611,10 +611,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + 
"textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/database_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/database_index_mapping.json index a7ab80bdf26c..82b79b92a0cf 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/database_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/database_index_mapping.json @@ -664,10 +664,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/database_schema_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/database_schema_index_mapping.json index a22fa2607d19..9b5748f061ee 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/database_schema_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/database_schema_index_mapping.json @@ -624,10 +624,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/directory_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/directory_index_mapping.json index 981f9ca0aa72..79048659730a 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/directory_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/directory_index_mapping.json @@ -711,10 +711,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/file_index_mapping.json 
b/openmetadata-spec/src/main/resources/elasticsearch/zh/file_index_mapping.json index 61ab7481598e..b59448d76373 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/file_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/file_index_mapping.json @@ -726,10 +726,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/glossary_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/glossary_index_mapping.json index aefb277fa932..4b16d41c17e3 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/glossary_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/glossary_index_mapping.json @@ -334,10 +334,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/glossary_term_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/glossary_term_index_mapping.json index 0922707af05e..dac786d82ed7 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/glossary_term_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/glossary_term_index_mapping.json @@ -508,10 +508,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/metric_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/metric_index_mapping.json index 6597a09c030a..12dc8aaf3fd5 100644 --- 
a/openmetadata-spec/src/main/resources/elasticsearch/zh/metric_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/metric_index_mapping.json @@ -657,10 +657,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/mlmodel_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/mlmodel_index_mapping.json index 0570073f8d9c..11c8edb1b19c 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/mlmodel_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/mlmodel_index_mapping.json @@ -785,10 +785,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/pipeline_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/pipeline_index_mapping.json index 600a9201d1fc..23c504b3b145 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/pipeline_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/pipeline_index_mapping.json @@ -682,10 +682,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/search_entity_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/search_entity_index_mapping.json index 68a82f82cb7c..e9800c7e815a 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/search_entity_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/search_entity_index_mapping.json @@ -748,10 +748,10 @@ 
"fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/spreadsheet_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/spreadsheet_index_mapping.json index 6e7f32a7cdd2..c2a609f00a7c 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/spreadsheet_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/spreadsheet_index_mapping.json @@ -711,10 +711,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/stored_procedure_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/stored_procedure_index_mapping.json index 0456d9af17ac..e3d692961c18 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/stored_procedure_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/stored_procedure_index_mapping.json @@ -871,10 +871,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/table_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/table_index_mapping.json index 6629a626d3bf..5d160188175b 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/table_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/table_index_mapping.json @@ -1011,10 +1011,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": 
{ diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/tag_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/tag_index_mapping.json index 416616be0a7f..cbb68e2d20a7 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/tag_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/tag_index_mapping.json @@ -323,10 +323,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/topic_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/topic_index_mapping.json index 36da9c782b05..d000c8f917fb 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/topic_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/topic_index_mapping.json @@ -725,10 +725,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/worksheet_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/worksheet_index_mapping.json index 83c8f001f695..e6835a29fb7f 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/worksheet_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/worksheet_index_mapping.json @@ -786,10 +786,10 @@ "fingerprint": { "type": "keyword" }, - "textToEmbed": { + "textToLLMContext": { "type": "text" }, - "textToEmbedSemantic": { + "textToEmbed": { "type": "text" }, "chunkIndex": { From 843e67e4173cf01f65817e5473f91120ac5f55d2 Mon Sep 17 00:00:00 2001 From: lautel Date: Tue, 21 Apr 2026 10:56:39 +0200 Subject: [PATCH 3/3] Avoid reflection usage --- .../search/vector/VectorDocBuilder.java | 131 +++++++++++------- 1 
file changed, 78 insertions(+), 53 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/VectorDocBuilder.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/VectorDocBuilder.java index 96d5c241a086..521081dc503b 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/VectorDocBuilder.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/vector/VectorDocBuilder.java @@ -8,20 +8,28 @@ import java.util.Map; import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; +import java.util.function.BiConsumer; +import java.util.function.Function; import java.util.stream.Collectors; import lombok.experimental.UtilityClass; import lombok.extern.slf4j.Slf4j; import org.openmetadata.schema.EntityInterface; import org.openmetadata.schema.api.data.MetricExpression; +import org.openmetadata.schema.entity.data.APICollection; +import org.openmetadata.schema.entity.data.Container; +import org.openmetadata.schema.entity.data.Database; +import org.openmetadata.schema.entity.data.DatabaseSchema; import org.openmetadata.schema.entity.data.Glossary; import org.openmetadata.schema.entity.data.GlossaryTerm; import org.openmetadata.schema.entity.data.Metric; import org.openmetadata.schema.entity.data.Table; +import org.openmetadata.schema.entity.domains.DataProduct; import org.openmetadata.schema.type.AssetCertification; import org.openmetadata.schema.type.Column; import org.openmetadata.schema.type.EntityReference; import org.openmetadata.schema.type.TagLabel; import org.openmetadata.schema.type.TermRelation; +import org.openmetadata.service.Entity; import org.openmetadata.service.search.vector.client.EmbeddingClient; import org.openmetadata.service.search.vector.utils.TextChunkManager; @@ -57,17 +65,37 @@ public interface BodyTextExtractor { * Child-entity enumeration spec for container-like types. 
When an entity has children on the * object (populated during reindexing via {@code fields=*}), their names are joined into a * short natural-language phrase and appended to the semantic body, so queries match against - * what a container actually contains. + * what a container actually contains. The cast inside each getter is guarded by the map key: + * an entry keyed by {@link Entity#DATABASE} is only consulted for {@link Database} entities. */ - private record SemanticChildrenSpec(String getterName, String phrasePrefix) {} + private record SemanticChildrenSpec( + Function> childGetter, String phrasePrefix) {} private static final Map SEMANTIC_CHILDREN_SPECS = Map.of( - "database", new SemanticChildrenSpec("getDatabaseSchemas", "Contains schemas"), - "databaseSchema", new SemanticChildrenSpec("getTables", "Contains tables"), - "apiCollection", new SemanticChildrenSpec("getApiEndpoints", "Contains endpoints"), - "container", new SemanticChildrenSpec("getChildren", "Contains"), - "dataProduct", new SemanticChildrenSpec("getAssets", "Contains assets")); + Entity.DATABASE, + new SemanticChildrenSpec( + e -> ((Database) e).getDatabaseSchemas(), "Contains schemas"), + Entity.DATABASE_SCHEMA, + new SemanticChildrenSpec(e -> ((DatabaseSchema) e).getTables(), "Contains tables"), + Entity.API_COLLECTION, + new SemanticChildrenSpec( + e -> ((APICollection) e).getApiEndpoints(), "Contains endpoints"), + Entity.CONTAINER, + new SemanticChildrenSpec(e -> ((Container) e).getChildren(), "Contains"), + Entity.DATA_PRODUCT, + new SemanticChildrenSpec(e -> ((DataProduct) e).getAssets(), "Contains assets")); + + /** + * Entity-type-specific enrichments appended to {@link #buildSemanticMetaLightText} after the + * shared subject/type phrase. Table-driven so new type enrichers are one map entry rather than + * another {@code instanceof} branch. 
+ */ + private static final Map, EntityInterface>> SEMANTIC_ENRICHERS = + Map.of( + Entity.GLOSSARY_TERM, + (phrases, e) -> appendGlossaryTermPhrases(phrases, (GlossaryTerm) e), + Entity.METRIC, (phrases, e) -> appendMetricPhrases(phrases, (Metric) e)); /** * Register a custom {@link BodyTextExtractor} for an entity type. The registry is consulted by @@ -317,10 +345,27 @@ static String buildBodyText(EntityInterface entity, String entityType) { static String buildSemanticMetaLightText(EntityInterface entity, String entityType) { boolean isGlossary = entity instanceof Glossary; boolean isGlossaryTerm = entity instanceof GlossaryTerm; - boolean isMetric = entity instanceof Metric; List phrases = new ArrayList<>(); + appendSubjectPhrase(phrases, entity, entityType); + + BiConsumer, EntityInterface> enricher = SEMANTIC_ENRICHERS.get(entityType); + if (enricher != null) { + enricher.accept(phrases, entity); + } + + appendTagPhrases(phrases, entity, isGlossary, isGlossaryTerm); + appendDomainPhrase(phrases, entity); + if (!isGlossary && !isGlossaryTerm) { + appendTierAndCertificationPhrases(phrases, entity); + } + + return String.join(". 
", phrases); + } + + private static void appendSubjectPhrase( + List phrases, EntityInterface entity, String entityType) { String name = entity.getName(); String displayName = entity.getDisplayName(); String subject = null; @@ -337,29 +382,18 @@ static String buildSemanticMetaLightText(EntityInterface entity, String entityTy } else if (subject != null) { phrases.add(subject); } + } - if (isGlossaryTerm) { - appendGlossaryTermPhrases(phrases, (GlossaryTerm) entity); - } - if (isMetric) { - appendMetricPhrases(phrases, (Metric) entity); + private static void appendTierAndCertificationPhrases( + List phrases, EntityInterface entity) { + String tier = extractTierLabel(entity); + if (tier != null) { + phrases.add(tier.replace('.', ' ')); } - - appendTagPhrases(phrases, entity, isGlossary, isGlossaryTerm); - appendDomainPhrase(phrases, entity); - - if (!isGlossary && !isGlossaryTerm) { - String tier = extractTierLabel(entity); - if (tier != null) { - phrases.add(tier.replace('.', ' ')); - } - String cert = extractCertificationLabel(entity); - if (cert != null) { - phrases.add(cert.replace('.', ' ')); - } + String cert = extractCertificationLabel(entity); + if (cert != null) { + phrases.add(cert.replace('.', ' ')); } - - return String.join(". ", phrases); } private static void appendGlossaryTermPhrases(List phrases, GlossaryTerm term) { @@ -504,10 +538,10 @@ static String humanizeEntityType(String entityType) { /** * Produce a "Contains X, Y, Z" phrase listing the names of a container entity's direct - * children (database schemas, tables, endpoints, charts, etc.). Children are read via - * reflection using the getter name in {@link #SEMANTIC_CHILDREN_SPECS}, so this does not - * introduce compile-time coupling to every container type. Returns null when the entity is - * not a known container, when the getter is missing, or when the child list is empty. + * children (database schemas, tables, endpoints, charts, etc.). 
The per-type getter is looked + * up in {@link #SEMANTIC_CHILDREN_SPECS} as a typed method reference, so this stays + * compile-time checked. Returns null when the entity is not a known container or when the + * child list is empty. */ static String buildChildContextPhrase(EntityInterface entity, String entityType) { if (entityType == null) { @@ -517,7 +551,7 @@ static String buildChildContextPhrase(EntityInterface entity, String entityType) if (spec == null) { return null; } - List childNames = readChildNames(entity, spec.getterName()); + List childNames = readChildNames(spec.childGetter().apply(entity)); if (childNames.isEmpty()) { return null; } @@ -528,28 +562,19 @@ static String buildChildContextPhrase(EntityInterface entity, String entityType) return spec.phrasePrefix() + " " + String.join(", ", limited); } - private static List readChildNames(EntityInterface entity, String getterName) { - try { - Method method = entity.getClass().getMethod(getterName); - Object result = method.invoke(entity); - if (!(result instanceof List refs) || refs.isEmpty()) { - return Collections.emptyList(); - } - List names = new ArrayList<>(refs.size()); - for (Object ref : refs) { - if (ref instanceof EntityReference entityRef) { - String displayName = entityRef.getDisplayName(); - String name = - displayName != null && !displayName.isBlank() ? displayName : entityRef.getName(); - if (name != null && !name.isBlank()) { - names.add(name); - } - } - } - return names; - } catch (Exception e) { + private static List readChildNames(List refs) { + if (refs == null || refs.isEmpty()) { return Collections.emptyList(); } + List names = new ArrayList<>(refs.size()); + for (EntityReference ref : refs) { + String displayName = ref.getDisplayName(); + String name = displayName != null && !displayName.isBlank() ? displayName : ref.getName(); + if (name != null && !name.isBlank()) { + names.add(name); + } + } + return names; } static String extractServiceType(EntityInterface entity) {