Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
ba6c9ea
fix(search): prevent ES/OS document rejections via engine-native mapp…
mohityadav766 Jun 3, 2026
ae11b47
Update generated TypeScript types
github-actions[bot] Jun 3, 2026
4a1afce
Merge branch 'main' into feat/schema-indexing-safety
mohityadav766 Jun 3, 2026
5f5d4b9
fix(search): address PR review feedback
mohityadav766 Jun 3, 2026
ec8a077
Update generated TypeScript types
github-actions[bot] Jun 3, 2026
5c79b0b
Merge branch 'main' into feat/schema-indexing-safety
mohityadav766 Jun 4, 2026
90947f1
Merge branch 'main' into feat/schema-indexing-safety
mohityadav766 Jun 4, 2026
8f158e1
fix(search): resolve customPropertiesTyped/extension immense-term rei…
mohityadav766 Jun 4, 2026
f85ded6
fix(search): resolve customPropertiesTyped/extension immense-term rei…
mohityadav766 Jun 4, 2026
42cceb7
Merge remote-tracking branch 'origin/feat/schema-indexing-safety' int…
mohityadav766 Jun 4, 2026
08b5b13
chore: remove accidentally-committed local debug scripts (admin tokens)
mohityadav766 Jun 4, 2026
9373a0c
fix(search): set dynamic:false on pipelineStatuses to stop config typ…
mohityadav766 Jun 4, 2026
3b4e635
fix(search): apply mapping hardening on the updateIndex (PutMapping) …
mohityadav766 Jun 4, 2026
83e85c0
fix(search): drop free-form pipelineStatuses.config from the ingestio…
mohityadav766 Jun 4, 2026
e67f580
revert: drop pipelineStatuses dynamic:false; keep config strip as the…
mohityadav766 Jun 5, 2026
dc05035
Merge branch 'main' into feat/schema-indexing-safety
mohityadav766 Jun 25, 2026
326efb8
Fix Build
mohityadav766 Jun 25, 2026
dc66f63
Updates
mohityadav766 Jun 25, 2026
4b6bef1
Update generated TypeScript types
github-actions[bot] Jun 25, 2026
f3f6d51
Update IndexMappingVersion Tracker and HealthCheck
mohityadav766 Jun 25, 2026
a196ddd
Merge remote-tracking branch 'origin/feat/schema-indexing-safety' int…
mohityadav766 Jun 25, 2026
d7cfdaa
Update migration
mohityadav766 Jun 25, 2026
7c57cf1
Merge branch 'main' into feat/schema-indexing-safety
mohityadav766 Jun 25, 2026
6604b93
Merge branch 'main' into feat/schema-indexing-safety
mohityadav766 Jun 29, 2026
c2704a6
Fix CheckStyle
mohityadav766 Jun 29, 2026
c43985f
fix(search): drop free-form pipelineStatus metadata before indexing
mohityadav766 Jun 29, 2026
d2e5ac7
refactor(ui): redesign Search Index Mappings settings page
mohityadav766 Jun 29, 2026
9c5dbb9
Merge branch 'main' into feat/schema-indexing-safety
mohityadav766 Jun 29, 2026
dfbc456
Fix Failure
mohityadav766 Jun 29, 2026
c1485f7
Fix Failure
mohityadav766 Jun 29, 2026
8fa8bea
test(search): detect engine rejection via status code in IndexingLimi…
mohityadav766 Jun 30, 2026
3d36931
feat(search): make column-flatten depth limit configurable per entity…
mohityadav766 Jun 30, 2026
2dda25c
Merge branch 'main' into feat/schema-indexing-safety
mohityadav766 Jun 30, 2026
72f2d65
fix(search): allowlist mapping language before resource lookup (CodeQL)
mohityadav766 Jun 30, 2026
b6da0f1
fix(search): store only admin overrides so shipped mapping changes ap…
mohityadav766 Jun 30, 2026
2a36625
fix(search): derive mapping resource language from enum to clear Code…
mohityadav766 Jun 30, 2026
f7cf80e
Merge branch 'main' into feat/schema-indexing-safety
mohityadav766 Jun 30, 2026
ad7d937
test(search): make hardened-boolean acceptance engine-aware in Indexi…
mohityadav766 Jun 30, 2026
e8358fd
test(search): assert indexed columnNamesFuzzy content, not a flaky fu…
mohityadav766 Jun 30, 2026
9fd220b
refactor(ui): migrate Search Index Mappings page to ui-core-components
mohityadav766 Jun 30, 2026
aba8307
Merge branch 'main' into feat/schema-indexing-safety
mohityadav766 Jun 30, 2026
3fc45f4
test(search): widen recursive-delete search precondition await to 120s
mohityadav766 Jun 30, 2026
e70fdab
Merge branch 'main' into feat/schema-indexing-safety
mohityadav766 Jun 30, 2026
5160d2d
fix(search): serve extension.<name> custom-property search via custom…
mohityadav766 Jul 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,12 @@ void recursiveHardDelete_serviceSubtree_leavesNoOrphansAndSearchClean(TestNamesp
subtree != null, "service type provides no deletable subtree builder; skipping");

for (SearchDoc sd : subtree.searchDocs()) {
// Secondary docs (e.g. column_search_index) are written on the async per-entity indexing lane
// and can sit briefly behind a concurrent full-reindex alias swap, so this wait uses the same
// tolerance as the post-delete checks; a tighter 60s limit flakes under full IT load.
Awaitility.await("descendant indexed in search before delete: " + sd.index())
.atMost(Duration.ofSeconds(60))
.atMost(Duration.ofSeconds(120))
.pollInterval(Duration.ofSeconds(1))
.ignoreExceptions()
.untilAsserted(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
package org.openmetadata.it.tests;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import es.co.elastic.clients.transport.rest5_client.low_level.Request;
import es.co.elastic.clients.transport.rest5_client.low_level.Response;
import es.co.elastic.clients.transport.rest5_client.low_level.ResponseException;
import es.co.elastic.clients.transport.rest5_client.low_level.Rest5Client;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.stream.Stream;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.parallel.Execution;
import org.junit.jupiter.api.parallel.ExecutionMode;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.openmetadata.it.bootstrap.TestSuiteBootstrap;
import org.openmetadata.service.search.SearchFieldLimits;
import org.openmetadata.service.search.SearchIndexSettings;
import org.openmetadata.service.search.opensearch.OsUtils;

/**
* Proves the engine-native hardening in {@link SearchIndexSettings} prevents documents from being
* rejected by the real search engine. For each risk class, a document is rejected by an index built
* from the raw mapping and accepted by an index built from the hardened mapping. Runs against
* whichever engine the active Maven profile selected, so the two CI profiles cover both engines.
*/
@Execution(ExecutionMode.CONCURRENT)
public class IndexingLimitsIT {

private static final List<String> CREATED_INDICES = new CopyOnWriteArrayList<>();

Comment on lines +31 to +35
@AfterAll
static void cleanup() {
Rest5Client client = TestSuiteBootstrap.createSearchClient();
for (String index : CREATED_INDICES) {
try {
client.performRequest(new Request("DELETE", "/" + index));
} catch (Exception ignored) {
// best-effort cleanup
}
}
}

@Test
void keywordOverByteLimitRejectedRawAcceptedWhenHardened() throws Exception {
String rawMapping = "{\"mappings\":{\"properties\":{\"fqn\":{\"type\":\"keyword\"}}}}";
String doc = "{\"fqn\":\"" + "a".repeat(40000) + "\"}";

assertTrue(
rejects("kw_raw", rawMapping, doc), "raw keyword index must reject the immense term");
assertTrue(
accepts("kw_hardened", harden(rawMapping), doc),
"hardened index (ignore_above) must accept the value");
}

static Stream<Arguments> malformedValuesByType() {
return Stream.of(
Arguments.of("integer", "\"not-a-number\""),
Arguments.of("long", "\"not-a-long\""),
Arguments.of("double", "\"not-a-double\""),
Arguments.of("float", "\"NaN\""),
Arguments.of("date", "\"not-a-date\""),
Arguments.of("boolean", "\"not-a-bool\""));
}

@ParameterizedTest(name = "{0}")
@MethodSource("malformedValuesByType")
void malformedValueRejectedRawAcceptedWhenHardened(String type, String jsonValue)
throws Exception {
String rawMapping = "{\"mappings\":{\"properties\":{\"v\":{\"type\":\"" + type + "\"}}}}";
String doc = "{\"v\":" + jsonValue + "}";

assertTrue(
rejects(type + "_raw", rawMapping, doc),
"raw " + type + " index must reject the bad value");
// OpenSearch does not support ignore_malformed on boolean (OsUtils strips it), so a hardened
// boolean index there still rejects the malformed value; every other guarded type accepts on
// both engines.
boolean expectedToAccept = !("boolean".equals(type) && isOpenSearch());
assertEquals(
expectedToAccept,
accepts(type + "_hardened", harden(rawMapping), doc),
"hardened " + type + " index acceptance must match the engine's ignore_malformed support");
}

private static boolean isOpenSearch() {
return "opensearch".equalsIgnoreCase(System.getProperty("searchType", "elasticsearch"));
}

private String harden(String mapping) {
String hardened = SearchIndexSettings.harden(mapping, SearchFieldLimits.defaults());
// Mirror the production OpenSearch path: harden() then enrichIndexMappingForOpenSearch()
// (e.g. strips ignore_malformed from boolean, which OpenSearch rejects).
if (isOpenSearch()) {
hardened = OsUtils.enrichIndexMappingForOpenSearch(hardened);
}
return hardened;
}
Comment on lines +94 to +102

private boolean rejects(String index, String mapping, String doc) throws Exception {
createIndex(index, mapping);
return indexStatus(index, doc) >= 400;
}

private boolean accepts(String index, String mapping, String doc) throws Exception {
createIndex(index, mapping);
return indexStatus(index, doc) < 400;
}

/**
* Status code of indexing {@code doc} into {@code index}. The rest5 low-level client surfaces a
* rejected write as the {@link Response} carrying the 4xx (not always a thrown exception), so the
* status code is authoritative — mirroring how {@code ElasticSearchClient} reads {@code
* e.getResponse()}. A thrown {@link ResponseException} is unwrapped to its response so a rejection
* is detected whichever way the client surfaces it.
*/
private int indexStatus(String index, String doc) throws Exception {
int statusCode;
try {
statusCode = index(index, doc).getStatusCode();
} catch (ResponseException rejected) {
statusCode = rejected.getResponse().getStatusCode();
}
return statusCode;
}

private void createIndex(String index, String mapping) throws Exception {
Rest5Client client = TestSuiteBootstrap.createSearchClient();
CREATED_INDICES.add(index);
Request request = new Request("PUT", "/" + index);
request.setJsonEntity(mapping);
client.performRequest(request);
}

private Response index(String index, String doc) throws Exception {
Rest5Client client = TestSuiteBootstrap.createSearchClient();
Request request = new Request("PUT", "/" + index + "/_doc/1?refresh=true");
request.setJsonEntity(doc);
return client.performRequest(request);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,14 @@ static void cleanupCustomProperties() {
}

/**
* Test that the extension field uses flattened type in Elasticsearch mapping.
* With flattened type, all custom properties are stored in a single field,
* preventing field explosion.
* Test that the extension field is a disabled object in the mapping. Custom-property values can
* exceed Lucene's 32766-byte keyword limit, and flattened/flat_object leaves are indexed as
* keywords (which OpenSearch cannot guard with ignore_above), so the raw extension is stored but
* not indexed. Custom-property search goes through customPropertiesTyped instead. Storing it as a
* disabled object also prevents field explosion.
*/
@Test
void testExtensionFieldIsFlattenedType(TestNamespace ns) throws Exception {
void testExtensionFieldIsDisabledObject(TestNamespace ns) throws Exception {
Rest5Client searchClient = TestSuiteBootstrap.createSearchClient();

Request request = new Request("GET", "/" + TABLE_INDEX + "/_mapping");
Expand All @@ -122,11 +124,10 @@ void testExtensionFieldIsFlattenedType(TestNamespace ns) throws Exception {
JsonNode extensionMapping = findExtensionMapping(root);
assertNotNull(extensionMapping, "Extension field should exist in mapping");

String extensionType = extensionMapping.path("type").asText();
assertTrue(
"flattened".equals(extensionType) || "flat_object".equals(extensionType),
"Extension field should be flattened (ES) or flat_object (OpenSearch) type, but was: "
+ extensionType);
assertFalse(
extensionMapping.path("enabled").asBoolean(true),
"Extension field should be a disabled object (enabled:false), but was: "
+ extensionMapping);
}

/**
Expand Down
Loading
Loading