From 7116659d14db7d143615354a9f425478a407d09b Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Fri, 8 May 2026 11:17:36 -0700 Subject: [PATCH 01/38] fix(restore): bulk + async restore for large entity hierarchies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EntityRepository.restoreEntity walked descendants synchronously, taking 4+ minutes on a 12k-table database and exceeding typical proxy timeouts. restoreChildren now groups CONTAINS children by type and dispatches one bulkRestoreSubtree per type, batching DB writes, version history, change events, and cache invalidation; the existing ES cascade handles descendant index updates in one update_by_query. Adds an async option (?async=true) on the deep-hierarchy restore endpoints that returns 202 Accepted with a job id and runs the restore on AsyncService, emitting WebSocket notifications on restoreEntityChannel. Java SDK adds .restore().async().execute() fluent builders on Tables/Databases plus restoreServerAsync on EntityServiceBase; Python SDK mirrors this with restore_request().with_async().execute() and restore_async() helpers on BaseEntity, exposing a new AsyncJobResponse type. Tests: EntityRepositoryRestoreTest verifies the per-type grouping and bulk dispatch path; RestoreFluentAPITest covers the Java SDK fluent behavior; RestoreHierarchyIT exercises sync and async restore against a real DB→schemas→tables tree end-to-end; test_restore_async.py covers the Python SDK paths. 
Fixes #4003 Co-Authored-By: Claude Opus 4.7 --- ingestion/src/metadata/sdk/entities/base.py | 90 +++++++- .../tests/unit/sdk/test_restore_async.py | 97 +++++++++ .../it/tests/RestoreHierarchyIT.java | 176 ++++++++++++++++ .../openmetadata/sdk/fluent/Databases.java | 53 +++++ .../org/openmetadata/sdk/fluent/Tables.java | 53 +++++ .../sdk/models/AsyncJobResponse.java | 49 +++++ .../sdk/services/EntityServiceBase.java | 35 +++- .../sdk/fluent/RestoreFluentAPITest.java | 109 ++++++++++ .../service/jdbi3/EntityRepository.java | 121 ++++++++++- .../service/resources/EntityResource.java | 56 +++++ .../dashboards/DashboardResource.java | 22 +- .../resources/databases/DatabaseResource.java | 24 ++- .../databases/DatabaseSchemaResource.java | 23 ++- .../databases/StoredProcedureResource.java | 22 +- .../resources/databases/TableResource.java | 22 +- .../DashboardDataModelResource.java | 22 +- .../database/DatabaseServiceResource.java | 23 ++- .../resources/storages/ContainerResource.java | 23 ++- .../service/socket/WebSocketManager.java | 1 + .../service/util/RestoreEntityMessage.java | 32 +++ .../service/util/RestoreEntityResponse.java | 34 +++ .../util/WebsocketNotificationHandler.java | 34 +++ .../jdbi3/EntityRepositoryRestoreTest.java | 195 ++++++++++++++++++ 23 files changed, 1281 insertions(+), 35 deletions(-) create mode 100644 ingestion/tests/unit/sdk/test_restore_async.py create mode 100644 openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/RestoreHierarchyIT.java create mode 100644 openmetadata-sdk/src/main/java/org/openmetadata/sdk/models/AsyncJobResponse.java create mode 100644 openmetadata-sdk/src/test/java/org/openmetadata/sdk/fluent/RestoreFluentAPITest.java create mode 100644 openmetadata-service/src/main/java/org/openmetadata/service/util/RestoreEntityMessage.java create mode 100644 openmetadata-service/src/main/java/org/openmetadata/service/util/RestoreEntityResponse.java create mode 100644 
openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java diff --git a/ingestion/src/metadata/sdk/entities/base.py b/ingestion/src/metadata/sdk/entities/base.py index 8804e99fc643..de30fdb6d3e3 100644 --- a/ingestion/src/metadata/sdk/entities/base.py +++ b/ingestion/src/metadata/sdk/entities/base.py @@ -61,6 +61,54 @@ def execute_async(self) -> Any: return export_async(entity=self.entity, name=self.name) +@dataclass +class AsyncJobResponse: + """Response shape for server-side async operations. + + Returned with HTTP 202 Accepted by endpoints such as ``PUT /restore?async=true`` + (issue #4003). The ``job_id`` correlates with WebSocket notifications on the + ``restoreEntityChannel`` channel emitted when the work completes. + """ + + job_id: str + message: Optional[str] = None # noqa: UP045 + + @classmethod + def from_response(cls, payload: Any) -> "AsyncJobResponse": # noqa: UP037 + if isinstance(payload, AsyncJobResponse): + return payload + if isinstance(payload, dict): + return cls( + job_id=str(payload.get("jobId", "")), + message=payload.get("message"), + ) + raise TypeError(f"Cannot coerce {type(payload).__name__} into AsyncJobResponse") + + +@dataclass +class RestoreOperation(Generic[TEntity]): + """Fluent restore builder with optional server-side async dispatch. + + Mirrors the Java SDK's ``Tables.find(id).restore().async().execute()`` style. + ``execute()`` runs the synchronous restore and returns the restored entity; + ``with_async()`` switches to the server-side async path that returns an + :class:`AsyncJobResponse` with a job id (issue #4003). 
+ """ + + entity_cls: Any # the BaseEntity subclass that owns this operation + entity_id: str + async_enabled: bool = field(default=False, init=False) + + def with_async(self) -> "RestoreOperation[TEntity]": # noqa: UP037 + self.async_enabled = True + return self + + def execute(self) -> Any: + if self.async_enabled: + return self.entity_cls._restore_server_async(self.entity_id) + return self.entity_cls._restore_sync(self.entity_id) + + @dataclass class CsvImportOperation(Generic[TEntity]): """Stateful helper for CSV import operations.""" @@ -388,8 +436,12 @@ def remove_followers(cls, entity_id: UuidLike, follower_ids: Sequence[UuidLike]) @classmethod def restore(cls, entity_id: UuidLike) -> TEntity: - """Restore a soft-deleted entity.""" + """Restore a soft-deleted entity (synchronous).""" + + return cls._restore_sync(entity_id) + @classmethod + def _restore_sync(cls, entity_id: UuidLike) -> TEntity: client = cls._get_client() rest_client = cls._get_rest_client(client) endpoint = cls._get_endpoint_path(client) @@ -399,6 +451,42 @@ def restore(cls, entity_id: UuidLike) -> TEntity: ) return cls._coerce_entity(response) + @classmethod + def restore_async(cls, entity_id: UuidLike) -> "AsyncJobResponse": # noqa: UP037 + """Trigger a server-side async restore. + + Issues ``PUT /restore?async=true`` and returns the 202 Accepted payload + containing the job id. Use this for hierarchies large enough that the + synchronous response would exceed proxy / ALB idle timeouts (issue #4003). 
+ """ + + return cls._restore_server_async(entity_id) + + @classmethod + def _restore_server_async(cls, entity_id: UuidLike) -> "AsyncJobResponse": # noqa: UP037 + client = cls._get_client() + rest_client = cls._get_rest_client(client) + endpoint = cls._get_endpoint_path(client) + response = rest_client.put( + f"{endpoint}/restore?async=true", + json={"id": cls._stringify_identifier(entity_id)}, + ) + return AsyncJobResponse.from_response(response) + + @classmethod + def restore_request(cls, entity_id: UuidLike) -> "RestoreOperation[TEntity]": # noqa: UP037 + """Return a fluent restore builder. + + Examples:: + + restored = Table.restore_request(table_id).execute() + job = Table.restore_request(table_id).with_async().execute() + """ + + return RestoreOperation( + entity_cls=cls, entity_id=cls._stringify_identifier(entity_id) + ) + @classmethod def update_custom_properties(cls, identifier: UuidLike): """Convenience accessor for custom property updates by entity id.""" diff --git a/ingestion/tests/unit/sdk/test_restore_async.py b/ingestion/tests/unit/sdk/test_restore_async.py new file mode 100644 index 000000000000..ca31c42efaf8 --- /dev/null +++ b/ingestion/tests/unit/sdk/test_restore_async.py @@ -0,0 +1,97 @@ +# Copyright 2026 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tests for the fluent restore + server-side async option (issue #4003).""" + +from unittest.mock import MagicMock + +import pytest + +from metadata.sdk import Tables +from metadata.sdk.entities.base import AsyncJobResponse, RestoreOperation + + +@pytest.fixture +def mock_client(): + client = MagicMock() + client.get_suffix.return_value = "/tables" + rest_client = MagicMock() + client.client = rest_client + Tables.set_default_client(client) + return client + + +def test_restore_sync_calls_put_without_async_param(mock_client): + table_id = "b67eac63-9e43-41f5-afb9-387c85df1d8b" + rest_client = mock_client.client + rest_client.put.return_value = {"id": table_id, "name": "t", "deleted": False} + + Tables.restore(table_id) + + rest_client.put.assert_called_once() + path = rest_client.put.call_args[0][0] + assert path.endswith("/restore") + assert "async=true" not in path + + +def test_restore_async_appends_async_query_param(mock_client): + table_id = "b67eac63-9e43-41f5-afb9-387c85df1d8b" + rest_client = mock_client.client + rest_client.put.return_value = {"jobId": "job-42", "message": "Restore initiated"} + + response = Tables.restore_async(table_id) + + assert isinstance(response, AsyncJobResponse) + assert response.job_id == "job-42" + assert response.message == "Restore initiated" + rest_client.put.assert_called_once() + path = rest_client.put.call_args[0][0] + assert path.endswith("/restore?async=true") + + +def test_fluent_restore_request_sync_returns_entity(mock_client): + table_id = "b67eac63-9e43-41f5-afb9-387c85df1d8b" + rest_client = mock_client.client + rest_client.put.return_value = {"id": table_id, "name": "t", "deleted": False} + + op = Tables.restore_request(table_id) + assert isinstance(op, RestoreOperation) + op.execute() + + path = rest_client.put.call_args[0][0] + assert "async=true" not in path + + +def test_fluent_restore_request_with_async_returns_job_response(mock_client): + table_id = "b67eac63-9e43-41f5-afb9-387c85df1d8b" + rest_client = 
mock_client.client + rest_client.put.return_value = {"jobId": "job-7", "message": "Restore initiated"} + + job = Tables.restore_request(table_id).with_async().execute() + + assert isinstance(job, AsyncJobResponse) + assert job.job_id == "job-7" + assert "async=true" in rest_client.put.call_args[0][0] + + +def test_async_job_response_from_response_handles_dict(): + response = AsyncJobResponse.from_response({"jobId": "abc", "message": "ok"}) + assert response.job_id == "abc" + assert response.message == "ok" + + +def test_async_job_response_from_response_passes_through_existing(): + original = AsyncJobResponse(job_id="abc", message="ok") + assert AsyncJobResponse.from_response(original) is original + + +def test_async_job_response_from_response_rejects_unknown_type(): + with pytest.raises(TypeError): + AsyncJobResponse.from_response("not a dict") diff --git a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/RestoreHierarchyIT.java b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/RestoreHierarchyIT.java new file mode 100644 index 000000000000..e518ae45feec --- /dev/null +++ b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/RestoreHierarchyIT.java @@ -0,0 +1,176 @@ +/* + * Copyright 2026 Collate + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.openmetadata.it.tests; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.time.Duration; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.awaitility.Awaitility; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.openmetadata.it.factories.DatabaseSchemaTestFactory; +import org.openmetadata.it.factories.DatabaseServiceTestFactory; +import org.openmetadata.it.factories.DatabaseTestFactory; +import org.openmetadata.it.factories.TableTestFactory; +import org.openmetadata.it.util.SdkClients; +import org.openmetadata.it.util.TestNamespace; +import org.openmetadata.it.util.TestNamespaceExtension; +import org.openmetadata.schema.entity.data.Database; +import org.openmetadata.schema.entity.data.DatabaseSchema; +import org.openmetadata.schema.entity.data.Table; +import org.openmetadata.schema.entity.services.DatabaseService; +import org.openmetadata.schema.type.Include; +import org.openmetadata.sdk.client.OpenMetadataClient; +import org.openmetadata.sdk.fluent.Databases; +import org.openmetadata.sdk.models.AsyncJobResponse; + +/** + * End-to-end tests for the bulk + async restore introduced for issue #4003. + * + *

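Builds a small Database → DatabaseSchemas → Tables hierarchy, soft-deletes the database + * (which cascades), then verifies that: + * + * (1) a synchronous restore brings the database, every schema and every table back, and + * (2) an async restore returns an {@link AsyncJobResponse} with a job id, and the same hierarchy + * is restored once the server-side job completes (polled for up to 60 seconds).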

+ */ +@ExtendWith(TestNamespaceExtension.class) +public class RestoreHierarchyIT { + + private static final int SCHEMA_COUNT = 3; + private static final int TABLES_PER_SCHEMA = 4; + + @BeforeAll + static void setup() { + SdkClients.adminClient(); + } + + @Test + void syncRestore_restoresFullHierarchy(TestNamespace ns) { + Hierarchy h = createHierarchy(ns, "sync"); + softDeleteAndAssertCascade(h); + + Database restored = Databases.find(h.database.getId().toString()).restore().execute(); + assertNotNull(restored); + assertFalse(Boolean.TRUE.equals(restored.getDeleted())); + + assertHierarchyRestored(h); + } + + @Test + void asyncRestore_returns202AndRestoresFullHierarchy(TestNamespace ns) { + Hierarchy h = createHierarchy(ns, "async"); + softDeleteAndAssertCascade(h); + + AsyncJobResponse job = + Databases.find(h.database.getId().toString()).restore().async().execute(); + assertNotNull(job); + assertNotNull(job.getJobId()); + assertEquals("Restore initiated successfully.", job.getMessage()); + + // Async work runs on the server's executor — poll for completion. + Awaitility.await("async restore for " + h.database.getFullyQualifiedName()) + .atMost(Duration.ofSeconds(60)) + .pollInterval(Duration.ofSeconds(1)) + .ignoreExceptions() + .until( + () -> { + Database current = SdkClients.adminClient().databases().get(h.database.getId()); + return !Boolean.TRUE.equals(current.getDeleted()); + }); + + assertHierarchyRestored(h); + } + + private static class Hierarchy { + DatabaseService service; + Database database; + List schemas; + List tables; + + Hierarchy( + DatabaseService service, + Database database, + List schemas, + List
tables) { + this.service = service; + this.database = database; + this.schemas = schemas; + this.tables = tables; + } + } + + private Hierarchy createHierarchy(TestNamespace ns, String tag) { + DatabaseService service = DatabaseServiceTestFactory.create(ns, "Postgres"); + Database database = DatabaseTestFactory.create(ns, service.getFullyQualifiedName()); + + List schemas = new java.util.ArrayList<>(); + List
tables = new java.util.ArrayList<>(); + for (int s = 0; s < SCHEMA_COUNT; s++) { + DatabaseSchema schema = + DatabaseSchemaTestFactory.create(database.getFullyQualifiedName(), tag + "_schema_" + s); + schemas.add(schema); + for (int t = 0; t < TABLES_PER_SCHEMA; t++) { + tables.add( + TableTestFactory.createSimpleWithName( + tag + "_table_" + s + "_" + t, ns, schema.getFullyQualifiedName())); + } + } + return new Hierarchy(service, database, schemas, tables); + } + + private void softDeleteAndAssertCascade(Hierarchy h) { + Map recursiveDelete = new HashMap<>(); + recursiveDelete.put("recursive", "true"); + SdkClients.adminClient().databases().delete(h.database.getId().toString(), recursiveDelete); + + OpenMetadataClient client = SdkClients.adminClient(); + Database deletedDb = + client.databases().get(h.database.getId().toString(), "deleted", Include.ALL.value()); + assertTrue(Boolean.TRUE.equals(deletedDb.getDeleted()), "database should be soft-deleted"); + + for (DatabaseSchema schema : h.schemas) { + DatabaseSchema fetched = + client.databaseSchemas().get(schema.getId().toString(), "deleted", Include.ALL.value()); + assertTrue(Boolean.TRUE.equals(fetched.getDeleted()), "schema cascade delete failed"); + } + for (Table table : h.tables) { + Table fetched = client.tables().get(table.getId().toString(), "deleted", Include.ALL.value()); + assertTrue(Boolean.TRUE.equals(fetched.getDeleted()), "table cascade delete failed"); + } + } + + private void assertHierarchyRestored(Hierarchy h) { + OpenMetadataClient client = SdkClients.adminClient(); + Database fetchedDb = client.databases().get(h.database.getId().toString()); + assertFalse(Boolean.TRUE.equals(fetchedDb.getDeleted()), "database not restored"); + + for (DatabaseSchema schema : h.schemas) { + DatabaseSchema fetched = client.databaseSchemas().get(schema.getId().toString()); + assertFalse(Boolean.TRUE.equals(fetched.getDeleted()), "schema not restored"); + } + for (Table table : h.tables) { + Table fetched = 
client.tables().get(table.getId().toString()); + assertFalse(Boolean.TRUE.equals(fetched.getDeleted()), "table not restored"); + } + } +} diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Databases.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Databases.java index 7741c54c1f07..065894ef6ed4 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Databases.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Databases.java @@ -246,6 +246,10 @@ public FluentDatabase fetch() { public DatabaseDeleter delete() { return new DatabaseDeleter(client, identifier); } + + public DatabaseRestorer restore() { + return new DatabaseRestorer(client, identifier); + } } // ==================== Deleter ==================== @@ -279,6 +283,55 @@ public void confirm() { } } + // ==================== Restorer ==================== + + /** + * Fluent restore builder. {@link #execute()} runs the synchronous restore and returns the + * restored {@link Database}. Switching to {@link #async()} returns an + * {@link AsyncDatabaseRestorer} whose {@code execute()} triggers the server-side async + * path and returns an {@link org.openmetadata.sdk.models.AsyncJobResponse} with a job id — + * use this for services with thousands of schemas / tables (issue #4003). 
+ */ + public static class DatabaseRestorer { + private final OpenMetadataClient client; + private final String id; + + public DatabaseRestorer(OpenMetadataClient client, String id) { + this.client = client; + this.id = id; + } + + public AsyncDatabaseRestorer async() { + return new AsyncDatabaseRestorer(client, id); + } + + public Database execute() { + try { + return client.databases().restore(id); + } catch (org.openmetadata.sdk.exceptions.OpenMetadataException e) { + throw new RuntimeException(e); + } + } + } + + public static class AsyncDatabaseRestorer { + private final OpenMetadataClient client; + private final String id; + + public AsyncDatabaseRestorer(OpenMetadataClient client, String id) { + this.client = client; + this.id = id; + } + + public org.openmetadata.sdk.models.AsyncJobResponse execute() { + try { + return client.databases().restoreServerAsync(id); + } catch (org.openmetadata.sdk.exceptions.OpenMetadataException e) { + throw new RuntimeException(e); + } + } + } + // ==================== Lister ==================== public static class DatabaseLister { diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Tables.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Tables.java index c11760f1f2d9..bc1c5b94b5df 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Tables.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Tables.java @@ -312,6 +312,10 @@ public org.openmetadata.sdk.fluent.wrappers.FluentTable fetch() { public TableDeleter delete() { return new TableDeleter(client, identifier); } + + public TableRestorer restore() { + return new TableRestorer(client, identifier); + } } // ==================== Table Operations ==================== @@ -373,6 +377,55 @@ public void confirm() { } } + // ==================== Restorer ==================== + + /** + * Fluent restore builder. {@link #execute()} runs the synchronous restore and returns the + * restored {@link Table}. 
Switching to {@link #async()} returns an + * {@link AsyncTableRestorer} whose {@code execute()} triggers the server-side async path + * and returns an {@link org.openmetadata.sdk.models.AsyncJobResponse} with a job id (issue + * #4003). + */ + public static class TableRestorer { + private final OpenMetadataClient client; + private final String id; + + public TableRestorer(OpenMetadataClient client, String id) { + this.client = client; + this.id = id; + } + + public AsyncTableRestorer async() { + return new AsyncTableRestorer(client, id); + } + + public Table execute() { + try { + return client.tables().restore(id); + } catch (org.openmetadata.sdk.exceptions.OpenMetadataException e) { + throw new RuntimeException(e); + } + } + } + + public static class AsyncTableRestorer { + private final OpenMetadataClient client; + private final String id; + + public AsyncTableRestorer(OpenMetadataClient client, String id) { + this.client = client; + this.id = id; + } + + public org.openmetadata.sdk.models.AsyncJobResponse execute() { + try { + return client.tables().restoreServerAsync(id); + } catch (org.openmetadata.sdk.exceptions.OpenMetadataException e) { + throw new RuntimeException(e); + } + } + } + // ==================== Lister ==================== public static class TableLister { diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/models/AsyncJobResponse.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/models/AsyncJobResponse.java new file mode 100644 index 000000000000..fd39a1bc4ecf --- /dev/null +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/models/AsyncJobResponse.java @@ -0,0 +1,49 @@ +/* + * Copyright 2026 Collate + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.openmetadata.sdk.models; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * Response shape returned with HTTP 202 Accepted for server-side async operations such as + * restore (issue #4003) and delete. Contains a job id that can be used with the SDK + * WebSocketListener to await completion notifications. + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class AsyncJobResponse { + private String jobId; + private String message; + + public AsyncJobResponse() {} + + public AsyncJobResponse(String jobId, String message) { + this.jobId = jobId; + this.message = message; + } + + public String getJobId() { + return jobId; + } + + public void setJobId(String jobId) { + this.jobId = jobId; + } + + public String getMessage() { + return message; + } + + public void setMessage(String message) { + this.message = message; + } +} diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/services/EntityServiceBase.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/services/EntityServiceBase.java index 14f5f7d72259..58b77348b05e 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/services/EntityServiceBase.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/services/EntityServiceBase.java @@ -421,15 +421,17 @@ public T restore(String id) throws OpenMetadataException { } /** - * Restore a soft-deleted entity (async) + * Restore a soft-deleted entity (client-side async wrapper). + * + *

Runs the synchronous restore call on the SDK's executor and returns a + * {@link CompletableFuture}. The server still does the work synchronously inside the request, + * so this still ties up an HTTP connection for the duration. For large hierarchies use + * {@link #restoreServerAsync(String)} instead, which returns a 202 with a job id. */ public CompletableFuture restoreAsync(UUID id) { return restoreAsync(id.toString()); } - /** - * Restore a soft-deleted entity (async) - */ public CompletableFuture restoreAsync(String id) { org.openmetadata.schema.api.data.RestoreEntity restoreEntity = new org.openmetadata.schema.api.data.RestoreEntity(); @@ -438,6 +440,31 @@ public CompletableFuture restoreAsync(String id) { HttpMethod.PUT, basePath + "/restore", restoreEntity, getEntityClass()); } + /** + * Trigger a server-side async restore. Issues {@code PUT /restore?async=true} and returns + * the 202 Accepted response containing the job id. Used to avoid proxy / ALB idle timeouts + * on large hierarchies (issue #4003). The caller can await completion via the SDK's + * WebSocketListener on the {@code restoreEntityChannel} channel. + */ + public org.openmetadata.sdk.models.AsyncJobResponse restoreServerAsync(UUID id) + throws OpenMetadataException { + return restoreServerAsync(id.toString()); + } + + public org.openmetadata.sdk.models.AsyncJobResponse restoreServerAsync(String id) + throws OpenMetadataException { + org.openmetadata.schema.api.data.RestoreEntity restoreEntity = + new org.openmetadata.schema.api.data.RestoreEntity(); + restoreEntity.setId(java.util.UUID.fromString(id)); + RequestOptions options = RequestOptions.builder().queryParam("async", "true").build(); + return httpClient.execute( + HttpMethod.PUT, + basePath + "/restore", + restoreEntity, + org.openmetadata.sdk.models.AsyncJobResponse.class, + options); + } + /** * Export entity data to CSV format. 
* diff --git a/openmetadata-sdk/src/test/java/org/openmetadata/sdk/fluent/RestoreFluentAPITest.java b/openmetadata-sdk/src/test/java/org/openmetadata/sdk/fluent/RestoreFluentAPITest.java new file mode 100644 index 000000000000..1dd98173c227 --- /dev/null +++ b/openmetadata-sdk/src/test/java/org/openmetadata/sdk/fluent/RestoreFluentAPITest.java @@ -0,0 +1,109 @@ +/* + * Copyright 2026 Collate + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.openmetadata.sdk.fluent; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.UUID; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.openmetadata.schema.entity.data.Database; +import org.openmetadata.schema.entity.data.Table; +import org.openmetadata.sdk.client.OpenMetadataClient; +import org.openmetadata.sdk.models.AsyncJobResponse; +import org.openmetadata.sdk.services.dataassets.TableService; +import org.openmetadata.sdk.services.databases.DatabaseService; + +/** + * Verifies the fluent restore builders added for issue #4003. 
{@code .restore().execute()} + * routes to the synchronous SDK call; chaining {@code .async()} switches to the server-side + * async path and returns an {@link AsyncJobResponse}. + */ +class RestoreFluentAPITest { + + @Mock private OpenMetadataClient mockClient; + @Mock private TableService mockTables; + @Mock private DatabaseService mockDatabases; + + @BeforeEach + void setUp() { + MockitoAnnotations.openMocks(this); + when(mockClient.tables()).thenReturn(mockTables); + when(mockClient.databases()).thenReturn(mockDatabases); + Tables.setDefaultClient(mockClient); + Databases.setDefaultClient(mockClient); + } + + @Test + void tablesFluent_syncRestore_callsRestore() throws Exception { + String id = UUID.randomUUID().toString(); + Table restored = new Table().withId(UUID.fromString(id)).withName("t"); + when(mockTables.restore(id)).thenReturn(restored); + + Table result = Tables.find(id).restore().execute(); + + assertSame(restored, result); + verify(mockTables).restore(id); + verify(mockTables, never()).restoreServerAsync(eq(id)); + } + + @Test + void tablesFluent_asyncRestore_callsRestoreServerAsync() throws Exception { + String id = UUID.randomUUID().toString(); + AsyncJobResponse expected = new AsyncJobResponse("job-1", "Restore initiated successfully."); + when(mockTables.restoreServerAsync(id)).thenReturn(expected); + + AsyncJobResponse result = Tables.find(id).restore().async().execute(); + + assertNotNull(result); + assertEquals("job-1", result.getJobId()); + assertEquals("Restore initiated successfully.", result.getMessage()); + verify(mockTables).restoreServerAsync(id); + verify(mockTables, never()).restore(eq(id)); + } + + @Test + void databasesFluent_syncRestore_callsRestore() throws Exception { + String id = UUID.randomUUID().toString(); + Database restored = new Database().withId(UUID.fromString(id)).withName("db"); + when(mockDatabases.restore(id)).thenReturn(restored); + + Database result = Databases.find(id).restore().execute(); + + 
assertSame(restored, result); + verify(mockDatabases).restore(id); + verify(mockDatabases, never()).restoreServerAsync(eq(id)); + } + + @Test + void databasesFluent_asyncRestore_callsRestoreServerAsync() throws Exception { + String id = UUID.randomUUID().toString(); + AsyncJobResponse expected = new AsyncJobResponse("job-2", "Restore initiated successfully."); + when(mockDatabases.restoreServerAsync(id)).thenReturn(expected); + + AsyncJobResponse result = Databases.find(id).restore().async().execute(); + + assertNotNull(result); + assertEquals("job-2", result.getJobId()); + verify(mockDatabases).restoreServerAsync(id); + verify(mockDatabases, never()).restore(eq(id)); + } +} diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index a02f786c1573..6a1d2781ec46 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -5538,16 +5538,125 @@ public final PutResponse restoreEntity(String updatedBy, UUID id) { @Transaction protected void restoreChildren(UUID id, String updatedBy) { - // Restore deleted children entities List records = daoCollection.relationshipDAO().findTo(id, entityType, Relationship.CONTAINS.ordinal()); - if (!records.isEmpty()) { - // Recursively restore all contained entities - for (CollectionDAO.EntityRelationshipRecord record : records) { - LOG.info("Recursively restoring {} {}", record.getType(), record.getId()); - Entity.restoreEntity(updatedBy, record.getType(), record.getId()); + if (records.isEmpty()) { + return; + } + Map> idsByType = new HashMap<>(); + for (CollectionDAO.EntityRelationshipRecord record : records) { + idsByType.computeIfAbsent(record.getType(), k -> new ArrayList<>()).add(record.getId()); + } + for (var entry : idsByType.entrySet()) { + EntityRepository 
repo = Entity.getEntityRepository(entry.getKey()); + repo.bulkRestoreSubtree(entry.getValue(), updatedBy); + } + } + + /** + * Bulk-restore a set of soft-deleted entities of this repository's type along with their entire + * subtree of CONTAINS-related descendants. Replaces the per-entity recursive path that was + * O(descendants) HTTP-request-bound work with a per-level batched walk that uses the existing + * deferred-store bulk update infrastructure. + * + *

<p>For a database with N descendants, the previous implementation issued ~N find calls, + * ~N updates and ~N search index writes, all serialized inside one HTTP request. This path + * does one batched DB load, one batched DB write and one batched change-event insert per + * level, and relies on {@link #restoreFromSearch(EntityInterface)} at the top-level to + * cascade the deleted flag flip across child indexes in a single ES update_by_query. + * + *

<p>Subclasses that link non-CONTAINS related entities (e.g., charts attached to dashboards + * via HAS) should override {@link #restoreChildren(UUID, String)} or implement the + * {@link #restoreAdditionalChildren(UUID, String)} hook. + */ + @Transaction + public final void bulkRestoreSubtree(List ids, String updatedBy) { + if (ids == null || ids.isEmpty()) { + return; + } + List deletedEntities; + try (var ignored = phase("bulkRestoreLoad")) { + deletedEntities = find(ids, DELETED); + } + if (deletedEntities.isEmpty()) { + return; + } + + for (T entity : deletedEntities) { + restoreChildren(entity.getId(), updatedBy); + } + + long now = System.currentTimeMillis(); + List updaters = new ArrayList<>(deletedEntities.size()); + try (var ignored = phase("bulkRestoreUpdaters")) { + for (T original : deletedEntities) { + T updated = JsonUtils.readValue(JsonUtils.pojoToJson(original), entityClass); + updated.setUpdatedBy(updatedBy); + updated.setUpdatedAt(now); + EntityUpdater updater = getUpdater(original, updated, Operation.PUT, null); + updater.updateWithDeferredStore(); + updaters.add(updater); } } + + List changed = + updaters.stream().filter(u -> u.isVersionChanged() || u.isEntityChanged()).toList(); + if (changed.isEmpty()) { + runRestoreAdditionalChildren(deletedEntities, updatedBy); + return; + } + + try (var ignored = phase("bulkRestoreVersionHistory")) { + List historyIds = new ArrayList<>(); + List historyExtensions = new ArrayList<>(); + List historyJsons = new ArrayList<>(); + for (EntityUpdater u : changed) { + if (u.isVersionChanged()) { + historyIds.add(u.getOriginal().getId()); + historyExtensions.add( + EntityUtil.getVersionExtension(entityType, u.getOriginal().getVersion())); + historyJsons.add(JsonUtils.pojoToJson(u.getOriginal())); + } + } + if (!historyIds.isEmpty()) { + daoCollection + .entityExtensionDAO() + .insertMany(historyIds, historyExtensions, entityType, historyJsons); + } + } + + List changedEntities = 
changed.stream().map(EntityUpdater::getUpdated).toList(); + try (var ignored = phase("bulkRestoreUpdateMany")) { + updateMany(changedEntities); + } + try (var ignored = phase("bulkRestoreInvalidate")) { + invalidateMany(changedEntities); + } + try (var ignored = phase("bulkRestoreChangeEvents")) { + List changeEventJsons = new ArrayList<>(); + for (EntityUpdater u : changed) { + buildChangeEventJsonForBulkOperation(u.getUpdated(), ENTITY_RESTORED, updatedBy) + .ifPresent(changeEventJsons::add); + } + insertChangeEventsBatch(changeEventJsons); + } + + ListCountCache.invalidate(entityType); + runRestoreAdditionalChildren(deletedEntities, updatedBy); + } + + private void runRestoreAdditionalChildren(List entities, String updatedBy) { + for (T entity : entities) { + restoreAdditionalChildren(entity.getId(), updatedBy); + } + } + + /** + * Hook called once per restored entity for repositories that have non-CONTAINS related + * entities that need to be restored alongside the parent. Default: no-op. + */ + protected void restoreAdditionalChildren(UUID id, String updatedBy) { + // No-op. Override in subclasses for HAS-style related-entity restore. 
} public final void addRelationship( diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java index c8c5a48af470..c0bf9cd0de28 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java @@ -103,6 +103,7 @@ import org.openmetadata.service.util.RestUtil.DeleteResponse; import org.openmetadata.service.util.RestUtil.PatchResponse; import org.openmetadata.service.util.RestUtil.PutResponse; +import org.openmetadata.service.util.RestoreEntityResponse; import org.openmetadata.service.util.ValidatorUtil; import org.openmetadata.service.util.WebsocketNotificationHandler; @@ -771,6 +772,14 @@ public Response deleteByName( } public Response restoreEntity(UriInfo uriInfo, SecurityContext securityContext, UUID id) { + return restoreEntity(uriInfo, securityContext, id, false); + } + + public Response restoreEntity( + UriInfo uriInfo, SecurityContext securityContext, UUID id, boolean async) { + if (async) { + return restoreEntityAsync(uriInfo, securityContext, id); + } OperationContext operationContext = new OperationContext(entityType, MetadataOperation.EDIT_ALL); authorizer.authorize(securityContext, operationContext, getResourceContextById(id)); @@ -785,6 +794,53 @@ public Response restoreEntity(UriInfo uriInfo, SecurityContext securityContext, return response.toResponse(); } + /** + * Async restore variant. Returns 202 Accepted with a job ID and runs the restore on the + * shared async executor. The caller can subscribe to + * {@link org.openmetadata.service.socket.WebSocketManager#RESTORE_ENTITY_CHANNEL} to be + * notified when the restore completes or fails. Used to avoid proxy / ALB idle timeouts on + * large hierarchies (issue #4003). 
+ */ + public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityContext, UUID id) { + OperationContext operationContext = + new OperationContext(entityType, MetadataOperation.EDIT_ALL); + authorizer.authorize(securityContext, operationContext, getResourceContextById(id)); + String jobId = UUID.randomUUID().toString(); + String userName = securityContext.getUserPrincipal().getName(); + ExecutorService executorService = AsyncService.getInstance().getExecutorService(); + executorService.submit( + RequestLatencyContext.wrapWithContext( + () -> { + try { + PutResponse response = repository.restoreEntity(userName, id); + if (response == null) { + WebsocketNotificationHandler.sendRestoreOperationFailedNotification( + jobId, securityContext, id.toString(), "Entity is not in deleted state"); + return; + } + repository.restoreFromSearch(response.getEntity()); + addHref(uriInfo, response.getEntity()); + LOG.info( + "[AsyncRestore] Restored {}:{} (jobId={})", + Entity.getEntityTypeFromObject(response.getEntity()), + response.getEntity().getId(), + jobId); + WebsocketNotificationHandler.sendRestoreOperationCompleteNotification( + jobId, securityContext, response.getEntity()); + } catch (Exception e) { + LOG.error("[AsyncRestore] Failed to restore {}:{}", entityType, id, e); + WebsocketNotificationHandler.sendRestoreOperationFailedNotification( + jobId, + securityContext, + id.toString(), + e.getMessage() == null ? 
e.toString() : e.getMessage()); + } + })); + RestoreEntityResponse response = + new RestoreEntityResponse(jobId, "Restore initiated successfully."); + return Response.accepted().entity(response).type(MediaType.APPLICATION_JSON).build(); + } + public Response exportCsvInternalAsync( SecurityContext securityContext, String name, boolean recursive) { OperationContext operationContext = diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/dashboards/DashboardResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/dashboards/DashboardResource.java index 3ac05657402a..0edbda411716 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/dashboards/DashboardResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/dashboards/DashboardResource.java @@ -612,7 +612,9 @@ public Response delete( @Operation( operationId = "restore", summary = "Restore a soft deleted dashboard", - description = "Restore a soft deleted dashboard.", + description = + "Restore a soft deleted dashboard. Pass async=true to run the restore in the background" + + " and receive a 202 Accepted response with a job id.", responses = { @ApiResponse( responseCode = "200", @@ -620,12 +622,26 @@ public Response delete( content = @Content( mediaType = "application/json", - schema = @Schema(implementation = Dashboard.class))) + schema = @Schema(implementation = Dashboard.class))), + @ApiResponse( + responseCode = "202", + description = "Async restore started. Track completion via the jobId.", + content = + @Content( + mediaType = "application/json", + schema = + @Schema( + implementation = + org.openmetadata.service.util.RestoreEntityResponse.class))) }) public Response restoreDashboard( @Context UriInfo uriInfo, @Context SecurityContext securityContext, + @Parameter(description = "Run the restore asynchronously. 
(Default = `false`)") + @QueryParam("async") + @DefaultValue("false") + boolean async, @Valid RestoreEntity restore) { - return restoreEntity(uriInfo, securityContext, restore.getId()); + return restoreEntity(uriInfo, securityContext, restore.getId(), async); } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/DatabaseResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/DatabaseResource.java index 200170085ec4..a35496332d96 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/DatabaseResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/DatabaseResource.java @@ -751,7 +751,11 @@ public Response delete( @Operation( operationId = "restore", summary = "Restore a soft deleted Database.", - description = "Restore a soft deleted Database.", + description = + "Restore a soft deleted Database. Pass async=true to run the restore in the" + + " background and receive a 202 Accepted response with a job id; useful for" + + " hierarchies large enough that the synchronous response would exceed proxy" + + " idle timeouts.", responses = { @ApiResponse( responseCode = "200", @@ -759,13 +763,27 @@ public Response delete( content = @Content( mediaType = "application/json", - schema = @Schema(implementation = Database.class))) + schema = @Schema(implementation = Database.class))), + @ApiResponse( + responseCode = "202", + description = "Async restore started. Track completion via the jobId.", + content = + @Content( + mediaType = "application/json", + schema = + @Schema( + implementation = + org.openmetadata.service.util.RestoreEntityResponse.class))) }) public Response restoreDatabase( @Context UriInfo uriInfo, @Context SecurityContext securityContext, + @Parameter(description = "Run the restore asynchronously. 
(Default = `false`)") + @QueryParam("async") + @DefaultValue("false") + boolean async, @Valid RestoreEntity restore) { - return restoreEntity(uriInfo, securityContext, restore.getId()); + return restoreEntity(uriInfo, securityContext, restore.getId(), async); } @PUT diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/DatabaseSchemaResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/DatabaseSchemaResource.java index 7be0e5fa9441..42e14d579c07 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/DatabaseSchemaResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/DatabaseSchemaResource.java @@ -812,7 +812,10 @@ public Response delete( @Operation( operationId = "restore", summary = "Restore a soft deleted database schema.", - description = "Restore a soft deleted database schema.", + description = + "Restore a soft deleted database schema. Pass async=true to run the restore in the" + + " background and receive a 202 Accepted response with a job id; useful when the" + + " schema contains thousands of tables.", responses = { @ApiResponse( responseCode = "200", @@ -820,13 +823,27 @@ public Response delete( content = @Content( mediaType = "application/json", - schema = @Schema(implementation = DatabaseSchema.class))) + schema = @Schema(implementation = DatabaseSchema.class))), + @ApiResponse( + responseCode = "202", + description = "Async restore started. Track completion via the jobId.", + content = + @Content( + mediaType = "application/json", + schema = + @Schema( + implementation = + org.openmetadata.service.util.RestoreEntityResponse.class))) }) public Response restoreDatabaseSchema( @Context UriInfo uriInfo, @Context SecurityContext securityContext, + @Parameter(description = "Run the restore asynchronously. 
(Default = `false`)") + @QueryParam("async") + @DefaultValue("false") + boolean async, @Valid RestoreEntity restore) { - return restoreEntity(uriInfo, securityContext, restore.getId()); + return restoreEntity(uriInfo, securityContext, restore.getId(), async); } @PUT diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/StoredProcedureResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/StoredProcedureResource.java index 760da9de6b55..ee00edcd1d86 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/StoredProcedureResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/StoredProcedureResource.java @@ -600,7 +600,9 @@ public Response delete( @Operation( operationId = "restore", summary = "Restore a soft deleted stored procedure.", - description = "Restore a soft deleted stored procedure.", + description = + "Restore a soft deleted stored procedure. Pass async=true to run the restore in the" + + " background and receive a 202 Accepted response with a job id.", responses = { @ApiResponse( responseCode = "200", @@ -608,12 +610,26 @@ public Response delete( content = @Content( mediaType = "application/json", - schema = @Schema(implementation = StoredProcedure.class))) + schema = @Schema(implementation = StoredProcedure.class))), + @ApiResponse( + responseCode = "202", + description = "Async restore started. Track completion via the jobId.", + content = + @Content( + mediaType = "application/json", + schema = + @Schema( + implementation = + org.openmetadata.service.util.RestoreEntityResponse.class))) }) public Response restoreStoredProcedure( @Context UriInfo uriInfo, @Context SecurityContext securityContext, + @Parameter(description = "Run the restore asynchronously. 
(Default = `false`)") + @QueryParam("async") + @DefaultValue("false") + boolean async, @Valid RestoreEntity restore) { - return restoreEntity(uriInfo, securityContext, restore.getId()); + return restoreEntity(uriInfo, securityContext, restore.getId(), async); } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/TableResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/TableResource.java index fea914d1726f..cef472f41000 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/TableResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/databases/TableResource.java @@ -783,7 +783,9 @@ public Response deleteByFqn( @Operation( operationId = "restore", summary = "Restore a soft deleted table", - description = "Restore a soft deleted table.", + description = + "Restore a soft deleted table. Pass async=true to run the restore in the background" + + " and receive a 202 Accepted response with a job id.", responses = { @ApiResponse( responseCode = "200", @@ -791,13 +793,27 @@ public Response deleteByFqn( content = @Content( mediaType = "application/json", - schema = @Schema(implementation = Table.class))) + schema = @Schema(implementation = Table.class))), + @ApiResponse( + responseCode = "202", + description = "Async restore started. Track completion via the jobId.", + content = + @Content( + mediaType = "application/json", + schema = + @Schema( + implementation = + org.openmetadata.service.util.RestoreEntityResponse.class))) }) public Response restoreTable( @Context UriInfo uriInfo, @Context SecurityContext securityContext, + @Parameter(description = "Run the restore asynchronously. 
(Default = `false`)") + @QueryParam("async") + @DefaultValue("false") + boolean async, @Valid RestoreEntity restore) { - return restoreEntity(uriInfo, securityContext, restore.getId()); + return restoreEntity(uriInfo, securityContext, restore.getId(), async); } @PUT diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/datamodels/DashboardDataModelResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/datamodels/DashboardDataModelResource.java index 8a69e00aa088..e27d43f34839 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/datamodels/DashboardDataModelResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/datamodels/DashboardDataModelResource.java @@ -625,7 +625,9 @@ public Response delete( @Operation( operationId = "restore", summary = "Restore a soft deleted data model.", - description = "Restore a soft deleted data model.", + description = + "Restore a soft deleted data model. Pass async=true to run the restore in the" + + " background and receive a 202 Accepted response with a job id.", responses = { @ApiResponse( responseCode = "200", @@ -633,13 +635,27 @@ public Response delete( content = @Content( mediaType = "application/json", - schema = @Schema(implementation = DashboardDataModel.class))) + schema = @Schema(implementation = DashboardDataModel.class))), + @ApiResponse( + responseCode = "202", + description = "Async restore started. Track completion via the jobId.", + content = + @Content( + mediaType = "application/json", + schema = + @Schema( + implementation = + org.openmetadata.service.util.RestoreEntityResponse.class))) }) public Response restoreDataModel( @Context UriInfo uriInfo, @Context SecurityContext securityContext, + @Parameter(description = "Run the restore asynchronously. 
(Default = `false`)") + @QueryParam("async") + @DefaultValue("false") + boolean async, @Valid RestoreEntity restore) { - return restoreEntity(uriInfo, securityContext, restore.getId()); + return restoreEntity(uriInfo, securityContext, restore.getId(), async); } @GET diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/services/database/DatabaseServiceResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/services/database/DatabaseServiceResource.java index 394a145c493f..fc93f9384842 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/services/database/DatabaseServiceResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/services/database/DatabaseServiceResource.java @@ -769,7 +769,10 @@ public Response delete( @Operation( operationId = "restore", summary = "Restore a soft deleted database service", - description = "Restore a soft deleted database service.", + description = + "Restore a soft deleted database service. Pass async=true to run the restore in the" + + " background and receive a 202 Accepted response with a job id; strongly" + + " recommended for services that contain many databases / schemas / tables.", responses = { @ApiResponse( responseCode = "200", @@ -777,13 +780,27 @@ public Response delete( content = @Content( mediaType = "application/json", - schema = @Schema(implementation = DatabaseService.class))) + schema = @Schema(implementation = DatabaseService.class))), + @ApiResponse( + responseCode = "202", + description = "Async restore started. Track completion via the jobId.", + content = + @Content( + mediaType = "application/json", + schema = + @Schema( + implementation = + org.openmetadata.service.util.RestoreEntityResponse.class))) }) public Response restoreDatabaseService( @Context UriInfo uriInfo, @Context SecurityContext securityContext, + @Parameter(description = "Run the restore asynchronously. 
(Default = `false`)") + @QueryParam("async") + @DefaultValue("false") + boolean async, @Valid RestoreEntity restore) { - return restoreEntity(uriInfo, securityContext, restore.getId()); + return restoreEntity(uriInfo, securityContext, restore.getId(), async); } @Override diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/storages/ContainerResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/storages/ContainerResource.java index 7253558c5979..5f27763ed53f 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/storages/ContainerResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/storages/ContainerResource.java @@ -621,7 +621,10 @@ public Response delete( @Operation( operationId = "restore", summary = "Restore a soft deleted Container.", - description = "Restore a soft deleted Container.", + description = + "Restore a soft deleted Container. Pass async=true to run the restore in the background" + + " and receive a 202 Accepted response with a job id; useful for deep container" + + " hierarchies.", responses = { @ApiResponse( responseCode = "200", @@ -629,13 +632,27 @@ public Response delete( content = @Content( mediaType = "application/json", - schema = @Schema(implementation = Container.class))) + schema = @Schema(implementation = Container.class))), + @ApiResponse( + responseCode = "202", + description = "Async restore started. Track completion via the jobId.", + content = + @Content( + mediaType = "application/json", + schema = + @Schema( + implementation = + org.openmetadata.service.util.RestoreEntityResponse.class))) }) public Response restoreContainer( @Context UriInfo uriInfo, @Context SecurityContext securityContext, + @Parameter(description = "Run the restore asynchronously. 
(Default = `false`)") + @QueryParam("async") + @DefaultValue("false") + boolean async, @Valid RestoreEntity restore) { - return restoreEntity(uriInfo, securityContext, restore.getId()); + return restoreEntity(uriInfo, securityContext, restore.getId(), async); } @PUT diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/socket/WebSocketManager.java b/openmetadata-service/src/main/java/org/openmetadata/service/socket/WebSocketManager.java index bbc2281a3ded..c6f0757646ed 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/socket/WebSocketManager.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/socket/WebSocketManager.java @@ -36,6 +36,7 @@ public class WebSocketManager { public static final String BULK_ASSETS_CHANNEL = "bulkAssetsChannel"; public static final String DELETE_ENTITY_CHANNEL = "deleteEntityChannel"; + public static final String RESTORE_ENTITY_CHANNEL = "restoreEntityChannel"; public static final String MOVE_GLOSSARY_TERM_CHANNEL = "moveGlossaryTermChannel"; public static final String RDF_INDEX_JOB_BROADCAST_CHANNEL = "rdfIndexJobStatus"; public static final String CHART_DATA_STREAM_CHANNEL = "chartDataStream"; diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/util/RestoreEntityMessage.java b/openmetadata-service/src/main/java/org/openmetadata/service/util/RestoreEntityMessage.java new file mode 100644 index 000000000000..7e3d8216fe77 --- /dev/null +++ b/openmetadata-service/src/main/java/org/openmetadata/service/util/RestoreEntityMessage.java @@ -0,0 +1,32 @@ +/* + * Copyright 2026 Collate + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.openmetadata.service.util; + +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +@NoArgsConstructor +public class RestoreEntityMessage { + @Getter @Setter private String jobId; + @Getter @Setter private String status; + @Getter @Setter private String entityName; + @Getter @Setter private String error; + + public RestoreEntityMessage(String jobId, String status, String entityName, String error) { + this.jobId = jobId; + this.status = status; + this.entityName = entityName; + this.error = error; + } +} diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/util/RestoreEntityResponse.java b/openmetadata-service/src/main/java/org/openmetadata/service/util/RestoreEntityResponse.java new file mode 100644 index 000000000000..b55519478cf4 --- /dev/null +++ b/openmetadata-service/src/main/java/org/openmetadata/service/util/RestoreEntityResponse.java @@ -0,0 +1,34 @@ +/* + * Copyright 2026 Collate + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.openmetadata.service.util; + +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + +/** + * Response shape for an async restore request. Returned with HTTP 202 when a client passes + * {@code async=true} to the restore endpoint. The {@code jobId} can be used to correlate + * subsequent WebSocket notifications on + * {@link org.openmetadata.service.socket.WebSocketManager#RESTORE_ENTITY_CHANNEL}. + */ +@NoArgsConstructor +public class RestoreEntityResponse { + @Getter @Setter private String jobId; + @Getter @Setter private String message; + + public RestoreEntityResponse(String jobId, String message) { + this.jobId = jobId; + this.message = message; + } +} diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/util/WebsocketNotificationHandler.java b/openmetadata-service/src/main/java/org/openmetadata/service/util/WebsocketNotificationHandler.java index 2705fd8a0489..554b87aadbec 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/util/WebsocketNotificationHandler.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/util/WebsocketNotificationHandler.java @@ -381,6 +381,40 @@ public static void sendDeleteOperationFailedNotification( } } + public static void sendRestoreOperationCompleteNotification( + String jobId, SecurityContext securityContext, EntityInterface entity) { + RestoreEntityMessage message = + new RestoreEntityMessage(jobId, "COMPLETED", entity.getName(), null); + String jsonMessage = JsonUtils.pojoToJson(message); + UUID userId = getUserIdFromSecurityContext(securityContext); + LOG.info( + "[AsyncRestore] Restore operation completed - jobId: {}, userId: {}, entity: {}", + jobId, + userId, + entity.getName()); + if (userId != null) { + WebSocketManager.getInstance() + .sendToOne(userId, WebSocketManager.RESTORE_ENTITY_CHANNEL, jsonMessage); + } + } + + public static void sendRestoreOperationFailedNotification( + String jobId, SecurityContext 
securityContext, String entityName, String error) { + RestoreEntityMessage message = new RestoreEntityMessage(jobId, "FAILED", entityName, error); + String jsonMessage = JsonUtils.pojoToJson(message); + UUID userId = getUserIdFromSecurityContext(securityContext); + LOG.error( + "[AsyncRestore] Restore operation failed - jobId: {}, userId: {}, entity: {}, error: {}", + jobId, + userId, + entityName, + error); + if (userId != null) { + WebSocketManager.getInstance() + .sendToOne(userId, WebSocketManager.RESTORE_ENTITY_CHANNEL, jsonMessage); + } + } + public static void sendMoveOperationCompleteNotification( String jobId, SecurityContext securityContext, EntityInterface entity) { MoveGlossaryTermMessage message = diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java new file mode 100644 index 000000000000..38f803a80dd6 --- /dev/null +++ b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java @@ -0,0 +1,195 @@ +/* + * Copyright 2026 Collate + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.openmetadata.service.jdbi3; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.UUID; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.mockito.ArgumentCaptor; +import org.mockito.MockedStatic; +import org.openmetadata.schema.entity.data.Pipeline; +import org.openmetadata.service.Entity; +import org.openmetadata.service.util.EntityUtil.Fields; +import org.openmetadata.service.util.EntityUtil.RelationIncludes; + +/** + * Unit tests for the iterative bulk restore path introduced for issue #4003. Verifies that + * {@link EntityRepository#restoreChildren(UUID, String)} groups children by entity type and + * dispatches a single {@link EntityRepository#bulkRestoreSubtree(List, String)} call per type + * (instead of N recursive {@code Entity.restoreEntity} calls), and that the bulk path skips + * empty inputs and invokes the {@code restoreAdditionalChildren} extension hook once per + * restored entity. 
+ */ +class EntityRepositoryRestoreTest { + + private CollectionDAO daoCollection; + private CollectionDAO.EntityRelationshipDAO relationshipDAO; + private CollectionDAO.PipelineDAO pipelineDAO; + + private static class CountingPipelineRepo extends EntityRepository { + int restoreAdditionalChildrenCalls = 0; + final Set bulkRestoreInvokedWith = new HashSet<>(); + + CountingPipelineRepo(CollectionDAO.PipelineDAO dao) { + super("pipelines", Entity.PIPELINE, Pipeline.class, dao, "", ""); + } + + @Override + protected void setFields(Pipeline entity, Fields fields, RelationIncludes r) {} + + @Override + protected void clearFields(Pipeline entity, Fields fields) {} + + @Override + protected void prepare(Pipeline entity, boolean update) {} + + @Override + protected void storeEntity(Pipeline entity, boolean update) {} + + @Override + protected void storeRelationships(Pipeline entity) {} + + @Override + protected void restoreAdditionalChildren(UUID id, String updatedBy) { + restoreAdditionalChildrenCalls++; + bulkRestoreInvokedWith.add(id); + } + } + + @BeforeEach + void setUp() { + daoCollection = mock(CollectionDAO.class); + relationshipDAO = mock(CollectionDAO.EntityRelationshipDAO.class); + pipelineDAO = mock(CollectionDAO.PipelineDAO.class); + when(daoCollection.relationshipDAO()).thenReturn(relationshipDAO); + Entity.setCollectionDAO(daoCollection); + } + + @AfterEach + void tearDown() { + Entity.setCollectionDAO(null); + } + + @Test + void restoreChildren_withNoChildren_isNoOp() { + CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); + UUID parentId = UUID.randomUUID(); + when(relationshipDAO.findTo(eq(parentId), eq(Entity.PIPELINE), anyInt())).thenReturn(List.of()); + + repo.restoreChildren(parentId, "user"); + + verify(relationshipDAO).findTo(eq(parentId), eq(Entity.PIPELINE), anyInt()); + assertEquals(0, repo.restoreAdditionalChildrenCalls); + } + + @Test + void restoreChildren_groupsByTypeAndDispatchesOnceEach() { + CountingPipelineRepo repo = new 
CountingPipelineRepo(pipelineDAO); + UUID parentId = UUID.randomUUID(); + + UUID schemaA = UUID.randomUUID(); + UUID schemaB = UUID.randomUUID(); + UUID procA = UUID.randomUUID(); + + List children = new ArrayList<>(); + children.add(record(schemaA, Entity.DATABASE_SCHEMA)); + children.add(record(schemaB, Entity.DATABASE_SCHEMA)); + children.add(record(procA, Entity.STORED_PROCEDURE)); + when(relationshipDAO.findTo(eq(parentId), eq(Entity.PIPELINE), anyInt())).thenReturn(children); + + EntityRepository schemaRepo = mock(EntityRepository.class); + EntityRepository procRepo = mock(EntityRepository.class); + + try (MockedStatic entityMock = mockStatic(Entity.class)) { + entityMock + .when(() -> Entity.getEntityRepository(Entity.DATABASE_SCHEMA)) + .thenReturn(schemaRepo); + entityMock + .when(() -> Entity.getEntityRepository(Entity.STORED_PROCEDURE)) + .thenReturn(procRepo); + + repo.restoreChildren(parentId, "user"); + } + + ArgumentCaptor> schemaIds = captureUuidList(); + verify(schemaRepo, times(1)).bulkRestoreSubtree(schemaIds.capture(), eq("user")); + assertEquals(2, schemaIds.getValue().size()); + assertTrue(schemaIds.getValue().contains(schemaA)); + assertTrue(schemaIds.getValue().contains(schemaB)); + + ArgumentCaptor> procIds = captureUuidList(); + verify(procRepo, times(1)).bulkRestoreSubtree(procIds.capture(), eq("user")); + assertEquals(1, procIds.getValue().size()); + assertTrue(procIds.getValue().contains(procA)); + + verify(schemaRepo, never()).restoreEntity(eq("user"), eq(schemaA)); + verify(schemaRepo, never()).restoreEntity(eq("user"), eq(schemaB)); + verify(procRepo, never()).restoreEntity(eq("user"), eq(procA)); + } + + @Test + void bulkRestoreSubtree_emptyOrNullIds_isNoOp() { + CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); + + repo.bulkRestoreSubtree(null, "user"); + repo.bulkRestoreSubtree(List.of(), "user"); + + verify(pipelineDAO, never()) + .findEntitiesByIds(anyList(), eq(org.openmetadata.schema.type.Include.DELETED)); + 
assertEquals(0, repo.restoreAdditionalChildrenCalls); + } + + @Test + void bulkRestoreSubtree_noDeletedEntitiesFound_isNoOp() { + CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); + UUID id = UUID.randomUUID(); + when(pipelineDAO.findEntitiesByIds(anyList(), eq(org.openmetadata.schema.type.Include.DELETED))) + .thenReturn(List.of()); + + repo.bulkRestoreSubtree(List.of(id), "user"); + + verify(pipelineDAO, atLeastOnce()) + .findEntitiesByIds(anyList(), eq(org.openmetadata.schema.type.Include.DELETED)); + assertEquals(0, repo.restoreAdditionalChildrenCalls); + } + + private CollectionDAO.EntityRelationshipRecord record(UUID id, String type) { + return CollectionDAO.EntityRelationshipRecord.builder().id(id).type(type).build(); + } + + @SuppressWarnings("unchecked") + private static ArgumentCaptor> captureUuidList() { + return ArgumentCaptor.forClass(List.class); + } + + private static List anyList() { + return org.mockito.ArgumentMatchers.anyList(); + } +} From 8940b222c283637fbf1595c9789045dc0f894cf6 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Fri, 8 May 2026 11:50:07 -0700 Subject: [PATCH 02/38] perf(restore,delete): batched per-level findTo + bulk soft-delete cascade MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-up improvements to the bulk restore introduced for #4003. Batched findTo per tree level: bulkRestoreSubtree previously issued one findTo per parent during recursion, which at the schemas → tables level of a 12k-table database meant 12k DB round trips just to enumerate children. The new bulkRestoreContainedChildren helper does one findToBatchAllTypes per tree level regardless of fan-out, then groups the results by child type and dispatches to each repo's bulkRestoreSubtree. 
DashboardRepository's chart-restore logic moves from the now-bypassed restoreChildren override to the existing restoreAdditionalChildren extension hook so it still runs both for direct dashboard restores and when dashboards are descendants of a larger restore. Symmetric bulk soft-delete cascade: deleteByName/deleteById had the same per-entity recursion that this PR fixed for restore — soft-deleting a database with 12k tables ran 12k recursive Entity.deleteEntity calls, each writing one row + one ES update + one change event. New bulkSoftDeleteSubtree mirrors bulkRestoreSubtree: one batched findToBatchAllTypes per level, deferred-store DB writes, batched version history, batched change events, batched cache invalidation; per-descendant ES writes are skipped because the existing deleteFromSearch cascade flips the deleted flag on descendant indexes in one update_by_query. deleteChildren(List, hardDelete=false, ...) now dispatches to the bulk path; hard-delete keeps the existing batchDeleteChildren path. New softDeleteAdditionalChildren extension hook mirrors restoreAdditionalChildren; DashboardRepository's chart soft-delete migrates onto it for the same reason. Tests: extends EntityRepositoryRestoreTest with cases that verify findToBatchAllTypes is invoked exactly once per level (not once per parent) for both bulk operations, plus the existing grouping/dispatch shape for the soft-delete entry point. Extends RestoreHierarchyIT with a recursive soft-delete cascade assertion. 
Co-Authored-By: Claude Opus 4.7 --- .../it/tests/RestoreHierarchyIT.java | 27 ++ .../service/jdbi3/DashboardRepository.java | 34 ++- .../service/jdbi3/EntityRepository.java | 232 +++++++++++++++++- .../jdbi3/EntityRepositoryRestoreTest.java | 123 +++++++++- 4 files changed, 382 insertions(+), 34 deletions(-) diff --git a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/RestoreHierarchyIT.java b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/RestoreHierarchyIT.java index e518ae45feec..5490f3559c7c 100644 --- a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/RestoreHierarchyIT.java +++ b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/RestoreHierarchyIT.java @@ -76,6 +76,33 @@ void syncRestore_restoresFullHierarchy(TestNamespace ns) { assertHierarchyRestored(h); } + @Test + void recursiveSoftDelete_marksFullSubtreeDeletedInOnePassPerType(TestNamespace ns) { + Hierarchy h = createHierarchy(ns, "softdel"); + Map recursiveDelete = new HashMap<>(); + recursiveDelete.put("recursive", "true"); + SdkClients.adminClient().databases().delete(h.database.getId().toString(), recursiveDelete); + + OpenMetadataClient client = SdkClients.adminClient(); + Database deletedDb = + client.databases().get(h.database.getId().toString(), "deleted", Include.ALL.value()); + assertTrue(Boolean.TRUE.equals(deletedDb.getDeleted())); + + for (DatabaseSchema schema : h.schemas) { + DatabaseSchema fetched = + client.databaseSchemas().get(schema.getId().toString(), "deleted", Include.ALL.value()); + assertTrue( + Boolean.TRUE.equals(fetched.getDeleted()), + "schema " + schema.getName() + " was not soft-deleted via the bulk cascade"); + } + for (Table table : h.tables) { + Table fetched = client.tables().get(table.getId().toString(), "deleted", Include.ALL.value()); + assertTrue( + Boolean.TRUE.equals(fetched.getDeleted()), + "table " + table.getName() + " was not soft-deleted via the bulk cascade"); + } + } 
+ @Test void asyncRestore_returns202AndRestoresFullHierarchy(TestNamespace ns) { Hierarchy h = createHierarchy(ns, "async"); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java index d7a3d0507f9e..c8f89a1cf76a 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java @@ -209,14 +209,14 @@ public void clearFields(Dashboard dashboard, Fields fields) { fields.contains("usageSummary") ? dashboard.getUsageSummary() : null); } - // Override soft delete behavior to handle charts through HAS relation. + // Soft-delete chart links (HAS relation). The CONTAINS subtree is handled by the bulk + // path in EntityRepository.bulkSoftDeleteSubtree; chart handling is a per-dashboard + // concern and lives in the per-entity extension hook so it runs both for direct dashboard + // deletes and when dashboards are descendants of a larger soft-delete (e.g., + // DashboardService cascade). 
@Transaction @Override - protected void deleteChildren( - UUID dashboardId, boolean recursive, boolean hardDelete, String updatedBy) { - super.deleteChildren(dashboardId, recursive, hardDelete, updatedBy); - - // Load all charts linked to this dashboard + protected void softDeleteAdditionalChildren(UUID dashboardId, String updatedBy) { List chartRecords = daoCollection .relationshipDAO() @@ -225,7 +225,6 @@ protected void deleteChildren( return; } - // Batch-load dashboard relationships for these charts List dashboardRelationships = daoCollection .relationshipDAO() @@ -248,11 +247,10 @@ protected void deleteChildren( Include.NON_DELETED) .stream() .map(Dashboard::getId) - .filter(id -> !id.equals(dashboardId)) // (excluding the current dashboard + .filter(id -> !id.equals(dashboardId)) .collect(Collectors.toSet()); - // For deletion: get charts whose linked dashboards (excluding the current dashboard) - // have no other non‑deleted dashboards. + // Soft-delete charts whose only remaining dashboard is the one being deleted. List filteredChartRecordsToBeDeleted = new ArrayList<>(); @@ -274,16 +272,15 @@ protected void deleteChildren( } } - deleteChildren(filteredChartRecordsToBeDeleted, hardDelete, updatedBy); + deleteChildren(filteredChartRecordsToBeDeleted, false, updatedBy); } - // Override restore behavior to handle charts through HAS relation. + // Restore chart links (HAS relation). The CONTAINS subtree is now restored by the bulk + // path in EntityRepository.bulkRestoreSubtree; chart handling is a per-dashboard concern + // and lives in the per-entity extension hook. 
@Transaction @Override - protected void restoreChildren(UUID dashboardId, String updatedBy) { - super.restoreChildren(dashboardId, updatedBy); - - // Load all charts linked to this dashboard + protected void restoreAdditionalChildren(UUID dashboardId, String updatedBy) { List chartRecords = daoCollection .relationshipDAO() @@ -292,7 +289,6 @@ protected void restoreChildren(UUID dashboardId, String updatedBy) { return; } - // Batch-load dashboard relationships for these charts List dashboardRelationships = daoCollection .relationshipDAO() @@ -315,11 +311,9 @@ protected void restoreChildren(UUID dashboardId, String updatedBy) { Include.DELETED) .stream() .map(Dashboard::getId) - .filter(id -> !id.equals(dashboardId)) // (excluding the current dashboard + .filter(id -> !id.equals(dashboardId)) .collect(Collectors.toSet()); - // For restore: get charts whose linked dashboards (excluding the current dashboard) - // are all non‑deleted. List filteredChartRecordsToBeRestored = new ArrayList<>(); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index 6a1d2781ec46..49ab733bc88d 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -4192,18 +4192,36 @@ protected void deleteChildren(UUID id, boolean recursive, boolean hardDelete, St @Transaction protected void deleteChildren( List children, boolean hardDelete, String updatedBy) { - // Use batch deletion only for hard deletes with large numbers of children - // For soft deletes, we must maintain the correct order for restoration to work properly - if (hardDelete && children.size() > 100) { + if (children.isEmpty()) { + return; + } + // Soft delete dispatches to the per-type bulk path that mirrors bulkRestoreSubtree — + // one batched DB write + 
one batched change-event insert per type, regardless of + // descendant count. The per-type ES cascade in deleteFromSearch handles index updates. + if (!hardDelete) { + Map> idsByType = + children.stream() + .collect( + Collectors.groupingBy( + EntityRelationshipRecord::getType, + Collectors.mapping( + EntityRelationshipRecord::getId, Collectors.toList()))); + for (var entry : idsByType.entrySet()) { + EntityRepository repo = Entity.getEntityRepository(entry.getKey()); + repo.bulkSoftDeleteSubtree(entry.getValue(), updatedBy); + } + return; + } + // Hard delete keeps the existing batch-vs-sequential split: batchDeleteChildren only + // for >100 children (cleanup() per child has its own JDBI transaction; see the + // failure-semantics note on processDeletionBatch). + if (children.size() > 100) { LOG.info("Using batch deletion for {} children entities", children.size()); batchDeleteChildren(children, hardDelete, updatedBy); } else { - // For soft deletes or small numbers, use original sequential deletion - // This ensures proper parent-child relationships are maintained for restoration for (EntityRelationshipRecord entityRelationshipRecord : children) { LOG.info( - "Recursively {} deleting {} {}", - hardDelete ? "hard" : "soft", + "Recursively hard deleting {} {}", entityRelationshipRecord.getType(), entityRelationshipRecord.getId()); Entity.deleteEntity( @@ -5566,8 +5584,9 @@ protected void restoreChildren(UUID id, String updatedBy) { * cascade the deleted flag flip across child indexes in a single ES update_by_query. * *

Subclasses that link non-CONTAINS related entities (e.g., charts attached to dashboards - * via HAS) should override {@link #restoreChildren(UUID, String)} or implement the - * {@link #restoreAdditionalChildren(UUID, String)} hook. + * via HAS) should implement the {@link #restoreAdditionalChildren(UUID, String)} hook — + * the CONTAINS subtree is restored by the bulk path itself, so per-entity overrides of + * {@code restoreChildren} are no longer invoked from inside the bulk walk. */ @Transaction public final void bulkRestoreSubtree(List ids, String updatedBy) { @@ -5582,9 +5601,7 @@ public final void bulkRestoreSubtree(List ids, String updatedBy) { return; } - for (T entity : deletedEntities) { - restoreChildren(entity.getId(), updatedBy); - } + bulkRestoreContainedChildren(deletedEntities, updatedBy); long now = System.currentTimeMillis(); List updaters = new ArrayList<>(deletedEntities.size()); @@ -5651,6 +5668,42 @@ private void runRestoreAdditionalChildren(List entities, String updatedBy) { } } + /** + * Find all CONTAINS children for every entity in {@code parents} with one batched query, then + * dispatch grouped child IDs to each child type's {@link #bulkRestoreSubtree(List, String)}. + * Replaces the per-parent {@code findTo} round-trip that used to fire once per descendant — + * for a 12k-table database that's 12k DB hits collapsed into one per tree level. 
+ */ + private void bulkRestoreContainedChildren(List parents, String updatedBy) { + List parentIds = new ArrayList<>(parents.size()); + for (T parent : parents) { + parentIds.add(parent.getId().toString()); + } + List relationships; + try (var ignored = phase("bulkRestoreFindChildren")) { + relationships = + daoCollection + .relationshipDAO() + .findToBatchAllTypes(parentIds, Relationship.CONTAINS.ordinal(), ALL); + } + if (relationships.isEmpty()) { + return; + } + Map> idsByChildType = new HashMap<>(); + for (var rel : relationships) { + if (!entityType.equals(rel.getFromEntity())) { + continue; + } + idsByChildType + .computeIfAbsent(rel.getToEntity(), k -> new ArrayList<>()) + .add(UUID.fromString(rel.getToId())); + } + for (var entry : idsByChildType.entrySet()) { + EntityRepository repo = Entity.getEntityRepository(entry.getKey()); + repo.bulkRestoreSubtree(entry.getValue(), updatedBy); + } + } + /** * Hook called once per restored entity for repositories that have non-CONTAINS related * entities that need to be restored alongside the parent. Default: no-op. @@ -5659,6 +5712,161 @@ protected void restoreAdditionalChildren(UUID id, String updatedBy) { // No-op. Override in subclasses for HAS-style related-entity restore. } + /** + * Bulk soft-delete the given entities of this repository's type along with their CONTAINS + * subtree. Symmetric to {@link #bulkRestoreSubtree(List, String)}: replaces the per-entity + * recursive {@code Entity.deleteEntity} loop in + * {@link #deleteChildren(List, boolean, String)} with a per-level batched walk that uses + * the deferred-store bulk update infrastructure. + * + *

Per-level shape: one batched {@code findToBatchAllTypes}, one batched DB load (NON + * deleted only — already-deleted entities are skipped, mirroring the per-entity guard), + * one batched {@code updateMany} that flips {@code deleted = true}, one batched version + * history insert, one batched change-event insert, one batched cache invalidation. + * Per-descendant ES writes are skipped — the top-level + * {@link #deleteFromSearch(EntityInterface, boolean)} cascade flips the deleted flag on + * descendant ES indexes in a single update_by_query. + * + *

Entity types where {@code supportsSoftDelete} is false fall back to the per-entity + * hard-delete path (matches the existing per-entity {@code delete()} fallback). Subclasses + * with non-CONTAINS linked entities should override + * {@link #softDeleteAdditionalChildren(UUID, String)}. + */ + @Transaction + public final void bulkSoftDeleteSubtree(List ids, String updatedBy) { + if (ids == null || ids.isEmpty()) { + return; + } + if (!supportsSoftDelete) { + for (UUID id : ids) { + Entity.deleteEntity(updatedBy, entityType, id, true, true); + } + return; + } + List entities; + try (var ignored = phase("bulkSoftDeleteLoad")) { + entities = find(ids, NON_DELETED); + } + if (entities.isEmpty()) { + return; + } + for (T entity : entities) { + checkSystemEntityDeletion(entity); + preDelete(entity, updatedBy); + } + + bulkSoftDeleteContainedChildren(entities, updatedBy); + + long now = System.currentTimeMillis(); + List updaters = new ArrayList<>(entities.size()); + try (var ignored = phase("bulkSoftDeleteUpdaters")) { + for (T original : entities) { + T updated = JsonUtils.readValue(JsonUtils.pojoToJson(original), entityClass); + updated.setUpdatedBy(updatedBy); + updated.setUpdatedAt(now); + updated.setDeleted(true); + EntityUpdater updater = getUpdater(original, updated, Operation.SOFT_DELETE, null); + updater.updateWithDeferredStore(); + updaters.add(updater); + } + } + + List changed = + updaters.stream().filter(u -> u.isVersionChanged() || u.isEntityChanged()).toList(); + if (changed.isEmpty()) { + runSoftDeleteAdditionalChildren(entities, updatedBy); + return; + } + + try (var ignored = phase("bulkSoftDeleteVersionHistory")) { + List historyIds = new ArrayList<>(); + List historyExtensions = new ArrayList<>(); + List historyJsons = new ArrayList<>(); + for (EntityUpdater u : changed) { + if (u.isVersionChanged()) { + historyIds.add(u.getOriginal().getId()); + historyExtensions.add( + EntityUtil.getVersionExtension(entityType, u.getOriginal().getVersion())); + 
historyJsons.add(JsonUtils.pojoToJson(u.getOriginal())); + } + } + if (!historyIds.isEmpty()) { + daoCollection + .entityExtensionDAO() + .insertMany(historyIds, historyExtensions, entityType, historyJsons); + } + } + + List changedEntities = changed.stream().map(EntityUpdater::getUpdated).toList(); + try (var ignored = phase("bulkSoftDeleteUpdateMany")) { + updateMany(changedEntities); + } + try (var ignored = phase("bulkSoftDeleteInvalidate")) { + invalidateMany(changedEntities); + } + try (var ignored = phase("bulkSoftDeleteChangeEvents")) { + List changeEventJsons = new ArrayList<>(); + for (EntityUpdater u : changed) { + buildChangeEventJsonForBulkOperation(u.getUpdated(), ENTITY_SOFT_DELETED, updatedBy) + .ifPresent(changeEventJsons::add); + } + insertChangeEventsBatch(changeEventJsons); + } + + ListCountCache.invalidate(entityType); + runSoftDeleteAdditionalChildren(entities, updatedBy); + } + + /** + * Mirror of {@link #bulkRestoreContainedChildren(List, String)} for soft delete: one + * batched {@code findToBatchAllTypes} per tree level, then dispatch grouped child IDs to + * each child type's {@link #bulkSoftDeleteSubtree(List, String)}. 
+ */ + private void bulkSoftDeleteContainedChildren(List parents, String updatedBy) { + List parentIds = new ArrayList<>(parents.size()); + for (T parent : parents) { + parentIds.add(parent.getId().toString()); + } + List relationships; + try (var ignored = phase("bulkSoftDeleteFindChildren")) { + relationships = + daoCollection + .relationshipDAO() + .findToBatchAllTypes(parentIds, Relationship.CONTAINS.ordinal(), ALL); + } + if (relationships.isEmpty()) { + return; + } + Map> idsByChildType = new HashMap<>(); + for (var rel : relationships) { + if (!entityType.equals(rel.getFromEntity())) { + continue; + } + idsByChildType + .computeIfAbsent(rel.getToEntity(), k -> new ArrayList<>()) + .add(UUID.fromString(rel.getToId())); + } + for (var entry : idsByChildType.entrySet()) { + EntityRepository repo = Entity.getEntityRepository(entry.getKey()); + repo.bulkSoftDeleteSubtree(entry.getValue(), updatedBy); + } + } + + private void runSoftDeleteAdditionalChildren(List entities, String updatedBy) { + for (T entity : entities) { + softDeleteAdditionalChildren(entity.getId(), updatedBy); + } + } + + /** + * Hook called once per soft-deleted entity for repositories that have non-CONTAINS related + * entities that need to be soft-deleted alongside the parent (e.g., charts attached to + * dashboards via HAS). Default: no-op. + */ + protected void softDeleteAdditionalChildren(UUID id, String updatedBy) { + // No-op. Override in subclasses for HAS-style related-entity soft delete. 
+ } + public final void addRelationship( UUID fromId, UUID toId, String fromEntity, String toEntity, Relationship relationship) { addRelationship(fromId, toId, fromEntity, toEntity, relationship, false); diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java index 38f803a80dd6..a9ebb14e339c 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java @@ -15,6 +15,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.anyList; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.mock; @@ -35,6 +36,8 @@ import org.mockito.ArgumentCaptor; import org.mockito.MockedStatic; import org.openmetadata.schema.entity.data.Pipeline; +import org.openmetadata.schema.type.Include; +import org.openmetadata.schema.type.Relationship; import org.openmetadata.service.Entity; import org.openmetadata.service.util.EntityUtil.Fields; import org.openmetadata.service.util.EntityUtil.RelationIncludes; @@ -55,7 +58,9 @@ class EntityRepositoryRestoreTest { private static class CountingPipelineRepo extends EntityRepository { int restoreAdditionalChildrenCalls = 0; + int softDeleteAdditionalChildrenCalls = 0; final Set bulkRestoreInvokedWith = new HashSet<>(); + final Set bulkSoftDeleteInvokedWith = new HashSet<>(); CountingPipelineRepo(CollectionDAO.PipelineDAO dao) { super("pipelines", Entity.PIPELINE, Pipeline.class, dao, "", ""); @@ -81,6 +86,12 @@ protected void restoreAdditionalChildren(UUID id, String updatedBy) { 
restoreAdditionalChildrenCalls++; bulkRestoreInvokedWith.add(id); } + + @Override + protected void softDeleteAdditionalChildren(UUID id, String updatedBy) { + softDeleteAdditionalChildrenCalls++; + bulkSoftDeleteInvokedWith.add(id); + } } @BeforeEach @@ -180,6 +191,113 @@ void bulkRestoreSubtree_noDeletedEntitiesFound_isNoOp() { assertEquals(0, repo.restoreAdditionalChildrenCalls); } + @Test + void bulkRestoreSubtree_usesBatchedFindToOncePerLevel() { + CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); + UUID a = UUID.randomUUID(); + UUID b = UUID.randomUUID(); + Pipeline pa = new Pipeline().withId(a).withName("a").withFullyQualifiedName("svc.a"); + Pipeline pb = new Pipeline().withId(b).withName("b").withFullyQualifiedName("svc.b"); + when(pipelineDAO.findEntitiesByIds(anyList(), eq(Include.DELETED))) + .thenReturn(List.of(pa, pb)); + when(relationshipDAO.findToBatchAllTypes( + anyList(), eq(Relationship.CONTAINS.ordinal()), eq(Include.ALL))) + .thenReturn(List.of()); + + try { + repo.bulkRestoreSubtree(List.of(a, b), "user"); + } catch (Exception ignored) { + // Heavy DB write path requires more wiring than this unit test mocks; we only care + // that the per-level findTo collapse happened before any failure downstream. 
+ } + + ArgumentCaptor> idsCap = captureStringList(); + verify(relationshipDAO, times(1)) + .findToBatchAllTypes( + idsCap.capture(), eq(Relationship.CONTAINS.ordinal()), eq(Include.ALL)); + assertEquals(2, idsCap.getValue().size()); + assertTrue(idsCap.getValue().contains(a.toString())); + assertTrue(idsCap.getValue().contains(b.toString())); + } + + @Test + void deleteChildren_softDelete_groupsByTypeAndDispatchesToBulkSoftDelete() { + CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); + + UUID schemaA = UUID.randomUUID(); + UUID schemaB = UUID.randomUUID(); + UUID procA = UUID.randomUUID(); + + List children = new ArrayList<>(); + children.add(record(schemaA, Entity.DATABASE_SCHEMA)); + children.add(record(schemaB, Entity.DATABASE_SCHEMA)); + children.add(record(procA, Entity.STORED_PROCEDURE)); + + EntityRepository schemaRepo = mock(EntityRepository.class); + EntityRepository procRepo = mock(EntityRepository.class); + + try (MockedStatic entityMock = mockStatic(Entity.class)) { + entityMock + .when(() -> Entity.getEntityRepository(Entity.DATABASE_SCHEMA)) + .thenReturn(schemaRepo); + entityMock + .when(() -> Entity.getEntityRepository(Entity.STORED_PROCEDURE)) + .thenReturn(procRepo); + + repo.deleteChildren(children, false, "user"); + } + + ArgumentCaptor> schemaIds = captureUuidList(); + verify(schemaRepo, times(1)).bulkSoftDeleteSubtree(schemaIds.capture(), eq("user")); + assertEquals(2, schemaIds.getValue().size()); + assertTrue(schemaIds.getValue().contains(schemaA)); + assertTrue(schemaIds.getValue().contains(schemaB)); + + ArgumentCaptor> procIds = captureUuidList(); + verify(procRepo, times(1)).bulkSoftDeleteSubtree(procIds.capture(), eq("user")); + assertEquals(1, procIds.getValue().size()); + assertTrue(procIds.getValue().contains(procA)); + } + + @Test + void bulkSoftDeleteSubtree_emptyOrNullIds_isNoOp() { + CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); + + repo.bulkSoftDeleteSubtree(null, "user"); + 
repo.bulkSoftDeleteSubtree(List.of(), "user"); + + verify(pipelineDAO, never()).findEntitiesByIds(anyList(), eq(Include.NON_DELETED)); + assertEquals(0, repo.softDeleteAdditionalChildrenCalls); + } + + @Test + void bulkSoftDeleteSubtree_usesBatchedFindToOncePerLevel() { + CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); + UUID a = UUID.randomUUID(); + UUID b = UUID.randomUUID(); + Pipeline pa = new Pipeline().withId(a).withName("a").withFullyQualifiedName("svc.a"); + Pipeline pb = new Pipeline().withId(b).withName("b").withFullyQualifiedName("svc.b"); + when(pipelineDAO.findEntitiesByIds(anyList(), eq(Include.NON_DELETED))) + .thenReturn(List.of(pa, pb)); + when(relationshipDAO.findToBatchAllTypes( + anyList(), eq(Relationship.CONTAINS.ordinal()), eq(Include.ALL))) + .thenReturn(List.of()); + + try { + repo.bulkSoftDeleteSubtree(List.of(a, b), "user"); + } catch (Exception ignored) { + // Heavy DB write path is not mocked; we verify only the per-level findTo collapse. + } + + ArgumentCaptor> idsCap = captureStringList(); + verify(relationshipDAO, times(1)) + .findToBatchAllTypes( + idsCap.capture(), eq(Relationship.CONTAINS.ordinal()), eq(Include.ALL)); + assertEquals(2, idsCap.getValue().size()); + assertTrue(idsCap.getValue().contains(a.toString())); + assertTrue(idsCap.getValue().contains(b.toString())); + } + private CollectionDAO.EntityRelationshipRecord record(UUID id, String type) { return CollectionDAO.EntityRelationshipRecord.builder().id(id).type(type).build(); } @@ -189,7 +307,8 @@ private static ArgumentCaptor> captureUuidList() { return ArgumentCaptor.forClass(List.class); } - private static List anyList() { - return org.mockito.ArgumentMatchers.anyList(); + @SuppressWarnings("unchecked") + private static ArgumentCaptor> captureStringList() { + return ArgumentCaptor.forClass(List.class); } } From 241dec37681d041d8531c17974cadc96620fd459 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Fri, 8 May 2026 17:20:33 -0700 Subject: 
[PATCH 03/38] style: formatter reflows after bulk restore changes Co-Authored-By: Claude Opus 4.7 --- ingestion/src/metadata/sdk/entities/base.py | 4 +--- .../java/org/openmetadata/service/jdbi3/EntityRepository.java | 3 +-- .../service/jdbi3/EntityRepositoryRestoreTest.java | 3 +-- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/ingestion/src/metadata/sdk/entities/base.py b/ingestion/src/metadata/sdk/entities/base.py index de30fdb6d3e3..38134c126ec1 100644 --- a/ingestion/src/metadata/sdk/entities/base.py +++ b/ingestion/src/metadata/sdk/entities/base.py @@ -483,9 +483,7 @@ def restore_request(cls, entity_id: UuidLike) -> "RestoreOperation[TEntity]": # job = Table.restore_request(table_id).with_async().execute() """ - return RestoreOperation( - entity_cls=cls, entity_id=cls._stringify_identifier(entity_id) - ) + return RestoreOperation(entity_cls=cls, entity_id=cls._stringify_identifier(entity_id)) @classmethod def update_custom_properties(cls, identifier: UuidLike): diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index 49ab733bc88d..1bcf80ee0a08 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -4204,8 +4204,7 @@ protected void deleteChildren( .collect( Collectors.groupingBy( EntityRelationshipRecord::getType, - Collectors.mapping( - EntityRelationshipRecord::getId, Collectors.toList()))); + Collectors.mapping(EntityRelationshipRecord::getId, Collectors.toList()))); for (var entry : idsByType.entrySet()) { EntityRepository repo = Entity.getEntityRepository(entry.getKey()); repo.bulkSoftDeleteSubtree(entry.getValue(), updatedBy); diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java 
b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java index a9ebb14e339c..bf44d16fdba6 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java @@ -198,8 +198,7 @@ void bulkRestoreSubtree_usesBatchedFindToOncePerLevel() { UUID b = UUID.randomUUID(); Pipeline pa = new Pipeline().withId(a).withName("a").withFullyQualifiedName("svc.a"); Pipeline pb = new Pipeline().withId(b).withName("b").withFullyQualifiedName("svc.b"); - when(pipelineDAO.findEntitiesByIds(anyList(), eq(Include.DELETED))) - .thenReturn(List.of(pa, pb)); + when(pipelineDAO.findEntitiesByIds(anyList(), eq(Include.DELETED))).thenReturn(List.of(pa, pb)); when(relationshipDAO.findToBatchAllTypes( anyList(), eq(Relationship.CONTAINS.ordinal()), eq(Include.ALL))) .thenReturn(List.of()); From b3172d4b8365ac0490335996bad3214d6527721e Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Fri, 8 May 2026 18:06:46 -0700 Subject: [PATCH 04/38] review: address PR feedback on bulk restore + delete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Drop addHref(uriInfo,...) from restoreEntityAsync's lambda. uriInfo is request-scoped and may be invalidated once the 202 returns; the WebSocket notification only uses entity name, not HREFs (gitar #1). - Wire restoreAdditionalChildren / softDeleteAdditionalChildren into the per-entity restoreEntity / delete paths so direct-entity restores and soft-deletes still trigger the hook (e.g., DashboardRepository chart cleanup) — previously the hook only ran inside the bulk path (Copilot #11). - Stop wrapping OpenMetadataException in RuntimeException from the Tables/Databases fluent execute(). It already extends RuntimeException; the wrap was hiding the SDK's statusCode/errorCode (Copilot #5/#6). 
- Validate jobId in Python AsyncJobResponse.from_response — raise ValueError instead of silently coercing missing/empty jobIds (Copilot). - Extract dispatchToContainedChildren shared between bulkRestoreSubtree and bulkSoftDeleteSubtree (gitar #8). - Break up bulkRestoreSubtree / bulkSoftDeleteSubtree into focused phase helpers (loadForBulk, buildBulkUpdaters, filterChanged, persistBulkUpdaters, writeBulkVersionHistory, writeBulkChangeEvents) to satisfy the 15-line method guideline (gitar #9). - Update EntityRepositoryRestoreTest class Javadoc to match what the tests actually verify (Copilot). - Add Python tests for AsyncJobResponse.from_response missing/empty jobId rejection. Co-Authored-By: Claude Opus 4.7 --- ingestion/src/metadata/sdk/entities/base.py | 10 +- .../tests/unit/sdk/test_restore_async.py | 10 + .../openmetadata/sdk/fluent/Databases.java | 12 +- .../org/openmetadata/sdk/fluent/Tables.java | 12 +- .../service/jdbi3/EntityRepository.java | 252 ++++++++---------- .../service/resources/EntityResource.java | 3 +- .../jdbi3/EntityRepositoryRestoreTest.java | 24 +- 7 files changed, 151 insertions(+), 172 deletions(-) diff --git a/ingestion/src/metadata/sdk/entities/base.py b/ingestion/src/metadata/sdk/entities/base.py index 38134c126ec1..7c0a65fb99d6 100644 --- a/ingestion/src/metadata/sdk/entities/base.py +++ b/ingestion/src/metadata/sdk/entities/base.py @@ -78,10 +78,12 @@ def from_response(cls, payload: Any) -> "AsyncJobResponse": # noqa: UP037 if isinstance(payload, AsyncJobResponse): return payload if isinstance(payload, dict): - return cls( - job_id=str(payload.get("jobId", "")), - message=payload.get("message"), - ) + job_id = payload.get("jobId") + if not job_id: + raise ValueError( + f"Async response is missing a non-empty jobId: {payload!r}" + ) + return cls(job_id=str(job_id), message=payload.get("message")) raise TypeError(f"Cannot coerce {type(payload).__name__} into AsyncJobResponse") diff --git a/ingestion/tests/unit/sdk/test_restore_async.py 
b/ingestion/tests/unit/sdk/test_restore_async.py index ca31c42efaf8..87b48fbf2dd7 100644 --- a/ingestion/tests/unit/sdk/test_restore_async.py +++ b/ingestion/tests/unit/sdk/test_restore_async.py @@ -95,3 +95,13 @@ def test_async_job_response_from_response_passes_through_existing(): def test_async_job_response_from_response_rejects_unknown_type(): with pytest.raises(TypeError): AsyncJobResponse.from_response("not a dict") + + +def test_async_job_response_from_response_rejects_missing_job_id(): + with pytest.raises(ValueError, match="non-empty jobId"): + AsyncJobResponse.from_response({"message": "no id here"}) + + +def test_async_job_response_from_response_rejects_empty_job_id(): + with pytest.raises(ValueError, match="non-empty jobId"): + AsyncJobResponse.from_response({"jobId": "", "message": "blank"}) diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Databases.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Databases.java index 065894ef6ed4..b73132e0a7e2 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Databases.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Databases.java @@ -306,11 +306,7 @@ public AsyncDatabaseRestorer async() { } public Database execute() { - try { - return client.databases().restore(id); - } catch (org.openmetadata.sdk.exceptions.OpenMetadataException e) { - throw new RuntimeException(e); - } + return client.databases().restore(id); } } @@ -324,11 +320,7 @@ public AsyncDatabaseRestorer(OpenMetadataClient client, String id) { } public org.openmetadata.sdk.models.AsyncJobResponse execute() { - try { - return client.databases().restoreServerAsync(id); - } catch (org.openmetadata.sdk.exceptions.OpenMetadataException e) { - throw new RuntimeException(e); - } + return client.databases().restoreServerAsync(id); } } diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Tables.java 
b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Tables.java index bc1c5b94b5df..d38111c1ef77 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Tables.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Tables.java @@ -400,11 +400,7 @@ public AsyncTableRestorer async() { } public Table execute() { - try { - return client.tables().restore(id); - } catch (org.openmetadata.sdk.exceptions.OpenMetadataException e) { - throw new RuntimeException(e); - } + return client.tables().restore(id); } } @@ -418,11 +414,7 @@ public AsyncTableRestorer(OpenMetadataClient client, String id) { } public org.openmetadata.sdk.models.AsyncJobResponse execute() { - try { - return client.tables().restoreServerAsync(id); - } catch (org.openmetadata.sdk.exceptions.OpenMetadataException e) { - throw new RuntimeException(e); - } + return client.tables().restoreServerAsync(id); } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index 1bcf80ee0a08..6d125eeeab25 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -4116,6 +4116,10 @@ private DeleteResponse delete( EntityUpdater updater = getUpdater(original, updated, Operation.SOFT_DELETE, null); updater.update(); changeType = ENTITY_SOFT_DELETED; + // Run the same hook the bulk path runs — keeps direct-entity soft delete in sync + // with bulkSoftDeleteSubtree for repos that link non-CONTAINS entities (e.g., + // dashboard charts). 
+ softDeleteAdditionalChildren(original.getId(), deletedBy); } else { cleanup(updated); changeType = ENTITY_DELETED; @@ -5546,6 +5550,9 @@ public final PutResponse restoreEntity(String updatedBy, UUID id) { updater.update(); // Restore moves the row from deleted=true to deleted=false, changing the listing total. ListCountCache.invalidate(entityType); + // Run the same hook the bulk path runs — keeps direct-entity restore in sync with + // bulkRestoreSubtree for repos that link non-CONTAINS entities (e.g., dashboard charts). + restoreAdditionalChildren(id, updatedBy); return new PutResponse<>(Status.OK, updated, ENTITY_RESTORED); } catch (EntityNotFoundException e) { LOG.info("Entity is not in deleted state {} {}", entityType, id); @@ -5592,71 +5599,23 @@ public final void bulkRestoreSubtree(List ids, String updatedBy) { if (ids == null || ids.isEmpty()) { return; } - List deletedEntities; - try (var ignored = phase("bulkRestoreLoad")) { - deletedEntities = find(ids, DELETED); - } + List deletedEntities = loadForBulk(ids, DELETED, "bulkRestoreLoad"); if (deletedEntities.isEmpty()) { return; } + dispatchToContainedChildren( + deletedEntities, + "bulkRestoreFindChildren", + (childRepo, childIds) -> childRepo.bulkRestoreSubtree(childIds, updatedBy)); - bulkRestoreContainedChildren(deletedEntities, updatedBy); - - long now = System.currentTimeMillis(); - List updaters = new ArrayList<>(deletedEntities.size()); - try (var ignored = phase("bulkRestoreUpdaters")) { - for (T original : deletedEntities) { - T updated = JsonUtils.readValue(JsonUtils.pojoToJson(original), entityClass); - updated.setUpdatedBy(updatedBy); - updated.setUpdatedAt(now); - EntityUpdater updater = getUpdater(original, updated, Operation.PUT, null); - updater.updateWithDeferredStore(); - updaters.add(updater); - } - } - - List changed = - updaters.stream().filter(u -> u.isVersionChanged() || u.isEntityChanged()).toList(); + List updaters = + buildBulkUpdaters(deletedEntities, updatedBy, Operation.PUT, 
"bulkRestoreUpdaters", null); + List changed = filterChanged(updaters); if (changed.isEmpty()) { runRestoreAdditionalChildren(deletedEntities, updatedBy); return; } - - try (var ignored = phase("bulkRestoreVersionHistory")) { - List historyIds = new ArrayList<>(); - List historyExtensions = new ArrayList<>(); - List historyJsons = new ArrayList<>(); - for (EntityUpdater u : changed) { - if (u.isVersionChanged()) { - historyIds.add(u.getOriginal().getId()); - historyExtensions.add( - EntityUtil.getVersionExtension(entityType, u.getOriginal().getVersion())); - historyJsons.add(JsonUtils.pojoToJson(u.getOriginal())); - } - } - if (!historyIds.isEmpty()) { - daoCollection - .entityExtensionDAO() - .insertMany(historyIds, historyExtensions, entityType, historyJsons); - } - } - - List changedEntities = changed.stream().map(EntityUpdater::getUpdated).toList(); - try (var ignored = phase("bulkRestoreUpdateMany")) { - updateMany(changedEntities); - } - try (var ignored = phase("bulkRestoreInvalidate")) { - invalidateMany(changedEntities); - } - try (var ignored = phase("bulkRestoreChangeEvents")) { - List changeEventJsons = new ArrayList<>(); - for (EntityUpdater u : changed) { - buildChangeEventJsonForBulkOperation(u.getUpdated(), ENTITY_RESTORED, updatedBy) - .ifPresent(changeEventJsons::add); - } - insertChangeEventsBatch(changeEventJsons); - } - + persistBulkUpdaters(changed, ENTITY_RESTORED, updatedBy, "bulkRestore"); ListCountCache.invalidate(entityType); runRestoreAdditionalChildren(deletedEntities, updatedBy); } @@ -5669,17 +5628,19 @@ private void runRestoreAdditionalChildren(List entities, String updatedBy) { /** * Find all CONTAINS children for every entity in {@code parents} with one batched query, then - * dispatch grouped child IDs to each child type's {@link #bulkRestoreSubtree(List, String)}. 
- * Replaces the per-parent {@code findTo} round-trip that used to fire once per descendant — - * for a 12k-table database that's 12k DB hits collapsed into one per tree level. + * apply {@code dispatcher} to each (childRepo, childIds) group. Replaces the per-parent + * {@code findTo} round-trip that used to fire once per descendant — for a 12k-table database + * that's 12k DB hits collapsed into one per tree level. Shared between bulk restore and bulk + * soft-delete; the only thing that varies is the terminal call on the child repo. */ - private void bulkRestoreContainedChildren(List parents, String updatedBy) { + private void dispatchToContainedChildren( + List parents, String phaseName, BiConsumer, List> dispatcher) { List parentIds = new ArrayList<>(parents.size()); for (T parent : parents) { parentIds.add(parent.getId().toString()); } List relationships; - try (var ignored = phase("bulkRestoreFindChildren")) { + try (var ignored = phase(phaseName)) { relationships = daoCollection .relationshipDAO() @@ -5699,7 +5660,7 @@ private void bulkRestoreContainedChildren(List parents, String updatedBy) { } for (var entry : idsByChildType.entrySet()) { EntityRepository repo = Entity.getEntityRepository(entry.getKey()); - repo.bulkRestoreSubtree(entry.getValue(), updatedBy); + dispatcher.accept(repo, entry.getValue()); } } @@ -5742,10 +5703,7 @@ public final void bulkSoftDeleteSubtree(List ids, String updatedBy) { } return; } - List entities; - try (var ignored = phase("bulkSoftDeleteLoad")) { - entities = find(ids, NON_DELETED); - } + List entities = loadForBulk(ids, NON_DELETED, "bulkSoftDeleteLoad"); if (entities.isEmpty()) { return; } @@ -5754,30 +5712,99 @@ public final void bulkSoftDeleteSubtree(List ids, String updatedBy) { preDelete(entity, updatedBy); } - bulkSoftDeleteContainedChildren(entities, updatedBy); + dispatchToContainedChildren( + entities, + "bulkSoftDeleteFindChildren", + (childRepo, childIds) -> childRepo.bulkSoftDeleteSubtree(childIds, updatedBy)); 
+ + List updaters = + buildBulkUpdaters( + entities, + updatedBy, + Operation.SOFT_DELETE, + "bulkSoftDeleteUpdaters", + e -> e.setDeleted(true)); + List changed = filterChanged(updaters); + if (changed.isEmpty()) { + runSoftDeleteAdditionalChildren(entities, updatedBy); + return; + } + persistBulkUpdaters(changed, ENTITY_SOFT_DELETED, updatedBy, "bulkSoftDelete"); + ListCountCache.invalidate(entityType); + runSoftDeleteAdditionalChildren(entities, updatedBy); + } + + private void runSoftDeleteAdditionalChildren(List entities, String updatedBy) { + for (T entity : entities) { + softDeleteAdditionalChildren(entity.getId(), updatedBy); + } + } + + /** + * Hook called once per soft-deleted entity for repositories that have non-CONTAINS related + * entities that need to be soft-deleted alongside the parent (e.g., charts attached to + * dashboards via HAS). Default: no-op. + */ + protected void softDeleteAdditionalChildren(UUID id, String updatedBy) { + // No-op. Override in subclasses for HAS-style related-entity soft delete. 
+ } + + // ---- Shared phase helpers used by bulkRestoreSubtree / bulkSoftDeleteSubtree ---- + + private List loadForBulk(List ids, Include include, String phaseName) { + try (var ignored = phase(phaseName)) { + return find(ids, include); + } + } + private List buildBulkUpdaters( + List originals, + String updatedBy, + Operation op, + String phaseName, + java.util.function.Consumer mutator) { long now = System.currentTimeMillis(); - List updaters = new ArrayList<>(entities.size()); - try (var ignored = phase("bulkSoftDeleteUpdaters")) { - for (T original : entities) { + List updaters = new ArrayList<>(originals.size()); + try (var ignored = phase(phaseName)) { + for (T original : originals) { T updated = JsonUtils.readValue(JsonUtils.pojoToJson(original), entityClass); updated.setUpdatedBy(updatedBy); updated.setUpdatedAt(now); - updated.setDeleted(true); - EntityUpdater updater = getUpdater(original, updated, Operation.SOFT_DELETE, null); + if (mutator != null) { + mutator.accept(updated); + } + EntityUpdater updater = getUpdater(original, updated, op, null); updater.updateWithDeferredStore(); updaters.add(updater); } } + return updaters; + } - List changed = - updaters.stream().filter(u -> u.isVersionChanged() || u.isEntityChanged()).toList(); - if (changed.isEmpty()) { - runSoftDeleteAdditionalChildren(entities, updatedBy); - return; + private List filterChanged(List updaters) { + return updaters.stream().filter(u -> u.isVersionChanged() || u.isEntityChanged()).toList(); + } + + /** + * Apply a batch of {@link EntityUpdater}s already in deferred-store state: write version + * history, persist entity rows, invalidate caches, emit change events. {@code phasePrefix} + * is used to tag latency phases (e.g. {@code "bulkRestore"} → {@code "bulkRestoreVersionHistory"}). 
+ */ + private void persistBulkUpdaters( + List changed, EventType eventType, String userName, String phasePrefix) { + writeBulkVersionHistory(changed, phasePrefix); + List changedEntities = changed.stream().map(EntityUpdater::getUpdated).toList(); + try (var ignored = phase(phasePrefix + "UpdateMany")) { + updateMany(changedEntities); + } + try (var ignored = phase(phasePrefix + "Invalidate")) { + invalidateMany(changedEntities); } + writeBulkChangeEvents(changed, eventType, userName, phasePrefix + "ChangeEvents"); + } - try (var ignored = phase("bulkSoftDeleteVersionHistory")) { + private void writeBulkVersionHistory(List changed, String phasePrefix) { + try (var ignored = phase(phasePrefix + "VersionHistory")) { List historyIds = new ArrayList<>(); List historyExtensions = new ArrayList<>(); List historyJsons = new ArrayList<>(); @@ -5795,75 +5822,18 @@ public final void bulkSoftDeleteSubtree(List ids, String updatedBy) { .insertMany(historyIds, historyExtensions, entityType, historyJsons); } } + } - List changedEntities = changed.stream().map(EntityUpdater::getUpdated).toList(); - try (var ignored = phase("bulkSoftDeleteUpdateMany")) { - updateMany(changedEntities); - } - try (var ignored = phase("bulkSoftDeleteInvalidate")) { - invalidateMany(changedEntities); - } - try (var ignored = phase("bulkSoftDeleteChangeEvents")) { + private void writeBulkChangeEvents( + List changed, EventType eventType, String userName, String phaseName) { + try (var ignored = phase(phaseName)) { List changeEventJsons = new ArrayList<>(); for (EntityUpdater u : changed) { - buildChangeEventJsonForBulkOperation(u.getUpdated(), ENTITY_SOFT_DELETED, updatedBy) + buildChangeEventJsonForBulkOperation(u.getUpdated(), eventType, userName) .ifPresent(changeEventJsons::add); } insertChangeEventsBatch(changeEventJsons); } - - ListCountCache.invalidate(entityType); - runSoftDeleteAdditionalChildren(entities, updatedBy); - } - - /** - * Mirror of {@link #bulkRestoreContainedChildren(List, 
String)} for soft delete: one - * batched {@code findToBatchAllTypes} per tree level, then dispatch grouped child IDs to - * each child type's {@link #bulkSoftDeleteSubtree(List, String)}. - */ - private void bulkSoftDeleteContainedChildren(List parents, String updatedBy) { - List parentIds = new ArrayList<>(parents.size()); - for (T parent : parents) { - parentIds.add(parent.getId().toString()); - } - List relationships; - try (var ignored = phase("bulkSoftDeleteFindChildren")) { - relationships = - daoCollection - .relationshipDAO() - .findToBatchAllTypes(parentIds, Relationship.CONTAINS.ordinal(), ALL); - } - if (relationships.isEmpty()) { - return; - } - Map> idsByChildType = new HashMap<>(); - for (var rel : relationships) { - if (!entityType.equals(rel.getFromEntity())) { - continue; - } - idsByChildType - .computeIfAbsent(rel.getToEntity(), k -> new ArrayList<>()) - .add(UUID.fromString(rel.getToId())); - } - for (var entry : idsByChildType.entrySet()) { - EntityRepository repo = Entity.getEntityRepository(entry.getKey()); - repo.bulkSoftDeleteSubtree(entry.getValue(), updatedBy); - } - } - - private void runSoftDeleteAdditionalChildren(List entities, String updatedBy) { - for (T entity : entities) { - softDeleteAdditionalChildren(entity.getId(), updatedBy); - } - } - - /** - * Hook called once per soft-deleted entity for repositories that have non-CONTAINS related - * entities that need to be soft-deleted alongside the parent (e.g., charts attached to - * dashboards via HAS). Default: no-op. - */ - protected void softDeleteAdditionalChildren(UUID id, String updatedBy) { - // No-op. Override in subclasses for HAS-style related-entity soft delete. 
} public final void addRelationship( diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java index c0bf9cd0de28..cf8a5e3405ed 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java @@ -808,6 +808,8 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont String jobId = UUID.randomUUID().toString(); String userName = securityContext.getUserPrincipal().getName(); ExecutorService executorService = AsyncService.getInstance().getExecutorService(); + // Intentionally don't capture uriInfo in the lambda — JAX-RS may invalidate it once the + // 202 response is sent. The WebSocket notification only needs name/status, not HREFs. executorService.submit( RequestLatencyContext.wrapWithContext( () -> { @@ -819,7 +821,6 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont return; } repository.restoreFromSearch(response.getEntity()); - addHref(uriInfo, response.getEntity()); LOG.info( "[AsyncRestore] Restored {}:{} (jobId={})", Entity.getEntityTypeFromObject(response.getEntity()), diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java index bf44d16fdba6..669d1ec508b8 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java @@ -43,12 +43,24 @@ import org.openmetadata.service.util.EntityUtil.RelationIncludes; /** - * Unit tests for the iterative bulk restore path introduced for issue #4003. 
Verifies that - * {@link EntityRepository#restoreChildren(UUID, String)} groups children by entity type and - * dispatches a single {@link EntityRepository#bulkRestoreSubtree(List, String)} call per type - * (instead of N recursive {@code Entity.restoreEntity} calls), and that the bulk path skips - * empty inputs and invokes the {@code restoreAdditionalChildren} extension hook once per - * restored entity. + * Unit tests for the iterative bulk restore + bulk soft-delete paths introduced for + * issue #4003. Verifies the dispatch shape that's testable without spinning up the full + * bulk write path: + * + *

<ul>
+ *   <li>{@link EntityRepository#restoreChildren(UUID, String)} groups CONTAINS children by + * entity type and dispatches a single {@link EntityRepository#bulkRestoreSubtree(List, + * String)} call per type (instead of N recursive {@code Entity.restoreEntity} calls).
+ *   <li>{@link EntityRepository#deleteChildren(List, boolean, String)} with + * {@code hardDelete=false} dispatches one {@link EntityRepository#bulkSoftDeleteSubtree( + * List, String)} call per type.
+ *   <li>Both bulk methods bail out cleanly on null / empty inputs and on no-deleted-found.
+ *   <li>Both bulk methods issue a single batched {@code findToBatchAllTypes} per tree level + * (replacing the per-parent {@code findTo} round-trip).
+ * </ul>
+ * + * The full bulk DB-write path (version history, updateMany, change events) is exercised in + * {@code RestoreHierarchyIT}, which runs against a real Docker stack. */ class EntityRepositoryRestoreTest { From 1e00abb46d6c087bc866fcd768f1b850c804f3eb Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Fri, 8 May 2026 21:28:20 -0700 Subject: [PATCH 05/38] style: ruff reflow on AsyncJobResponse jobId guard Co-Authored-By: Claude Opus 4.7 --- ingestion/src/metadata/sdk/entities/base.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ingestion/src/metadata/sdk/entities/base.py b/ingestion/src/metadata/sdk/entities/base.py index 7c0a65fb99d6..088000a4dc33 100644 --- a/ingestion/src/metadata/sdk/entities/base.py +++ b/ingestion/src/metadata/sdk/entities/base.py @@ -80,9 +80,7 @@ def from_response(cls, payload: Any) -> "AsyncJobResponse": # noqa: UP037 if isinstance(payload, dict): job_id = payload.get("jobId") if not job_id: - raise ValueError( - f"Async response is missing a non-empty jobId: {payload!r}" - ) + raise ValueError(f"Async response is missing a non-empty jobId: {payload!r}") return cls(job_id=str(job_id), message=payload.get("message")) raise TypeError(f"Cannot coerce {type(payload).__name__} into AsyncJobResponse") From 6235b867438f9ee20e7b467019bd8b6faf798cc0 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Sat, 9 May 2026 08:23:06 -0700 Subject: [PATCH 06/38] fix(restore,delete): walk children even when parent already at target state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both bulkRestoreSubtree and bulkSoftDeleteSubtree previously loaded only DELETED / NON_DELETED entities and returned early when none matched — which dropped the children walk for mixed-state hierarchies. 
Old per-entity flow always called restoreChildren / deleteChildren before checking the parent's state, so a deleted descendant under an already-restored intermediate (or vice versa) would still be picked up. Switch both bulk methods to load Include.ALL, walk children regardless, then filter to entities that actually need flipping for the deferred-store update phase. The cascade now matches the pre-bulk recursive behavior. Also fixes the Python sync-restore unit tests by providing a complete Table-shaped fixture (id + name + fullyQualifiedName + columns) so pydantic_core can validate the mocked response. Co-Authored-By: Claude Opus 4.7 --- .../tests/unit/sdk/test_restore_async.py | 15 ++++++++-- .../service/jdbi3/EntityRepository.java | 30 +++++++++++++++---- .../jdbi3/EntityRepositoryRestoreTest.java | 19 ++++++------ 3 files changed, 46 insertions(+), 18 deletions(-) diff --git a/ingestion/tests/unit/sdk/test_restore_async.py b/ingestion/tests/unit/sdk/test_restore_async.py index 87b48fbf2dd7..0a014aa23554 100644 --- a/ingestion/tests/unit/sdk/test_restore_async.py +++ b/ingestion/tests/unit/sdk/test_restore_async.py @@ -28,10 +28,21 @@ def mock_client(): return client +def _table_payload(table_id: str) -> dict: + """Minimum dict shape that pydantic_core accepts as a Table.""" + return { + "id": table_id, + "name": "t", + "fullyQualifiedName": "service.db.schema.t", + "deleted": False, + "columns": [], + } + + def test_restore_sync_calls_put_without_async_param(mock_client): table_id = "b67eac63-9e43-41f5-afb9-387c85df1d8b" rest_client = mock_client.client - rest_client.put.return_value = {"id": table_id, "name": "t", "deleted": False} + rest_client.put.return_value = _table_payload(table_id) Tables.restore(table_id) @@ -59,7 +70,7 @@ def test_restore_async_appends_async_query_param(mock_client): def test_fluent_restore_request_sync_returns_entity(mock_client): table_id = "b67eac63-9e43-41f5-afb9-387c85df1d8b" rest_client = mock_client.client - 
rest_client.put.return_value = {"id": table_id, "name": "t", "deleted": False} + rest_client.put.return_value = _table_payload(table_id) op = Tables.restore_request(table_id) assert isinstance(op, RestoreOperation) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index 6d125eeeab25..62ed1dd3ccd5 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -5599,15 +5599,24 @@ public final void bulkRestoreSubtree(List ids, String updatedBy) { if (ids == null || ids.isEmpty()) { return; } - List deletedEntities = loadForBulk(ids, DELETED, "bulkRestoreLoad"); - if (deletedEntities.isEmpty()) { + // Load with ALL — we still need to walk children when the parents at this level are + // already restored (or never deleted), in case deeper descendants are deleted and + // must be flipped. Matches the previous recursive path that always called + // restoreChildren before checking the parent's deleted state. 
+ List entities = loadForBulk(ids, ALL, "bulkRestoreLoad"); + if (entities.isEmpty()) { return; } dispatchToContainedChildren( - deletedEntities, + entities, "bulkRestoreFindChildren", (childRepo, childIds) -> childRepo.bulkRestoreSubtree(childIds, updatedBy)); + List deletedEntities = + entities.stream().filter(e -> Boolean.TRUE.equals(e.getDeleted())).toList(); + if (deletedEntities.isEmpty()) { + return; + } List updaters = buildBulkUpdaters(deletedEntities, updatedBy, Operation.PUT, "bulkRestoreUpdaters", null); List changed = filterChanged(updaters); @@ -5703,20 +5712,29 @@ public final void bulkSoftDeleteSubtree(List ids, String updatedBy) { } return; } - List entities = loadForBulk(ids, NON_DELETED, "bulkSoftDeleteLoad"); - if (entities.isEmpty()) { + // Load with ALL so we still walk children even when this level's parents are already + // soft-deleted — a descendant may have been restored independently and needs to be + // re-deleted as part of the parent's cascade. Matches the previous per-entity flow + // where deleteChildren ran before the parent's deleted state mattered. 
+ List allEntities = loadForBulk(ids, ALL, "bulkSoftDeleteLoad"); + if (allEntities.isEmpty()) { return; } + List entities = + allEntities.stream().filter(e -> !Boolean.TRUE.equals(e.getDeleted())).toList(); for (T entity : entities) { checkSystemEntityDeletion(entity); preDelete(entity, updatedBy); } dispatchToContainedChildren( - entities, + allEntities, "bulkSoftDeleteFindChildren", (childRepo, childIds) -> childRepo.bulkSoftDeleteSubtree(childIds, updatedBy)); + if (entities.isEmpty()) { + return; + } List updaters = buildBulkUpdaters( entities, diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java index 669d1ec508b8..998112c62b01 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java @@ -193,13 +193,11 @@ void bulkRestoreSubtree_emptyOrNullIds_isNoOp() { void bulkRestoreSubtree_noDeletedEntitiesFound_isNoOp() { CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); UUID id = UUID.randomUUID(); - when(pipelineDAO.findEntitiesByIds(anyList(), eq(org.openmetadata.schema.type.Include.DELETED))) - .thenReturn(List.of()); + when(pipelineDAO.findEntitiesByIds(anyList(), eq(Include.ALL))).thenReturn(List.of()); repo.bulkRestoreSubtree(List.of(id), "user"); - verify(pipelineDAO, atLeastOnce()) - .findEntitiesByIds(anyList(), eq(org.openmetadata.schema.type.Include.DELETED)); + verify(pipelineDAO, atLeastOnce()).findEntitiesByIds(anyList(), eq(Include.ALL)); assertEquals(0, repo.restoreAdditionalChildrenCalls); } @@ -208,9 +206,11 @@ void bulkRestoreSubtree_usesBatchedFindToOncePerLevel() { CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); UUID a = UUID.randomUUID(); UUID b = UUID.randomUUID(); - Pipeline pa = new 
Pipeline().withId(a).withName("a").withFullyQualifiedName("svc.a"); - Pipeline pb = new Pipeline().withId(b).withName("b").withFullyQualifiedName("svc.b"); - when(pipelineDAO.findEntitiesByIds(anyList(), eq(Include.DELETED))).thenReturn(List.of(pa, pb)); + Pipeline pa = + new Pipeline().withId(a).withName("a").withFullyQualifiedName("svc.a").withDeleted(true); + Pipeline pb = + new Pipeline().withId(b).withName("b").withFullyQualifiedName("svc.b").withDeleted(true); + when(pipelineDAO.findEntitiesByIds(anyList(), eq(Include.ALL))).thenReturn(List.of(pa, pb)); when(relationshipDAO.findToBatchAllTypes( anyList(), eq(Relationship.CONTAINS.ordinal()), eq(Include.ALL))) .thenReturn(List.of()); @@ -277,7 +277,7 @@ void bulkSoftDeleteSubtree_emptyOrNullIds_isNoOp() { repo.bulkSoftDeleteSubtree(null, "user"); repo.bulkSoftDeleteSubtree(List.of(), "user"); - verify(pipelineDAO, never()).findEntitiesByIds(anyList(), eq(Include.NON_DELETED)); + verify(pipelineDAO, never()).findEntitiesByIds(anyList(), eq(Include.ALL)); assertEquals(0, repo.softDeleteAdditionalChildrenCalls); } @@ -288,8 +288,7 @@ void bulkSoftDeleteSubtree_usesBatchedFindToOncePerLevel() { UUID b = UUID.randomUUID(); Pipeline pa = new Pipeline().withId(a).withName("a").withFullyQualifiedName("svc.a"); Pipeline pb = new Pipeline().withId(b).withName("b").withFullyQualifiedName("svc.b"); - when(pipelineDAO.findEntitiesByIds(anyList(), eq(Include.NON_DELETED))) - .thenReturn(List.of(pa, pb)); + when(pipelineDAO.findEntitiesByIds(anyList(), eq(Include.ALL))).thenReturn(List.of(pa, pb)); when(relationshipDAO.findToBatchAllTypes( anyList(), eq(Relationship.CONTAINS.ordinal()), eq(Include.ALL))) .thenReturn(List.of()); From 6137d0d92e4dc5b806f045f9abf7cd3d11c5048b Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Sat, 9 May 2026 08:29:25 -0700 Subject: [PATCH 07/38] review: pre-validate async restore + batch chart restore - Add a pre-check in restoreEntityAsync (cheap find(id, DELETED) before dispatch) so 
callers requesting a non-existent or non-deleted entity get a synchronous 404 instead of a 202 followed by a delayed FAILED WebSocket notification. - Replace the per-chart Entity.restoreEntity loop in DashboardRepository.restoreAdditionalChildren with one ChartRepository.bulkRestoreSubtree call. The soft-delete side already routes through deleteChildren -> bulkSoftDeleteSubtree. Co-Authored-By: Claude Opus 4.7 --- .../service/jdbi3/DashboardRepository.java | 10 +++++++--- .../openmetadata/service/resources/EntityResource.java | 4 ++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java index c8f89a1cf76a..b03e761a4234 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java @@ -335,10 +335,14 @@ protected void restoreAdditionalChildren(UUID dashboardId, String updatedBy) { } } - for (CollectionDAO.EntityRelationshipRecord record : filteredChartRecordsToBeRestored) { - LOG.info("Recursively restoring {} {}", record.getType(), record.getId()); - Entity.restoreEntity(updatedBy, record.getType(), record.getId()); + if (filteredChartRecordsToBeRestored.isEmpty()) { + return; } + List chartIds = + filteredChartRecordsToBeRestored.stream() + .map(CollectionDAO.EntityRelationshipRecord::getId) + .toList(); + Entity.getEntityRepository(CHART).bulkRestoreSubtree(chartIds, updatedBy); } @Override diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java index cf8a5e3405ed..200aeab01bee 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java +++ 
b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java @@ -805,6 +805,10 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont OperationContext operationContext = new OperationContext(entityType, MetadataOperation.EDIT_ALL); authorizer.authorize(securityContext, operationContext, getResourceContextById(id)); + // Cheap pre-check so we return 404 synchronously instead of 202 + delayed WebSocket + // FAILED for an entity that doesn't exist or isn't soft-deleted. Avoids spinning up + // an executor task for a request that can't succeed. + repository.find(id, Include.DELETED); String jobId = UUID.randomUUID().toString(); String userName = securityContext.getUserPrincipal().getName(); ExecutorService executorService = AsyncService.getInstance().getExecutorService(); From 8d2b9e6565b88ea8d0ea3484b758d4b589c30725 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Sat, 9 May 2026 11:07:27 -0700 Subject: [PATCH 08/38] fix(restore,delete): pre-validate async restore + batch chart restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three review-driven fixes following CI failures: 1. Revert the chart bulk-restore in DashboardRepository back to per-chart Entity.restoreEntity. The bulk shortcut skipped chart-specific setFieldsInternal hydration that DashboardResourceIT.test_delete Dashboard_chartBelongsToSingleDashboard_chartIsDeletedThenRestored relies on; charts are typically few per dashboard, so the loop isn't a hot path. 2. Restore DashboardRepository.deleteChildren(UUID,...) override to handle the hard-delete chart cascade. The previous migration moved chart cleanup to softDeleteAdditionalChildren, which is only invoked from the soft-delete path — hard delete bypasses it. Soft delete is still routed through the hook so it runs both for direct deletes and when dashboards are descendants in a bulk soft-delete cascade. 
Both paths share a private cascadeChartCleanup helper that takes the hardDelete flag. 3. Capture the entity name before the async-restore dispatch and pass it to sendRestoreOperationFailedNotification, so clients receive a meaningful entityName in the WebSocket payload instead of the raw UUID string. Co-Authored-By: Claude Opus 4.7 --- .../service/jdbi3/DashboardRepository.java | 35 ++++++++++++++----- .../service/resources/EntityResource.java | 10 +++--- 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java index b03e761a4234..11c259ad5df0 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java @@ -209,6 +209,21 @@ public void clearFields(Dashboard dashboard, Fields fields) { fields.contains("usageSummary") ? dashboard.getUsageSummary() : null); } + // Hard-delete cascade for chart HAS-links. The bulk path doesn't apply to hard delete + // (cleanup() removes rows directly), so this override re-creates the previous chart + // hard-delete behavior. Soft delete is handled by softDeleteAdditionalChildren so that + // it also runs when a dashboard is a descendant of a larger soft-delete cascade. + @Transaction + @Override + protected void deleteChildren( + UUID dashboardId, boolean recursive, boolean hardDelete, String updatedBy) { + super.deleteChildren(dashboardId, recursive, hardDelete, updatedBy); + if (!hardDelete) { + return; + } + cascadeChartCleanup(dashboardId, updatedBy, true); + } + // Soft-delete chart links (HAS relation). 
The CONTAINS subtree is handled by the bulk // path in EntityRepository.bulkSoftDeleteSubtree; chart handling is a per-dashboard // concern and lives in the per-entity extension hook so it runs both for direct dashboard @@ -217,6 +232,10 @@ public void clearFields(Dashboard dashboard, Fields fields) { @Transaction @Override protected void softDeleteAdditionalChildren(UUID dashboardId, String updatedBy) { + cascadeChartCleanup(dashboardId, updatedBy, false); + } + + private void cascadeChartCleanup(UUID dashboardId, String updatedBy, boolean hardDelete) { List chartRecords = daoCollection .relationshipDAO() @@ -272,7 +291,7 @@ protected void softDeleteAdditionalChildren(UUID dashboardId, String updatedBy) } } - deleteChildren(filteredChartRecordsToBeDeleted, false, updatedBy); + deleteChildren(filteredChartRecordsToBeDeleted, hardDelete, updatedBy); } // Restore chart links (HAS relation). The CONTAINS subtree is now restored by the bulk @@ -335,14 +354,14 @@ protected void restoreAdditionalChildren(UUID dashboardId, String updatedBy) { } } - if (filteredChartRecordsToBeRestored.isEmpty()) { - return; + // Per-chart restore preserves the full chart restoreEntity flow (setFieldsInternal, + // setInheritedFields, lifecycle hooks, ES restore-from-search). Charts are typically + // few per dashboard, so the loop isn't a hot path; the bulkRestoreSubtree shortcut + // skipped chart-specific setup that the test in DashboardResourceIT relies on. 
+ for (CollectionDAO.EntityRelationshipRecord record : filteredChartRecordsToBeRestored) { + LOG.info("Recursively restoring {} {}", record.getType(), record.getId()); + Entity.restoreEntity(updatedBy, record.getType(), record.getId()); } - List chartIds = - filteredChartRecordsToBeRestored.stream() - .map(CollectionDAO.EntityRelationshipRecord::getId) - .toList(); - Entity.getEntityRepository(CHART).bulkRestoreSubtree(chartIds, updatedBy); } @Override diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java index 200aeab01bee..af228f9dd3dc 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java @@ -807,8 +807,10 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont authorizer.authorize(securityContext, operationContext, getResourceContextById(id)); // Cheap pre-check so we return 404 synchronously instead of 202 + delayed WebSocket // FAILED for an entity that doesn't exist or isn't soft-deleted. Avoids spinning up - // an executor task for a request that can't succeed. - repository.find(id, Include.DELETED); + // an executor task for a request that can't succeed. Capturing the entity now also + // gives us a meaningful name for any later FAILED notification. + T preCheck = repository.find(id, Include.DELETED); + String entityName = preCheck.getName() != null ? 
preCheck.getName() : id.toString(); String jobId = UUID.randomUUID().toString(); String userName = securityContext.getUserPrincipal().getName(); ExecutorService executorService = AsyncService.getInstance().getExecutorService(); @@ -821,7 +823,7 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont PutResponse response = repository.restoreEntity(userName, id); if (response == null) { WebsocketNotificationHandler.sendRestoreOperationFailedNotification( - jobId, securityContext, id.toString(), "Entity is not in deleted state"); + jobId, securityContext, entityName, "Entity is not in deleted state"); return; } repository.restoreFromSearch(response.getEntity()); @@ -837,7 +839,7 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont WebsocketNotificationHandler.sendRestoreOperationFailedNotification( jobId, securityContext, - id.toString(), + entityName, e.getMessage() == null ? e.toString() : e.getMessage()); } })); From b6357c28b84ada745cc76a35402581e68c820f51 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Tue, 12 May 2026 14:34:44 -0700 Subject: [PATCH 09/38] fix(restore,delete): pre-validate async restore + batch chart restore Address PR review: - AsyncService.BoundedExecutorService.execute previously acquired the semaphore permit INSIDE the spawned virtual thread, so submit returned immediately and spawned an unbounded number of threads that blocked on semaphore.acquire(). Move the acquire to the submitting thread so it back-pressures the caller and actually bounds submission depth. - bulkRestoreSubtree returned early when no entities at the current level needed flipping, which skipped runRestoreAdditionalChildren and could miss HAS-related descendants under already-restored intermediate nodes. Same problem mirrored on bulkSoftDeleteSubtree. Run the hooks unconditionally over the loaded entity list after the optional update phase. 
- restoreEntity caught EntityNotFoundException and returned null without invoking restoreAdditionalChildren. Move the hook outside the try so re-entered cascades still reconcile HAS-related descendants even when this node is already at the target state. - Wrap Entity.restoreEntity in DashboardRepository.restoreAdditionalChildren with try/catch + LOG.error so CI surfaces the real cause when chart restoration fails. Entity.restoreEntity itself has no exception handling wrapper, and the current chartBelongsToSingleDashboard CI failure points to a silent throw aborting the dashboard restore. Co-Authored-By: Claude Opus 4.7 --- .../service/jdbi3/DashboardRepository.java | 14 +++- .../service/jdbi3/EntityRepository.java | 74 ++++++++++--------- .../service/util/AsyncService.java | 40 ++++++---- 3 files changed, 77 insertions(+), 51 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java index 11c259ad5df0..989d4ab80a35 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java @@ -360,7 +360,19 @@ protected void restoreAdditionalChildren(UUID dashboardId, String updatedBy) { // skipped chart-specific setup that the test in DashboardResourceIT relies on. for (CollectionDAO.EntityRelationshipRecord record : filteredChartRecordsToBeRestored) { LOG.info("Recursively restoring {} {}", record.getType(), record.getId()); - Entity.restoreEntity(updatedBy, record.getType(), record.getId()); + try { + Entity.restoreEntity(updatedBy, record.getType(), record.getId()); + } catch (RuntimeException e) { + // Surface the underlying cause — Entity.restoreEntity has no try/catch wrapper of + // its own and silently aborts the whole dashboard restore if a single chart fails. 
+ LOG.error( + "[ChartRestoreCascade] Failed to restore chart {} for dashboard {}: {}", + record.getId(), + dashboardId, + e.getMessage(), + e); + throw e; + } } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index 62ed1dd3ccd5..880087a27926 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -5539,6 +5539,7 @@ public final PutResponse restoreEntity(String updatedBy, UUID id) { // Finally set entity deleted flag to false LOG.info("Restoring the {} {}", entityType, id); + PutResponse response = null; try { T original = find(id, DELETED); setFieldsInternal(original, putFields); @@ -5550,14 +5551,15 @@ public final PutResponse restoreEntity(String updatedBy, UUID id) { updater.update(); // Restore moves the row from deleted=true to deleted=false, changing the listing total. ListCountCache.invalidate(entityType); - // Run the same hook the bulk path runs — keeps direct-entity restore in sync with - // bulkRestoreSubtree for repos that link non-CONTAINS entities (e.g., dashboard charts). - restoreAdditionalChildren(id, updatedBy); - return new PutResponse<>(Status.OK, updated, ENTITY_RESTORED); + response = new PutResponse<>(Status.OK, updated, ENTITY_RESTORED); } catch (EntityNotFoundException e) { - LOG.info("Entity is not in deleted state {} {}", entityType, id); - return null; + LOG.info("Entity already restored or not in deleted state {} {}", entityType, id); } + // Run the per-entity hook regardless of whether this node needed flipping. A + // re-entered cascade where this level is already restored must still reconcile + // HAS-related children (e.g., dashboard charts) of nested descendants. 
+ restoreAdditionalChildren(id, updatedBy); + return response; } @Transaction @@ -5614,19 +5616,19 @@ public final void bulkRestoreSubtree(List ids, String updatedBy) { List deletedEntities = entities.stream().filter(e -> Boolean.TRUE.equals(e.getDeleted())).toList(); - if (deletedEntities.isEmpty()) { - return; - } - List updaters = - buildBulkUpdaters(deletedEntities, updatedBy, Operation.PUT, "bulkRestoreUpdaters", null); - List changed = filterChanged(updaters); - if (changed.isEmpty()) { - runRestoreAdditionalChildren(deletedEntities, updatedBy); - return; + if (!deletedEntities.isEmpty()) { + List updaters = + buildBulkUpdaters(deletedEntities, updatedBy, Operation.PUT, "bulkRestoreUpdaters", null); + List changed = filterChanged(updaters); + if (!changed.isEmpty()) { + persistBulkUpdaters(changed, ENTITY_RESTORED, updatedBy, "bulkRestore"); + ListCountCache.invalidate(entityType); + } } - persistBulkUpdaters(changed, ENTITY_RESTORED, updatedBy, "bulkRestore"); - ListCountCache.invalidate(entityType); - runRestoreAdditionalChildren(deletedEntities, updatedBy); + // Always run per-entity hooks even when nothing at THIS level needed flipping — + // a re-entered cascade may still have HAS-related children attached to nested + // descendants that require reconciliation. 
+ runRestoreAdditionalChildren(entities, updatedBy); } private void runRestoreAdditionalChildren(List entities, String updatedBy) { @@ -5732,24 +5734,24 @@ public final void bulkSoftDeleteSubtree(List ids, String updatedBy) { "bulkSoftDeleteFindChildren", (childRepo, childIds) -> childRepo.bulkSoftDeleteSubtree(childIds, updatedBy)); - if (entities.isEmpty()) { - return; - } - List updaters = - buildBulkUpdaters( - entities, - updatedBy, - Operation.SOFT_DELETE, - "bulkSoftDeleteUpdaters", - e -> e.setDeleted(true)); - List changed = filterChanged(updaters); - if (changed.isEmpty()) { - runSoftDeleteAdditionalChildren(entities, updatedBy); - return; - } - persistBulkUpdaters(changed, ENTITY_SOFT_DELETED, updatedBy, "bulkSoftDelete"); - ListCountCache.invalidate(entityType); - runSoftDeleteAdditionalChildren(entities, updatedBy); + if (!entities.isEmpty()) { + List updaters = + buildBulkUpdaters( + entities, + updatedBy, + Operation.SOFT_DELETE, + "bulkSoftDeleteUpdaters", + e -> e.setDeleted(true)); + List changed = filterChanged(updaters); + if (!changed.isEmpty()) { + persistBulkUpdaters(changed, ENTITY_SOFT_DELETED, updatedBy, "bulkSoftDelete"); + ListCountCache.invalidate(entityType); + } + } + // Always run per-entity hooks even when nothing at THIS level needed flipping — + // descendants restored independently before the cascade still need to be re-deleted + // by the per-entity hook. 
+ runSoftDeleteAdditionalChildren(allEntities, updatedBy); } private void runSoftDeleteAdditionalChildren(List entities, String updatedBy) { diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/util/AsyncService.java b/openmetadata-service/src/main/java/org/openmetadata/service/util/AsyncService.java index 8e6d74405b1a..fcd26ae9ad6e 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/util/AsyncService.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/util/AsyncService.java @@ -251,20 +251,32 @@ private static class BoundedExecutorService extends AbstractExecutorService { @Override public void execute(Runnable command) { - delegate.execute( - () -> { - try { - semaphore.acquire(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException("Interrupted waiting for concurrency permit", e); - } - try { - command.run(); - } finally { - semaphore.release(); - } - }); + // Acquire the permit on the SUBMITTING thread so that a caller producing tasks faster + // than the executor can run them is back-pressured here, rather than spawning an + // unbounded number of virtual threads that all sit blocked on semaphore.acquire(). + // Without this, the maxConcurrency only limits concurrent execution; submission depth + // is unbounded and pins JDBI connections under burst load. + try { + semaphore.acquire(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Interrupted waiting for concurrency permit", e); + } + try { + delegate.execute( + () -> { + try { + command.run(); + } finally { + semaphore.release(); + } + }); + } catch (RuntimeException submitFailure) { + // delegate.execute itself failed (e.g., rejected) — release the permit we just + // acquired so the next caller can make progress. 
+ semaphore.release(); + throw submitFailure; + } } @Override From 96c5228a29ac7dd4f6ac0be5039aa7c3a839fbee Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Tue, 12 May 2026 14:45:13 -0700 Subject: [PATCH 10/38] review: dispatch bulk lifecycle event + style cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address remaining PR review feedback: - bulkRestoreSubtree/bulkSoftDeleteSubtree previously skipped per-entity postUpdate, so SearchIndexHandler.onEntitiesUpdated never fired for descendants. The native ES cascade in softDeleteOrRestoreEntityIndex only flips deleted on child indexes whose docs join via the parent's id field (DatabaseService→Database→Schema→Table); chart docs join via service.id not dashboard.id, so charts drifted in ES after bulk dashboard restore. Dispatch EntityLifecycleEventDispatcher.onEntitiesUpdated from persistBulkUpdaters so SearchIndexHandler's bulk updateEntitiesIndex picks up the change. SearchIndexHandler internally batches the writes per type, so the dispatch stays bulk on the ES side. - Replace java.util.function.Consumer FQN with proper import in EntityRepository (CLAUDE.md prohibits FQN usage). - AsyncRestore failure log now includes the captured entityName alongside entityType+id so operators can correlate the failure with the WebSocket payload. 
Co-Authored-By: Claude Opus 4.7 --- .../service/jdbi3/EntityRepository.java | 24 +++++++++++++------ .../service/resources/EntityResource.java | 7 +++++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index 880087a27926..ea24c130e120 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -145,6 +145,7 @@ import java.util.concurrent.TimeUnit; import java.util.function.BiConsumer; import java.util.function.BiPredicate; +import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -5778,11 +5779,7 @@ private List loadForBulk(List ids, Include include, String phaseName) { } private List buildBulkUpdaters( - List originals, - String updatedBy, - Operation op, - String phaseName, - java.util.function.Consumer mutator) { + List originals, String updatedBy, Operation op, String phaseName, Consumer mutator) { long now = System.currentTimeMillis(); List updaters = new ArrayList<>(originals.size()); try (var ignored = phase(phaseName)) { @@ -5807,8 +5804,18 @@ private List filterChanged(List updaters) { /** * Apply a batch of {@link EntityUpdater}s already in deferred-store state: write version - * history, persist entity rows, invalidate caches, emit change events. {@code phasePrefix} - * is used to tag latency phases (e.g. {@code "bulkRestore"} → {@code "bulkRestoreVersionHistory"}). + * history, persist entity rows, invalidate caches, dispatch the bulk lifecycle event so + * the search index handler updates ES, then emit change events. {@code phasePrefix} is + * used to tag latency phases (e.g. 
{@code "bulkRestore"} → + * {@code "bulkRestoreVersionHistory"}). + * + *

The lifecycle dispatch is required because the top-level + * {@code restoreFromSearch}/{@code deleteFromSearch} cascade only flips the deleted flag on + * child indexes whose docs join on the parent's id field. HAS-style descendants (e.g., + * charts attached to dashboards) and entity types without a {@code parent.id} field in + * their ES mapping would otherwise drift — DB shows restored / soft-deleted, but ES still + * reflects the previous state. {@code SearchIndexHandler.onEntitiesUpdated} batches the + * writes via {@code updateEntitiesIndex}, so this is still bulk on the ES side. */ private void persistBulkUpdaters( List changed, EventType eventType, String userName, String phasePrefix) { @@ -5820,6 +5827,9 @@ private void persistBulkUpdaters( try (var ignored = phase(phasePrefix + "Invalidate")) { invalidateMany(changedEntities); } + try (var ignored = phase(phasePrefix + "LifecycleDispatch")) { + EntityLifecycleEventDispatcher.getInstance().onEntitiesUpdated(changedEntities, null, null); + } writeBulkChangeEvents(changed, eventType, userName, phasePrefix + "ChangeEvents"); } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java index af228f9dd3dc..629fc0c423e0 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java @@ -835,7 +835,12 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont WebsocketNotificationHandler.sendRestoreOperationCompleteNotification( jobId, securityContext, response.getEntity()); } catch (Exception e) { - LOG.error("[AsyncRestore] Failed to restore {}:{}", entityType, id, e); + LOG.error( + "[AsyncRestore] Failed to restore {}:{} (name={})", + entityType, + id, + entityName, + e); 
WebsocketNotificationHandler.sendRestoreOperationFailedNotification( jobId, securityContext, From d0d507ce3d0ed575e647586ddde88593ad147fca Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Tue, 12 May 2026 15:04:37 -0700 Subject: [PATCH 11/38] feat(ingestion): server-side async delete + restore from legacy client MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So mark-deletion cascades during ingestion stop blocking on the server's recursive walk (issue #4003 mirror on the ingestion side). Legacy OpenMetadata client: - delete_async(entity, id, recursive, hard_delete) hits DELETE /{entity}/async/{id}?recursive=...&hardDelete=... - restore_async(entity, id) hits PUT /{entity}/restore?async=true Both return the 202 payload (jobId + message) so the caller can correlate the dispatch with WebSocket notifications. Sink integration: - DeleteEntity model gains a dispatch_async flag (default False). - delete_entity_from_source / delete_entity_by_name accept an explicit dispatch_async= or fall back to the OM_INGESTION_DELETE_ASYNC env var via _default_dispatch_async(). This lets operators flip the entire ingestion process to async cascades with a single env var, with no per-connector schema changes. - MetadataRestSink.delete_entity routes through metadata.delete_async when DeleteEntity.dispatch_async is set, logging the returned jobId for operator correlation. Backend tweaks: - EntityResource.restoreEntityAsync's pre-check now distinguishes the two raw EntityNotFoundException failure modes — 404 if the entity truly doesn't exist (Include.ALL still misses) vs 400 if it exists but isn't soft-deleted. - Add a Javadoc "operational ceiling" note to bulkRestoreSubtree / bulkSoftDeleteSubtree calling out the single-@Transaction shape and the implication for concurrent restores against a small DB pool; chunked-transaction support tracked as a follow-up. 
Tests: - New pytest cases in test_ometa_restore.py for the two async client methods (URL shape + 202 payload pass-through). Co-Authored-By: Claude Opus 4.7 --- .../src/metadata/ingestion/api/delete.py | 28 ++++++++++++- .../ingestion/models/delete_entity.py | 8 +++- .../src/metadata/ingestion/ometa/ometa_api.py | 35 +++++++++++++++++ .../metadata/ingestion/sink/metadata_rest.py | 26 ++++++++++--- ingestion/tests/unit/test_ometa_restore.py | 39 +++++++++++++++++++ .../service/jdbi3/EntityRepository.java | 14 +++++++ .../service/resources/EntityResource.java | 25 +++++++++--- 7 files changed, 162 insertions(+), 13 deletions(-) diff --git a/ingestion/src/metadata/ingestion/api/delete.py b/ingestion/src/metadata/ingestion/api/delete.py index 0653be27c9f8..d1a64c643ec9 100644 --- a/ingestion/src/metadata/ingestion/api/delete.py +++ b/ingestion/src/metadata/ingestion/api/delete.py @@ -12,6 +12,7 @@ Delete methods """ +import os import traceback from typing import Dict, Iterable, List, Optional, Type # noqa: UP035 @@ -25,6 +26,16 @@ logger = utils_logger() +# Env var that opts every connector into the server-side async delete cascade. When set, +# mark-deletion calls fire DELETE //async/{id}?recursive=true and return 202 + a +# jobId immediately, so ingestion does not block on the server-side cascade (issue #4003). +# Explicit dispatch_async= passed to the generators overrides this default. 
+DELETE_DISPATCH_ASYNC_ENV = "OM_INGESTION_DELETE_ASYNC" + + +def _default_dispatch_async() -> bool: + return os.getenv(DELETE_DISPATCH_ASYNC_ENV, "").lower() in {"true", "1", "yes", "on"} + def delete_entity_from_source( metadata: OpenMetadata, @@ -32,6 +43,7 @@ def delete_entity_from_source( entity_source_state, mark_deleted_entity: bool = True, params: Optional[Dict[str, str]] = None, # noqa: UP006, UP045 + dispatch_async: Optional[bool] = None, # noqa: UP045 ) -> Iterable[Either[DeleteEntity]]: """ Method to delete the entities @@ -40,7 +52,11 @@ def delete_entity_from_source( :param entity_source_state: Current state of the service :param mark_deleted_entity: Option to mark the entity as deleted or not :param params: param to fetch the entity state + :param dispatch_async: Route the sink delete through the server-side async endpoint + (returns 202 + jobId, runs cascade on the server's executor) so ingestion does + not block on large hierarchies — see issue #4003. """ + use_async = dispatch_async if dispatch_async is not None else _default_dispatch_async() try: entity_state = metadata.list_all_entities(entity=entity_type, params=params) for entity in entity_state: @@ -49,6 +65,7 @@ def delete_entity_from_source( right=DeleteEntity( entity=entity, mark_deleted_entities=mark_deleted_entity, + dispatch_async=use_async, ) ) except Exception as exc: @@ -66,6 +83,7 @@ def delete_entity_by_name( entity_type: Type[T], # noqa: UP006 entity_names: List[str], # noqa: UP006 mark_deleted_entity: bool = True, + dispatch_async: Optional[bool] = None, # noqa: UP045 ) -> Iterable[Either[DeleteEntity]]: """ Method to delete the entites contained on a given list @@ -73,12 +91,20 @@ def delete_entity_by_name( :param entity_type: Pydantic Entity model :param entity_names: List of FullyQualifiedNames of the entities to be deleted :param mark_deleted_entity: Option to mark the entity as deleted or not + :param dispatch_async: see :func:`delete_entity_from_source` """ + use_async = 
dispatch_async if dispatch_async is not None else _default_dispatch_async() try: for entity_name in entity_names: entity = metadata.get_by_name(entity=entity_type, fqn=entity_name) if entity: - yield Either(right=DeleteEntity(entity=entity, mark_deleted_entities=mark_deleted_entity)) + yield Either( + right=DeleteEntity( + entity=entity, + mark_deleted_entities=mark_deleted_entity, + dispatch_async=use_async, + ) + ) except Exception as exc: yield Either( left=StackTraceError( diff --git a/ingestion/src/metadata/ingestion/models/delete_entity.py b/ingestion/src/metadata/ingestion/models/delete_entity.py index f5198db06a8d..7d8dba351d59 100644 --- a/ingestion/src/metadata/ingestion/models/delete_entity.py +++ b/ingestion/src/metadata/ingestion/models/delete_entity.py @@ -20,9 +20,13 @@ class DeleteEntity(BaseModel): - """ - Entity Reference of the entity to be deleted + """Entity reference for a deletion candidate emitted by the ingestion flow. + + ``dispatch_async`` flips the sink to the server-side async delete endpoint + (``DELETE //async/{id}``) instead of the synchronous one, so ingestion + isn't blocked on the cascade for large hierarchies (issue #4003). """ entity: Entity mark_deleted_entities: Optional[bool] = False # noqa: UP045 + dispatch_async: Optional[bool] = False # noqa: UP045 diff --git a/ingestion/src/metadata/ingestion/ometa/ometa_api.py b/ingestion/src/metadata/ingestion/ometa/ometa_api.py index 423d3a2a1a58..f9a89e8ec332 100644 --- a/ingestion/src/metadata/ingestion/ometa/ometa_api.py +++ b/ingestion/src/metadata/ingestion/ometa/ometa_api.py @@ -760,6 +760,25 @@ def delete( url += f"&hardDelete={str(hard_delete).lower()}" self.client.delete(url) + def delete_async( + self, + entity: Type[T], # noqa: UP006 + entity_id: Union[str, basic.Uuid], # noqa: UP007 + recursive: bool = False, + hard_delete: bool = False, + ) -> Optional[dict]: # noqa: UP045 + """Server-side async delete. 
+ + Issues ``DELETE /{entity}/async/{id}?recursive=...&hardDelete=...`` and returns + the 202 payload ``{"jobId": ..., "message": ...}``. The actual cascade runs on the + server's executor so ingestion can avoid blocking on large hierarchies. Caller is + responsible for tracking the returned ``jobId`` if it needs completion confirmation. + """ + url = f"{self.get_suffix(entity)}/async/{model_str(entity_id)}" + url += f"?recursive={str(recursive).lower()}" + url += f"&hardDelete={str(hard_delete).lower()}" + return self.client.delete(url) + def restore( self, entity: Type[T], # noqa: UP006 @@ -794,6 +813,22 @@ def restore( ) return None + def restore_async( + self, + entity: Type[T], # noqa: UP006 + entity_id: Union[str, basic.Uuid], # noqa: UP007 + ) -> Optional[dict]: # noqa: UP045 + """Server-side async restore. + + Issues ``PUT /{entity}/restore?async=true`` and returns the 202 payload + ``{"jobId": ..., "message": ...}``. Use this when restoring entities with large + subtrees so ingestion doesn't block on the cascade (issue #4003). Caller is + responsible for tracking the returned ``jobId`` if it needs completion confirmation. 
+ """ + url = f"{self.get_suffix(entity)}/restore?async=true" + data = {"id": model_str(entity_id)} + return self.client.put(url, json=data) + def compute_percentile(self, entity: Union[Type[T], str], date: str) -> None: # noqa: UP006, UP007 """ Compute an entity usage percentile diff --git a/ingestion/src/metadata/ingestion/sink/metadata_rest.py b/ingestion/src/metadata/ingestion/sink/metadata_rest.py index c4e96859bdde..9cae382aefd7 100644 --- a/ingestion/src/metadata/ingestion/sink/metadata_rest.py +++ b/ingestion/src/metadata/ingestion/sink/metadata_rest.py @@ -585,11 +585,27 @@ def write_users(self, record: OMetaUserProfile) -> Either[User]: @_run_dispatch.register def delete_entity(self, record: DeleteEntity) -> Either[Entity]: - self.metadata.delete( - entity=type(record.entity), - entity_id=record.entity.id, - recursive=record.mark_deleted_entities, - ) + if record.dispatch_async: + # Server-side async cascade — returns 202 + jobId immediately so ingestion + # doesn't block on large subtrees (issue #4003). The actual work runs on the + # server's executor; we surface the jobId in the log for operator correlation. 
+ response = self.metadata.delete_async( + entity=type(record.entity), + entity_id=record.entity.id, + recursive=record.mark_deleted_entities, + ) + job_id = (response or {}).get("jobId") + logger.debug( + "Dispatched async delete for %s (jobId=%s)", + record.entity.fullyQualifiedName.root, + job_id, + ) + else: + self.metadata.delete( + entity=type(record.entity), + entity_id=record.entity.id, + recursive=record.mark_deleted_entities, + ) return Either(right=record) @_run_dispatch.register diff --git a/ingestion/tests/unit/test_ometa_restore.py b/ingestion/tests/unit/test_ometa_restore.py index 5b80346ab56a..d50f137c4d33 100644 --- a/ingestion/tests/unit/test_ometa_restore.py +++ b/ingestion/tests/unit/test_ometa_restore.py @@ -131,3 +131,42 @@ def test_restore_endpoint_suffix(self): suffix = metadata.get_suffix(Table) expected_restore_endpoint = f"{suffix}/restore" self.assertEqual(expected_restore_endpoint, "/tables/restore") + + def test_restore_async_dispatches_with_async_query_param(self): + """restore_async should hit /restore?async=true and return the 202 payload.""" + metadata = OpenMetadata(self.server_config) + entity_id = "b67eac63-9e43-41f5-afb9-387c85df1d8b" + mock_response = {"jobId": "job-42", "message": "Restore initiated successfully."} + + metadata.client.put = MagicMock(return_value=mock_response) + + result = metadata.restore_async(entity=Table, entity_id=entity_id) + + self.assertEqual(result, mock_response) + metadata.client.put.assert_called_once() + call_args = metadata.client.put.call_args + self.assertEqual(call_args[0][0], "/tables/restore?async=true") + self.assertEqual(call_args[1]["json"], {"id": entity_id}) + + def test_delete_async_dispatches_with_async_query_param(self): + """delete_async should hit /async/{id}?recursive=...&hardDelete=... 
and return the + 202 payload.""" + metadata = OpenMetadata(self.server_config) + entity_id = "b67eac63-9e43-41f5-afb9-387c85df1d8b" + mock_response = {"jobId": "job-7", "message": "Delete initiated successfully."} + + metadata.client.delete = MagicMock(return_value=mock_response) + + result = metadata.delete_async( + entity=Table, + entity_id=entity_id, + recursive=True, + hard_delete=False, + ) + + self.assertEqual(result, mock_response) + metadata.client.delete.assert_called_once() + url = metadata.client.delete.call_args[0][0] + self.assertTrue(url.startswith(f"/tables/async/{entity_id}")) + self.assertIn("recursive=true", url) + self.assertIn("hardDelete=false", url) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index ea24c130e120..ca7f6d9563e5 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -5596,6 +5596,16 @@ protected void restoreChildren(UUID id, String updatedBy) { * via HAS) should implement the {@link #restoreAdditionalChildren(UUID, String)} hook — * the CONTAINS subtree is restored by the bulk path itself, so per-entity overrides of * {@code restoreChildren} are no longer invoked from inside the bulk walk. + * + *

Operational ceiling: the entire walk runs inside a single JDBI + * {@code @Transaction}, which holds one connection from the pool for the duration. The + * async restore endpoint ({@code ?async=true}) moves the work onto a virtual thread but + * keeps the same single-transaction shape — it just lets the client get a 202 back. For + * databases on the order of tens of thousands of descendants this still completes well + * inside the JDBI default lock-wait, but operators running concurrent restores against a + * small DB pool should size {@code maxConcurrency} on {@link + * org.openmetadata.service.util.AsyncService} accordingly. Chunked-transaction support is + * tracked as a follow-up. */ @Transaction public final void bulkRestoreSubtree(List ids, String updatedBy) { @@ -5703,6 +5713,10 @@ protected void restoreAdditionalChildren(UUID id, String updatedBy) { * hard-delete path (matches the existing per-entity {@code delete()} fallback). Subclasses * with non-CONTAINS linked entities should override * {@link #softDeleteAdditionalChildren(UUID, String)}. + * + *

Operational ceiling: see {@link #bulkRestoreSubtree(List, String)} — the same + * single-{@code @Transaction} shape applies on the delete side. Chunked-transaction + * support is tracked as a follow-up. */ @Transaction public final void bulkSoftDeleteSubtree(List ids, String updatedBy) { diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java index 629fc0c423e0..134d9295eefc 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java @@ -72,7 +72,9 @@ import org.openmetadata.service.OpenMetadataApplicationConfig; import org.openmetadata.service.cache.CacheBundle; import org.openmetadata.service.cache.CacheProvider; +import org.openmetadata.service.exception.BadRequestException; import org.openmetadata.service.exception.CatalogExceptionMessage; +import org.openmetadata.service.exception.EntityNotFoundException; import org.openmetadata.service.jdbi3.EntityRepository; import org.openmetadata.service.jdbi3.ListFilter; import org.openmetadata.service.limits.Limits; @@ -805,11 +807,24 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont OperationContext operationContext = new OperationContext(entityType, MetadataOperation.EDIT_ALL); authorizer.authorize(securityContext, operationContext, getResourceContextById(id)); - // Cheap pre-check so we return 404 synchronously instead of 202 + delayed WebSocket - // FAILED for an entity that doesn't exist or isn't soft-deleted. Avoids spinning up - // an executor task for a request that can't succeed. Capturing the entity now also - // gives us a meaningful name for any later FAILED notification. 
- T preCheck = repository.find(id, Include.DELETED); + // Cheap pre-check so we return a synchronous error instead of 202 + delayed WebSocket + // FAILED for a request that can't succeed. Distinguish the two failure modes that the + // raw EntityNotFoundException would conflate: 404 if the entity truly doesn't exist + // (Include.ALL still finds nothing) and 400 if it exists but is already restored. + // Capturing the entity here also yields a meaningful name for any later FAILED + // notification. + T preCheck; + try { + preCheck = repository.find(id, Include.DELETED); + } catch (EntityNotFoundException notDeleted) { + try { + repository.find(id, Include.ALL); + throw new BadRequestException( + String.format("Entity %s:%s is not in deleted state", entityType, id)); + } catch (EntityNotFoundException missing) { + throw notDeleted; + } + } String entityName = preCheck.getName() != null ? preCheck.getName() : id.toString(); String jobId = UUID.randomUUID().toString(); String userName = securityContext.getUserPrincipal().getName(); From a8f258385e0cb6b8e94d862c23492029882fa315 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Tue, 12 May 2026 15:32:03 -0700 Subject: [PATCH 12/38] fix(async): use tryAcquire so HTTP threads never block on permit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The prior fix moved semaphore.acquire() to the submitting thread to bound submission depth, but that turned the async 202 contract into a synchronous blocking call under saturation — exactly what the async restore endpoint exists to prevent, since the HTTP thread would still hit ALB / proxy idle timeouts waiting for a permit. BoundedExecutorService.execute now uses tryAcquire() and throws RejectedExecutionException immediately when permits are exhausted. The error message includes maxConcurrency and in-use counts to help operators size the executor against their actual load. 
restoreEntityAsync catches the rejection and returns 503 Service Unavailable with the same RestoreEntityResponse shape (jobId=null) so clients can retry with backoff. Other HTTP-facing async endpoints (export, bulk-add, delete) propagate the exception as a 500 today; mirroring the 503 handling there is tracked as a follow-up. Co-Authored-By: Claude Opus 4.7 --- .../service/resources/EntityResource.java | 76 +++++++++++-------- .../service/util/AsyncService.java | 29 ++++--- 2 files changed, 63 insertions(+), 42 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java index 134d9295eefc..63fa07d1eb9a 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java @@ -831,38 +831,54 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont ExecutorService executorService = AsyncService.getInstance().getExecutorService(); // Intentionally don't capture uriInfo in the lambda — JAX-RS may invalidate it once the // 202 response is sent. The WebSocket notification only needs name/status, not HREFs. 
- executorService.submit( - RequestLatencyContext.wrapWithContext( - () -> { - try { - PutResponse response = repository.restoreEntity(userName, id); - if (response == null) { + try { + executorService.submit( + RequestLatencyContext.wrapWithContext( + () -> { + try { + PutResponse response = repository.restoreEntity(userName, id); + if (response == null) { + WebsocketNotificationHandler.sendRestoreOperationFailedNotification( + jobId, securityContext, entityName, "Entity is not in deleted state"); + return; + } + repository.restoreFromSearch(response.getEntity()); + LOG.info( + "[AsyncRestore] Restored {}:{} (jobId={})", + Entity.getEntityTypeFromObject(response.getEntity()), + response.getEntity().getId(), + jobId); + WebsocketNotificationHandler.sendRestoreOperationCompleteNotification( + jobId, securityContext, response.getEntity()); + } catch (Exception e) { + LOG.error( + "[AsyncRestore] Failed to restore {}:{} (name={})", + entityType, + id, + entityName, + e); WebsocketNotificationHandler.sendRestoreOperationFailedNotification( - jobId, securityContext, entityName, "Entity is not in deleted state"); - return; + jobId, + securityContext, + entityName, + e.getMessage() == null ? e.toString() : e.getMessage()); } - repository.restoreFromSearch(response.getEntity()); - LOG.info( - "[AsyncRestore] Restored {}:{} (jobId={})", - Entity.getEntityTypeFromObject(response.getEntity()), - response.getEntity().getId(), - jobId); - WebsocketNotificationHandler.sendRestoreOperationCompleteNotification( - jobId, securityContext, response.getEntity()); - } catch (Exception e) { - LOG.error( - "[AsyncRestore] Failed to restore {}:{} (name={})", - entityType, - id, - entityName, - e); - WebsocketNotificationHandler.sendRestoreOperationFailedNotification( - jobId, - securityContext, - entityName, - e.getMessage() == null ? e.toString() : e.getMessage()); - } - })); + })); + } catch (java.util.concurrent.RejectedExecutionException saturated) { + // AsyncService is at capacity. 
Return 503 so the client can retry with backoff + // rather than block the HTTP thread here waiting on a permit. + LOG.warn( + "[AsyncRestore] Rejecting restore of {}:{} — executor saturated: {}", + entityType, + id, + saturated.getMessage()); + return Response.status(Response.Status.SERVICE_UNAVAILABLE) + .entity( + new RestoreEntityResponse( + null, "Async executor saturated, retry later: " + saturated.getMessage())) + .type(MediaType.APPLICATION_JSON) + .build(); + } RestoreEntityResponse response = new RestoreEntityResponse(jobId, "Restore initiated successfully."); return Response.accepted().entity(response).type(MediaType.APPLICATION_JSON).build(); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/util/AsyncService.java b/openmetadata-service/src/main/java/org/openmetadata/service/util/AsyncService.java index fcd26ae9ad6e..f7611fee99f6 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/util/AsyncService.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/util/AsyncService.java @@ -6,6 +6,7 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -33,7 +34,8 @@ private AsyncService() { executorService = new BoundedExecutorService( Executors.newThreadPerTaskExecutor(Thread.ofVirtual().name("om-async-", 0).factory()), - concurrencyLimiter); + concurrencyLimiter, + maxConcurrency); LOG.info("AsyncService initialized with max concurrency: {}", maxConcurrency); } @@ -243,24 +245,27 @@ private static T executeWithRetry( private static class BoundedExecutorService extends AbstractExecutorService { private final ExecutorService delegate; private final Semaphore semaphore; + private final int maxConcurrency; - BoundedExecutorService(ExecutorService 
delegate, Semaphore semaphore) { + BoundedExecutorService(ExecutorService delegate, Semaphore semaphore, int maxConcurrency) { this.delegate = delegate; this.semaphore = semaphore; + this.maxConcurrency = maxConcurrency; } @Override public void execute(Runnable command) { - // Acquire the permit on the SUBMITTING thread so that a caller producing tasks faster - // than the executor can run them is back-pressured here, rather than spawning an - // unbounded number of virtual threads that all sit blocked on semaphore.acquire(). - // Without this, the maxConcurrency only limits concurrent execution; submission depth - // is unbounded and pins JDBI connections under burst load. - try { - semaphore.acquire(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new RuntimeException("Interrupted waiting for concurrency permit", e); + // Use tryAcquire so HTTP threads get fast rejection rather than blocking + // indefinitely under saturation — blocking the submitting thread would defeat the + // async 202 contract that this executor exists to serve. Callers (e.g., + // restoreEntityAsync) catch RejectedExecutionException and map to 503 Service + // Unavailable so the client can retry. Internal back-pressure for trusted batch + // callers should be implemented at the caller level, not here. + if (!semaphore.tryAcquire()) { + throw new RejectedExecutionException( + String.format( + "Async concurrency limit reached (max=%d, in-use=%d). 
Retry later.", + maxConcurrency, maxConcurrency - semaphore.availablePermits())); } try { delegate.execute( From 33942795d7656bfd0e752a0226e3acc16e8b07d8 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Tue, 12 May 2026 18:17:52 -0700 Subject: [PATCH 13/38] fix(restore,delete): NPE guard + universal async + SDK rejection messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Copilot review on a8f258385e: - Sync restoreEntity NPE'd when repository.restoreEntity returned null (entity already restored or missing). Now throws BadRequestException with a clear "not in deleted state" message before dereferencing. - The single-arg restoreEntity helper now reads ?async=true off uriInfo, so SDK callers can opt into async restore for any entity type whose Resource subclass uses the helper — without each subclass declaring its own QueryParam. Preserves backward compat for the multi-arg overload that takes an explicit boolean. - Java SDK restoreServerAsync now validates the response has a non-null jobId. Endpoints that haven't been wired to honor ?async=true return 200 + entity JSON, which Jackson silently deserialized into an AsyncJobResponse with null fields. We now throw OpenMetadataException with a hint pointing at the likely root cause. - Python SDK _restore_server_async catches the AsyncJobResponse missing-jobId ValueError and re-raises with the same hint, so callers see "Server did not return an async job for /tables/restore" instead of the generic validation error. - delete_async docstring updated to match the actual URL shape (//async/{id}, not ?async=true). - Renamed the misleading EntityRepositoryRestoreTest test bulkRestoreSubtree_noDeletedEntitiesFound_isNoOp → _noEntitiesAtAll_isNoOp (the stub returned an empty findEntitiesByIds, not deleted=false entities). Added a new test exercising the "entities present, none deleted" path that asserts runRestoreAdditionalChildren still fires. 
Co-Authored-By: Claude Opus 4.7 --- .../src/metadata/ingestion/ometa/ometa_api.py | 3 ++- ingestion/src/metadata/sdk/entities/base.py | 13 +++++++++- .../sdk/services/EntityServiceBase.java | 25 ++++++++++++++----- .../service/resources/EntityResource.java | 14 ++++++++++- .../jdbi3/EntityRepositoryRestoreTest.java | 24 +++++++++++++++++- 5 files changed, 69 insertions(+), 10 deletions(-) diff --git a/ingestion/src/metadata/ingestion/ometa/ometa_api.py b/ingestion/src/metadata/ingestion/ometa/ometa_api.py index f9a89e8ec332..87137c14884e 100644 --- a/ingestion/src/metadata/ingestion/ometa/ometa_api.py +++ b/ingestion/src/metadata/ingestion/ometa/ometa_api.py @@ -769,7 +769,8 @@ def delete_async( ) -> Optional[dict]: # noqa: UP045 """Server-side async delete. - Issues ``DELETE //?recursive=...&hardDelete=...&async=true`` and returns + Issues ``DELETE //async/{id}?recursive=...&hardDelete=...`` (the dedicated + async-delete endpoint defined by ``EntityResource.deleteByIdAsync``) and returns the 202 payload ``{"jobId": ..., "message": ...}``. The actual cascade runs on the server's executor so ingestion can avoid blocking on large hierarchies. Caller is responsible for tracking the returned ``jobId`` if it needs completion confirmation. diff --git a/ingestion/src/metadata/sdk/entities/base.py b/ingestion/src/metadata/sdk/entities/base.py index 088000a4dc33..99ce71b2dd05 100644 --- a/ingestion/src/metadata/sdk/entities/base.py +++ b/ingestion/src/metadata/sdk/entities/base.py @@ -471,7 +471,18 @@ def _restore_server_async(cls, entity_id: UuidLike) -> "AsyncJobResponse": # no f"{endpoint}/restore?async=true", json={"id": cls._stringify_identifier(entity_id)}, ) - return AsyncJobResponse.from_response(response) + try: + return AsyncJobResponse.from_response(response) + except ValueError as missing_job_id: + # EntityResource only honors ?async=true on entity types whose Resource + # subclass has been wired to forward the query parameter. 
Untriggered + # endpoints respond 200 + the restored entity JSON, which fails the + # AsyncJobResponse jobId guard. Re-raise with a hint pointing the caller at + # the most likely root cause. + raise ValueError( + f"Server did not return an async job for {endpoint}/restore. " + f"The endpoint may not support ?async=true for this entity type." + ) from missing_job_id @classmethod def restore_request(cls, entity_id: UuidLike) -> "RestoreOperation[TEntity]": # noqa: UP037 diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/services/EntityServiceBase.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/services/EntityServiceBase.java index 58b77348b05e..ed3e4e3ec021 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/services/EntityServiceBase.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/services/EntityServiceBase.java @@ -457,12 +457,25 @@ public org.openmetadata.sdk.models.AsyncJobResponse restoreServerAsync(String id new org.openmetadata.schema.api.data.RestoreEntity(); restoreEntity.setId(java.util.UUID.fromString(id)); RequestOptions options = RequestOptions.builder().queryParam("async", "true").build(); - return httpClient.execute( - HttpMethod.PUT, - basePath + "/restore", - restoreEntity, - org.openmetadata.sdk.models.AsyncJobResponse.class, - options); + org.openmetadata.sdk.models.AsyncJobResponse response = + httpClient.execute( + HttpMethod.PUT, + basePath + "/restore", + restoreEntity, + org.openmetadata.sdk.models.AsyncJobResponse.class, + options); + // EntityResource only honors ?async=true on entity types that have been wired to + // forward the query param. Untriggered endpoints return 200 + the restored entity + // JSON, which Jackson silently deserializes into an AsyncJobResponse with all-null + // fields. Detect that and fail loudly so callers don't treat a sync restore as a + // dispatched async job. 
+ if (response == null || response.getJobId() == null || response.getJobId().isEmpty()) { + throw new OpenMetadataException( + "Server did not return an async job for " + + basePath + + "/restore. The endpoint may not support ?async=true for this entity type."); + } + return response; } /** diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java index 63fa07d1eb9a..b8898efa9f33 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java @@ -774,7 +774,12 @@ public Response deleteByName( } public Response restoreEntity(UriInfo uriInfo, SecurityContext securityContext, UUID id) { - return restoreEntity(uriInfo, securityContext, id, false); + // Read ?async=true off uriInfo so subclass resources that haven't (yet) declared the + // QueryParam still honor the async contract. Lets SDK callers opt into async restore + // universally regardless of which Resource subclass forwarded the parameter. + boolean asyncFromQuery = + uriInfo != null && Boolean.parseBoolean(uriInfo.getQueryParameters().getFirst("async")); + return restoreEntity(uriInfo, securityContext, id, asyncFromQuery); } public Response restoreEntity( @@ -787,6 +792,13 @@ public Response restoreEntity( authorizer.authorize(securityContext, operationContext, getResourceContextById(id)); PutResponse response = repository.restoreEntity(securityContext.getUserPrincipal().getName(), id); + if (response == null) { + // EntityRepository.restoreEntity returns null when find(id, DELETED) throws — i.e., + // the entity doesn't exist or isn't soft-deleted. Surface as 400 so clients don't + // NPE on response.getEntity() downstream and get a useful error code. 
+ throw new BadRequestException( + String.format("Entity %s:%s is not in deleted state", entityType, id)); + } repository.restoreFromSearch(response.getEntity()); addHref(uriInfo, response.getEntity()); LOG.info( diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java index 998112c62b01..4ea673525201 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java @@ -190,7 +190,9 @@ void bulkRestoreSubtree_emptyOrNullIds_isNoOp() { } @Test - void bulkRestoreSubtree_noDeletedEntitiesFound_isNoOp() { + void bulkRestoreSubtree_noEntitiesAtAll_isNoOp() { + // loadForBulk returns an empty list (entity doesn't exist at all): bulk path bails + // before children traversal or hook invocation. CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); UUID id = UUID.randomUUID(); when(pipelineDAO.findEntitiesByIds(anyList(), eq(Include.ALL))).thenReturn(List.of()); @@ -201,6 +203,26 @@ void bulkRestoreSubtree_noDeletedEntitiesFound_isNoOp() { assertEquals(0, repo.restoreAdditionalChildrenCalls); } + @Test + void bulkRestoreSubtree_entitiesPresentButNoneDeleted_stillRunsAdditionalChildrenHook() { + // loadForBulk returns entities, but none are in DELETED state. Bulk path must skip + // the deferred-store update phase but still call runRestoreAdditionalChildren — a + // re-entered cascade may have HAS-related descendants that need reconciliation. 
+ CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); + UUID id = UUID.randomUUID(); + Pipeline pa = + new Pipeline().withId(id).withName("a").withFullyQualifiedName("svc.a").withDeleted(false); + when(pipelineDAO.findEntitiesByIds(anyList(), eq(Include.ALL))).thenReturn(List.of(pa)); + when(relationshipDAO.findToBatchAllTypes( + anyList(), eq(Relationship.CONTAINS.ordinal()), eq(Include.ALL))) + .thenReturn(List.of()); + + repo.bulkRestoreSubtree(List.of(id), "user"); + + assertEquals(1, repo.restoreAdditionalChildrenCalls); + assertTrue(repo.bulkRestoreInvokedWith.contains(id)); + } + @Test void bulkRestoreSubtree_usesBatchedFindToOncePerLevel() { CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); From 43517096549ea6213abbba496dd390e4d14430a6 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Tue, 12 May 2026 22:14:37 -0700 Subject: [PATCH 14/38] simplify: drop semaphore-based bounding from AsyncService MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The custom BoundedExecutorService + semaphore was fighting Project Loom. Virtual threads are cheap (a few KB each) and scale to millions — gating them behind a semaphore that mirrors the JDBI connection pool size adds complexity without much value: - v1 acquired the permit inside the spawned thread → unbounded thread spawn, no real back-pressure. - v2 moved the acquire to the submitting thread → blocked HTTP threads under saturation, defeating the async 202 contract. - v3 used tryAcquire + RejectedExecutionException → required every HTTP-facing async endpoint to learn to map the rejection to 503. Step back and let the real bottleneck — the JDBI connection pool — provide back-pressure. Tasks queue on connection acquisition with the pool's own timeout. Failures surface via the existing WebSocket FAILED notification path. 
Removes ~70 LOC of executor plumbing plus the RejectedExecutionException special case in restoreEntityAsync, the BoundedExecutorService lifecycle tests, and the resolveMaxConcurrency config-driven sizing. If a future use case needs admission control, it should live at the caller boundary (e.g., per-user token bucket) rather than at the shared executor. Co-Authored-By: Claude Opus 4.7 --- .../service/jdbi3/EntityRepository.java | 11 +- .../service/resources/EntityResource.java | 76 ++++------ .../service/util/AsyncService.java | 136 +++--------------- .../service/util/AsyncServiceTest.java | 51 ------- 4 files changed, 52 insertions(+), 222 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index ca7f6d9563e5..60d61847df77 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -5600,12 +5600,11 @@ protected void restoreChildren(UUID id, String updatedBy) { *

Operational ceiling: the entire walk runs inside a single JDBI * {@code @Transaction}, which holds one connection from the pool for the duration. The * async restore endpoint ({@code ?async=true}) moves the work onto a virtual thread but - * keeps the same single-transaction shape — it just lets the client get a 202 back. For - * databases on the order of tens of thousands of descendants this still completes well - * inside the JDBI default lock-wait, but operators running concurrent restores against a - * small DB pool should size {@code maxConcurrency} on {@link - * org.openmetadata.service.util.AsyncService} accordingly. Chunked-transaction support is - * tracked as a follow-up. + * keeps the same single-transaction shape — it just lets the client get a 202 back. + * Back-pressure under load comes from the JDBI connection pool itself: virtual threads + * are cheap, so under saturation tasks queue on connection acquisition (with the pool's + * own timeout) rather than at the executor. Chunked-transaction support is tracked as a + * follow-up if this becomes a real bottleneck. */ @Transaction public final void bulkRestoreSubtree(List ids, String updatedBy) { diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java index b8898efa9f33..e0bf6a6671f9 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java @@ -843,54 +843,38 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont ExecutorService executorService = AsyncService.getInstance().getExecutorService(); // Intentionally don't capture uriInfo in the lambda — JAX-RS may invalidate it once the // 202 response is sent. The WebSocket notification only needs name/status, not HREFs. 
- try { - executorService.submit( - RequestLatencyContext.wrapWithContext( - () -> { - try { - PutResponse response = repository.restoreEntity(userName, id); - if (response == null) { - WebsocketNotificationHandler.sendRestoreOperationFailedNotification( - jobId, securityContext, entityName, "Entity is not in deleted state"); - return; - } - repository.restoreFromSearch(response.getEntity()); - LOG.info( - "[AsyncRestore] Restored {}:{} (jobId={})", - Entity.getEntityTypeFromObject(response.getEntity()), - response.getEntity().getId(), - jobId); - WebsocketNotificationHandler.sendRestoreOperationCompleteNotification( - jobId, securityContext, response.getEntity()); - } catch (Exception e) { - LOG.error( - "[AsyncRestore] Failed to restore {}:{} (name={})", - entityType, - id, - entityName, - e); + executorService.submit( + RequestLatencyContext.wrapWithContext( + () -> { + try { + PutResponse response = repository.restoreEntity(userName, id); + if (response == null) { WebsocketNotificationHandler.sendRestoreOperationFailedNotification( - jobId, - securityContext, - entityName, - e.getMessage() == null ? e.toString() : e.getMessage()); + jobId, securityContext, entityName, "Entity is not in deleted state"); + return; } - })); - } catch (java.util.concurrent.RejectedExecutionException saturated) { - // AsyncService is at capacity. Return 503 so the client can retry with backoff - // rather than block the HTTP thread here waiting on a permit. 
- LOG.warn( - "[AsyncRestore] Rejecting restore of {}:{} — executor saturated: {}", - entityType, - id, - saturated.getMessage()); - return Response.status(Response.Status.SERVICE_UNAVAILABLE) - .entity( - new RestoreEntityResponse( - null, "Async executor saturated, retry later: " + saturated.getMessage())) - .type(MediaType.APPLICATION_JSON) - .build(); - } + repository.restoreFromSearch(response.getEntity()); + LOG.info( + "[AsyncRestore] Restored {}:{} (jobId={})", + Entity.getEntityTypeFromObject(response.getEntity()), + response.getEntity().getId(), + jobId); + WebsocketNotificationHandler.sendRestoreOperationCompleteNotification( + jobId, securityContext, response.getEntity()); + } catch (Exception e) { + LOG.error( + "[AsyncRestore] Failed to restore {}:{} (name={})", + entityType, + id, + entityName, + e); + WebsocketNotificationHandler.sendRestoreOperationFailedNotification( + jobId, + securityContext, + entityName, + e.getMessage() == null ? e.toString() : e.getMessage()); + } + })); RestoreEntityResponse response = new RestoreEntityResponse(jobId, "Restore initiated successfully."); return Response.accepted().entity(response).type(MediaType.APPLICATION_JSON).build(); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/util/AsyncService.java b/openmetadata-service/src/main/java/org/openmetadata/service/util/AsyncService.java index f7611fee99f6..b01ee4607c94 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/util/AsyncService.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/util/AsyncService.java @@ -1,70 +1,42 @@ package org.openmetadata.service.util; -import java.util.List; -import java.util.concurrent.AbstractExecutorService; import java.util.concurrent.Callable; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.RejectedExecutionException; -import java.util.concurrent.Semaphore; 
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import java.util.function.Supplier; -import lombok.Getter; import lombok.extern.slf4j.Slf4j; -import org.openmetadata.service.OpenMetadataApplicationConfigHolder; +/** + * Single virtual-thread executor for all server-side async dispatch (CSV export/import, + * bulk asset ops, async delete/restore). + * + *

Back-pressure is intentionally not enforced here. The old semaphore-based + * bounded wrapper was fighting Project Loom — virtual threads scale to millions and are + * basically free, while the real bottleneck under load is the JDBI connection pool. Letting + * tasks queue on connection acquisition (with the pool's own timeout) is both simpler and + * more accurate than guessing at "how many concurrent tasks ≈ connection pool capacity". + * + *

If a future use case genuinely needs admission control, it should live at the caller + * boundary (e.g., a token bucket per user, or a per-operation queue with rejection) rather + * than at this shared executor. + */ @Slf4j public class AsyncService { private static AsyncService instance; private final ExecutorService executorService; - private final Semaphore concurrencyLimiter; - @Getter private final int maxConcurrency; private static final int DEFAULT_MAX_RETRIES = 3; private static final long DEFAULT_INITIAL_RETRY_DELAY_MS = 1000; private static final long DEFAULT_OPERATION_TIMEOUT_SECONDS = 60; - private static final long SHUTDOWN_TIMEOUT_SECONDS = 30; private AsyncService() { - maxConcurrency = resolveMaxConcurrency(); - concurrencyLimiter = new Semaphore(maxConcurrency); executorService = - new BoundedExecutorService( - Executors.newThreadPerTaskExecutor(Thread.ofVirtual().name("om-async-", 0).factory()), - concurrencyLimiter, - maxConcurrency); - LOG.info("AsyncService initialized with max concurrency: {}", maxConcurrency); - } - - private static int resolveMaxConcurrency() { - String env = System.getenv("ASYNC_SERVICE_MAX_CONCURRENCY"); - if (env != null) { - try { - int value = Integer.parseInt(env.trim()); - if (value > 0) { - return value; - } - } catch (NumberFormatException ignored) { - } - } - int cpuBudget = Runtime.getRuntime().availableProcessors() * 2; - try { - if (OpenMetadataApplicationConfigHolder.isInitialized()) { - int poolSize = - OpenMetadataApplicationConfigHolder.getInstance().getDataSourceFactory().getMaxSize(); - if (poolSize > 0) { - return Math.max(4, Math.min(cpuBudget, poolSize / 3)); - } - } - } catch (Exception e) { - LOG.warn( - "Could not determine database pool size, using CPU-based concurrency budget: {}", - e.getMessage()); - } - return Math.max(4, cpuBudget); + Executors.newThreadPerTaskExecutor(Thread.ofVirtual().name("om-async-", 0).factory()); + LOG.info("AsyncService initialized (virtual-thread-per-task executor)"); 
} public static synchronized AsyncService getInstance() { @@ -97,7 +69,7 @@ public CompletableFuture submit(Callable task) { } public void shutdown() { - LOG.info("Shutting down AsyncService executor (max concurrency: {})", maxConcurrency); + LOG.info("Shutting down AsyncService executor"); executorService.shutdown(); try { if (!executorService.awaitTermination(SHUTDOWN_TIMEOUT_SECONDS, TimeUnit.SECONDS)) { @@ -235,78 +207,4 @@ private static T executeWithRetry( throw new RuntimeException( String.format("Failed to %s %s", operationName.toLowerCase(), context), lastException); } - - /** - * ExecutorService wrapper that enforces concurrency limits via a semaphore. Every task submitted - * through any method (execute, submit, invokeAll, invokeAny) acquires a permit before running and - * releases it on completion. This ensures ALL callers — including those using getExecutorService() - * directly — are bounded. - */ - private static class BoundedExecutorService extends AbstractExecutorService { - private final ExecutorService delegate; - private final Semaphore semaphore; - private final int maxConcurrency; - - BoundedExecutorService(ExecutorService delegate, Semaphore semaphore, int maxConcurrency) { - this.delegate = delegate; - this.semaphore = semaphore; - this.maxConcurrency = maxConcurrency; - } - - @Override - public void execute(Runnable command) { - // Use tryAcquire so HTTP threads get fast rejection rather than blocking - // indefinitely under saturation — blocking the submitting thread would defeat the - // async 202 contract that this executor exists to serve. Callers (e.g., - // restoreEntityAsync) catch RejectedExecutionException and map to 503 Service - // Unavailable so the client can retry. Internal back-pressure for trusted batch - // callers should be implemented at the caller level, not here. - if (!semaphore.tryAcquire()) { - throw new RejectedExecutionException( - String.format( - "Async concurrency limit reached (max=%d, in-use=%d). 
Retry later.", - maxConcurrency, maxConcurrency - semaphore.availablePermits())); - } - try { - delegate.execute( - () -> { - try { - command.run(); - } finally { - semaphore.release(); - } - }); - } catch (RuntimeException submitFailure) { - // delegate.execute itself failed (e.g., rejected) — release the permit we just - // acquired so the next caller can make progress. - semaphore.release(); - throw submitFailure; - } - } - - @Override - public void shutdown() { - delegate.shutdown(); - } - - @Override - public List shutdownNow() { - return delegate.shutdownNow(); - } - - @Override - public boolean isShutdown() { - return delegate.isShutdown(); - } - - @Override - public boolean isTerminated() { - return delegate.isTerminated(); - } - - @Override - public boolean awaitTermination(long timeout, TimeUnit unit) throws InterruptedException { - return delegate.awaitTermination(timeout, unit); - } - } } diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/util/AsyncServiceTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/util/AsyncServiceTest.java index 77913e958f11..79c930902c71 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/util/AsyncServiceTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/util/AsyncServiceTest.java @@ -17,7 +17,6 @@ import java.util.concurrent.CompletionException; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; -import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Supplier; @@ -25,7 +24,6 @@ import org.junit.jupiter.api.Test; import org.openmetadata.service.OpenMetadataApplicationConfig; import org.openmetadata.service.OpenMetadataApplicationConfigHolder; -import org.openmetadata.service.jdbi3.HikariCPDataSourceFactory; class AsyncServiceTest { @@ -186,7 +184,6 @@ void testGetInstanceReturnsSingleton() throws 
Exception { AsyncService second = AsyncService.getInstance(); assertSame(first, second); - assertTrue(first.getMaxConcurrency() >= 4); } @Test @@ -314,44 +311,6 @@ void testExecuteWithRetryInterruptedSleepRestoresInterruptFlag() throws Exceptio Thread.interrupted(); } - @Test - void testResolveMaxConcurrencyUsesConfigBudgetAndCpuFallback() throws Exception { - Method method = AsyncService.class.getDeclaredMethod("resolveMaxConcurrency"); - method.setAccessible(true); - - int cpuBudget = Runtime.getRuntime().availableProcessors() * 2; - setConfigHolderInstance(null); - assertEquals(Integer.valueOf(Math.max(4, cpuBudget)), invoke(method, null)); - - OpenMetadataApplicationConfig config = mock(OpenMetadataApplicationConfig.class); - HikariCPDataSourceFactory dataSourceFactory = mock(HikariCPDataSourceFactory.class); - when(config.getDataSourceFactory()).thenReturn(dataSourceFactory); - when(dataSourceFactory.getMaxSize()).thenReturn(30); - setConfigHolderInstance(config); - - assertEquals(Integer.valueOf(Math.max(4, Math.min(cpuBudget, 10))), invoke(method, null)); - } - - @Test - void testBoundedExecutorLifecycleDelegatesState() throws Exception { - ExecutorService delegate = mock(ExecutorService.class); - when(delegate.isShutdown()).thenReturn(true); - when(delegate.isTerminated()).thenReturn(true); - when(delegate.awaitTermination(5, TimeUnit.SECONDS)).thenReturn(true); - - ExecutorService boundedExecutor = newBoundedExecutorService(delegate); - - assertTrue(boundedExecutor.isShutdown()); - assertTrue(boundedExecutor.isTerminated()); - assertTrue(boundedExecutor.awaitTermination(5, TimeUnit.SECONDS)); - - boundedExecutor.shutdown(); - boundedExecutor.shutdownNow(); - - verify(delegate).shutdown(); - verify(delegate).shutdownNow(); - } - @Test void testShutdownForcesExecutorOnTimeoutAndInterrupt() throws Exception { AsyncService timeoutService = newAsyncService(); @@ -384,16 +343,6 @@ private static AsyncService newAsyncService() throws Exception { return 
constructor.newInstance(); } - private static ExecutorService newBoundedExecutorService(ExecutorService delegate) - throws Exception { - Class boundedClass = - Class.forName("org.openmetadata.service.util.AsyncService$BoundedExecutorService"); - Constructor constructor = - boundedClass.getDeclaredConstructor(ExecutorService.class, Semaphore.class); - constructor.setAccessible(true); - return (ExecutorService) constructor.newInstance(delegate, new Semaphore(1)); - } - private static void replaceExecutor(AsyncService service, ExecutorService executor) throws Exception { ExecutorService originalExecutor = service.getExecutorService(); From 09250ff35f063ca644b1abaab48eedd303fcf63c Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Tue, 12 May 2026 22:32:34 -0700 Subject: [PATCH 15/38] review: address remaining Copilot threads on async restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Resolve SecurityContext request-scope leak in restoreEntityAsync. The async lambda used to call WebsocketNotificationHandler with the captured SecurityContext, which JAX-RS may invalidate once the 202 response is sent — meaning getUserIdFromSecurityContext could see a stale principal at notification time. Resolve the user id on the request thread before submitting, then pass the UUID into the lambda. Add WebsocketNotificationHandler.resolveUserId(SecurityContext) and switch sendRestoreOperation{Complete,Failed}Notification to UUID overloads. Existing securityContext-shaped callers were only inside the same async lambda, so no other call sites need changes. - Distinguish 404 vs 400 in sync restoreEntity the same way the async pre-check does. When repository.restoreEntity returns null, probe with Include.ALL: if the entity truly doesn't exist surface as 404, otherwise 400 "not in deleted state". - Tighten bulkRestoreSubtree_emptyOrNullIds_isNoOp to also assert no Include.ALL load runs (post-simplification load shape). 
- SDK stale comments in EntityServiceBase.restoreServerAsync and base.py _restore_server_async no longer claim "endpoint may not support ?async=true for this entity type" — universal support landed via the uriInfo query-param read. Comments now describe the guard as defensive against older servers. Co-Authored-By: Claude Opus 4.7 --- ingestion/src/metadata/sdk/entities/base.py | 11 ++++--- .../sdk/services/EntityServiceBase.java | 11 ++++--- .../service/resources/EntityResource.java | 30 ++++++++++++------- .../util/WebsocketNotificationHandler.java | 16 +++++++--- .../jdbi3/EntityRepositoryRestoreTest.java | 3 ++ 5 files changed, 45 insertions(+), 26 deletions(-) diff --git a/ingestion/src/metadata/sdk/entities/base.py b/ingestion/src/metadata/sdk/entities/base.py index 99ce71b2dd05..f1781d91ad2a 100644 --- a/ingestion/src/metadata/sdk/entities/base.py +++ b/ingestion/src/metadata/sdk/entities/base.py @@ -474,14 +474,13 @@ def _restore_server_async(cls, entity_id: UuidLike) -> "AsyncJobResponse": # no try: return AsyncJobResponse.from_response(response) except ValueError as missing_job_id: - # EntityResource only honors ?async=true on entity types whose Resource - # subclass has been wired to forward the query parameter. Untriggered - # endpoints respond 200 + the restored entity JSON, which fails the - # AsyncJobResponse jobId guard. Re-raise with a hint pointing the caller at - # the most likely root cause. + # Defensive guard for older servers that don't honor ?async=true (or any + # future case where the resource short-circuits with a 200 + entity payload). + # Without this, the generic AsyncJobResponse jobId-missing error would be + # confusing. raise ValueError( f"Server did not return an async job for {endpoint}/restore. " - f"The endpoint may not support ?async=true for this entity type." + f"The server may be older than the async-restore release." 
) from missing_job_id @classmethod diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/services/EntityServiceBase.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/services/EntityServiceBase.java index ed3e4e3ec021..1a7a2be3b352 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/services/EntityServiceBase.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/services/EntityServiceBase.java @@ -464,16 +464,15 @@ public org.openmetadata.sdk.models.AsyncJobResponse restoreServerAsync(String id restoreEntity, org.openmetadata.sdk.models.AsyncJobResponse.class, options); - // EntityResource only honors ?async=true on entity types that have been wired to - // forward the query param. Untriggered endpoints return 200 + the restored entity - // JSON, which Jackson silently deserializes into an AsyncJobResponse with all-null - // fields. Detect that and fail loudly so callers don't treat a sync restore as a - // dispatched async job. + // Defensive check for older servers that don't honor ?async=true (or for any future + // case where the resource short-circuits with a 200 + entity payload). Jackson would + // otherwise silently deserialize the entity JSON into an AsyncJobResponse with all + // null fields and callers would treat a sync restore as a dispatched async job. if (response == null || response.getJobId() == null || response.getJobId().isEmpty()) { throw new OpenMetadataException( "Server did not return an async job for " + basePath - + "/restore. The endpoint may not support ?async=true for this entity type."); + + "/restore. 
The server may be older than the async-restore release."); } return response; } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java index e0bf6a6671f9..6b6004637a27 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java @@ -793,11 +793,17 @@ public Response restoreEntity( PutResponse response = repository.restoreEntity(securityContext.getUserPrincipal().getName(), id); if (response == null) { - // EntityRepository.restoreEntity returns null when find(id, DELETED) throws — i.e., - // the entity doesn't exist or isn't soft-deleted. Surface as 400 so clients don't - // NPE on response.getEntity() downstream and get a useful error code. - throw new BadRequestException( - String.format("Entity %s:%s is not in deleted state", entityType, id)); + // EntityRepository.restoreEntity returns null when find(id, DELETED) throws — + // either the entity doesn't exist at all (→ 404) or it exists but isn't deleted + // (→ 400). Probe with Include.ALL to tell them apart so the client gets the right + // status code instead of a generic 400. + try { + repository.find(id, Include.ALL); + throw new BadRequestException( + String.format("Entity %s:%s is not in deleted state", entityType, id)); + } catch (EntityNotFoundException missing) { + throw missing; + } } repository.restoreFromSearch(response.getEntity()); addHref(uriInfo, response.getEntity()); @@ -840,9 +846,13 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont String entityName = preCheck.getName() != null ? 
preCheck.getName() : id.toString(); String jobId = UUID.randomUUID().toString(); String userName = securityContext.getUserPrincipal().getName(); + // Resolve the WebSocket user id on the request thread, while the SecurityContext is + // still valid. JAX-RS may invalidate request-scoped state once the 202 response is + // returned, so we cannot rely on securityContext.getUserPrincipal() inside the lambda. + UUID notifyUserId = WebsocketNotificationHandler.resolveUserId(securityContext); ExecutorService executorService = AsyncService.getInstance().getExecutorService(); - // Intentionally don't capture uriInfo in the lambda — JAX-RS may invalidate it once the - // 202 response is sent. The WebSocket notification only needs name/status, not HREFs. + // Intentionally don't capture uriInfo in the lambda — same request-scope concern. The + // WebSocket notification only needs name/status, not HREFs. executorService.submit( RequestLatencyContext.wrapWithContext( () -> { @@ -850,7 +860,7 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont PutResponse response = repository.restoreEntity(userName, id); if (response == null) { WebsocketNotificationHandler.sendRestoreOperationFailedNotification( - jobId, securityContext, entityName, "Entity is not in deleted state"); + jobId, notifyUserId, entityName, "Entity is not in deleted state"); return; } repository.restoreFromSearch(response.getEntity()); @@ -860,7 +870,7 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont response.getEntity().getId(), jobId); WebsocketNotificationHandler.sendRestoreOperationCompleteNotification( - jobId, securityContext, response.getEntity()); + jobId, notifyUserId, response.getEntity()); } catch (Exception e) { LOG.error( "[AsyncRestore] Failed to restore {}:{} (name={})", @@ -870,7 +880,7 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont e); 
WebsocketNotificationHandler.sendRestoreOperationFailedNotification( jobId, - securityContext, + notifyUserId, entityName, e.getMessage() == null ? e.toString() : e.getMessage()); } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/util/WebsocketNotificationHandler.java b/openmetadata-service/src/main/java/org/openmetadata/service/util/WebsocketNotificationHandler.java index 554b87aadbec..1bed4a3cbaa2 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/util/WebsocketNotificationHandler.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/util/WebsocketNotificationHandler.java @@ -382,11 +382,10 @@ public static void sendDeleteOperationFailedNotification( } public static void sendRestoreOperationCompleteNotification( - String jobId, SecurityContext securityContext, EntityInterface entity) { + String jobId, UUID userId, EntityInterface entity) { RestoreEntityMessage message = new RestoreEntityMessage(jobId, "COMPLETED", entity.getName(), null); String jsonMessage = JsonUtils.pojoToJson(message); - UUID userId = getUserIdFromSecurityContext(securityContext); LOG.info( "[AsyncRestore] Restore operation completed - jobId: {}, userId: {}, entity: {}", jobId, @@ -399,10 +398,9 @@ public static void sendRestoreOperationCompleteNotification( } public static void sendRestoreOperationFailedNotification( - String jobId, SecurityContext securityContext, String entityName, String error) { + String jobId, UUID userId, String entityName, String error) { RestoreEntityMessage message = new RestoreEntityMessage(jobId, "FAILED", entityName, error); String jsonMessage = JsonUtils.pojoToJson(message); - UUID userId = getUserIdFromSecurityContext(securityContext); LOG.error( "[AsyncRestore] Restore operation failed - jobId: {}, userId: {}, entity: {}, error: {}", jobId, @@ -415,6 +413,16 @@ public static void sendRestoreOperationFailedNotification( } } + /** + * Resolve the WebSocket user id for the given security context. 
Call this on the + * request thread (i.e., before submitting an async task) so the lookup runs while the + * SecurityContext is still valid — JAX-RS may invalidate request-scoped state after the + * response returns. + */ + public static UUID resolveUserId(SecurityContext securityContext) { + return getUserIdFromSecurityContext(securityContext); + } + public static void sendMoveOperationCompleteNotification( String jobId, SecurityContext securityContext, EntityInterface entity) { MoveGlossaryTermMessage message = diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java index 4ea673525201..449442d793db 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java @@ -184,8 +184,11 @@ void bulkRestoreSubtree_emptyOrNullIds_isNoOp() { repo.bulkRestoreSubtree(null, "user"); repo.bulkRestoreSubtree(List.of(), "user"); + // bulkRestoreSubtree loads with Include.ALL — guard that neither the DELETED nor ALL + // shape is invoked when the input list is empty/null. 
verify(pipelineDAO, never()) .findEntitiesByIds(anyList(), eq(org.openmetadata.schema.type.Include.DELETED)); + verify(pipelineDAO, never()).findEntitiesByIds(anyList(), eq(Include.ALL)); assertEquals(0, repo.restoreAdditionalChildrenCalls); } From 4b59bb72dff866b440eaf05b2103889d26674bae Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Wed, 13 May 2026 09:44:38 -0700 Subject: [PATCH 16/38] review: address remaining Copilot threads on async restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous shape — find() and BadRequestException both inside one try block, with a narrow catch on EntityNotFoundException — was technically correct (BadRequestException isn't a subtype of EntityNotFoundException and propagated naturally), but the control flow was easy to misread: the catch looked like it was wrapping the throw of the new exception. Restructure both call sites (sync restoreEntity, async pre-check) with a boolean flag so the intent is unambiguous: boolean entityExists; try { repository.find(id, Include.ALL); entityExists = true; } catch (EntityNotFoundException missing) { entityExists = false; } if (entityExists) throw BadRequestException(...); throw EntityNotFoundException(...); Same observable behavior: 404 when truly missing, 400 when present- but-not-deleted, any other exception (DB connectivity, auth, etc.) propagates as 500 instead of being mis-mapped to 400. 
Co-Authored-By: Claude Opus 4.7 --- .../service/resources/EntityResource.java | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java index 6b6004637a27..f238c884186d 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java @@ -795,15 +795,21 @@ public Response restoreEntity( if (response == null) { // EntityRepository.restoreEntity returns null when find(id, DELETED) throws — // either the entity doesn't exist at all (→ 404) or it exists but isn't deleted - // (→ 400). Probe with Include.ALL to tell them apart so the client gets the right - // status code instead of a generic 400. + // (→ 400). Probe with Include.ALL to tell them apart. The try block deliberately + // ONLY traps EntityNotFoundException so unrelated failures (DB connectivity, auth, + // etc.) propagate as 500 rather than being mis-mapped to 400. + boolean entityExists; try { repository.find(id, Include.ALL); + entityExists = true; + } catch (EntityNotFoundException missing) { + entityExists = false; + } + if (entityExists) { throw new BadRequestException( String.format("Entity %s:%s is not in deleted state", entityType, id)); - } catch (EntityNotFoundException missing) { - throw missing; } + throw new EntityNotFoundException(CatalogExceptionMessage.entityNotFound(entityType, id)); } repository.restoreFromSearch(response.getEntity()); addHref(uriInfo, response.getEntity()); @@ -835,13 +841,21 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont try { preCheck = repository.find(id, Include.DELETED); } catch (EntityNotFoundException notDeleted) { + // Probe with Include.ALL to distinguish 404-missing from 400-not-deleted. 
Narrow + // catch so unrelated failures (DB connectivity, auth) propagate naturally rather + // than being mis-mapped to 400 "not in deleted state". + boolean entityExists; try { repository.find(id, Include.ALL); + entityExists = true; + } catch (EntityNotFoundException missing) { + entityExists = false; + } + if (entityExists) { throw new BadRequestException( String.format("Entity %s:%s is not in deleted state", entityType, id)); - } catch (EntityNotFoundException missing) { - throw notDeleted; } + throw notDeleted; } String entityName = preCheck.getName() != null ? preCheck.getName() : id.toString(); String jobId = UUID.randomUUID().toString(); From c9841b15375361ddaa57469ff0c35d2e82710571 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Thu, 14 May 2026 08:33:50 -0700 Subject: [PATCH 17/38] perf(delete): bulk hard-delete cascade across all entity hierarchies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the per-entity cleanup() loop in processDeletionBatch / batchDeleteChildren — which opened an independent JDBI transaction per descendant and ran ~10 SQL statements each — with bulkHardDeleteSubtree on the base EntityRepository. One @Transaction wraps the whole subtree, relationships + entity rows are batched per-type, and the walk descends through both CONTAINS and PARENT_OF so every hierarchy (DatabaseService, DashboardService, Glossary, Team, Storage Container, etc.) gets the same path automatically. Same one-line relation-set change back-fills the latent CONTAINS-only gap in bulkSoftDeleteSubtree and bulkRestoreSubtree. DashboardRepository's chart HAS-cascade moves into the new hardDeleteAdditionalChildren hook, mirroring the soft-delete/restore hook pair. Adds bulk DAO primitives (findToBatchAllTypes multi-relation overload, EntityDAO.deleteByIds, FeedRepository.deleteByAbout(List)). 
Verified end-to-end against Testcontainers: RestoreHierarchyIT 5/5 (including new hardDelete_databaseService_* and hardDelete_glossary_* which assert zero orphan entity_relationship rows after cascade) and the existing 101-child ContainerResourceIT regression test both pass. Co-Authored-By: Claude Opus 4.7 --- .../it/tests/ContainerResourceIT.java | 17 +- .../it/tests/RestoreHierarchyIT.java | 120 ++++++- .../service/jdbi3/CollectionDAO.java | 23 ++ .../service/jdbi3/DashboardRepository.java | 15 +- .../openmetadata/service/jdbi3/EntityDAO.java | 11 + .../service/jdbi3/EntityRepository.java | 320 +++++++++--------- .../service/jdbi3/FeedRepository.java | 9 + .../jdbi3/EntityRepositoryRestoreTest.java | 173 ++++++++-- 8 files changed, 486 insertions(+), 202 deletions(-) diff --git a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/ContainerResourceIT.java b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/ContainerResourceIT.java index e9f106f43099..ab5575bb46eb 100644 --- a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/ContainerResourceIT.java +++ b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/ContainerResourceIT.java @@ -1365,14 +1365,15 @@ void test_rootListingExcludesOrphanedChild(TestNamespace ns) { } /** - * Forces the {@code batchDeleteChildren} / {@code processDeletionBatch} path: - * {@code deleteChildren} only takes the batch path when {@code hardDelete=true} AND - * {@code children.size() > 100}. Previously that path pre-deleted relationships in - * two batched queries before iterating {@code cleanup()} per child, and swallowed any - * per-child exception in the loop — so a single failed cleanup left an entity row - * alive with all its relationship rows already wiped (orphan with multi-segment FQN). - * The fix routes everything through {@code cleanup()} per entity and lets exceptions - * propagate. 
101 is one above the 100-child threshold that gates the batch path. + * Exercises the {@code bulkHardDeleteSubtree} path that replaced the legacy + * {@code batchDeleteChildren} / {@code processDeletionBatch} flow. The legacy path opened + * an independent JDBI transaction per child via {@code cleanup()} and could leave an + * entity row alive with its relationship rows wiped (orphan with multi-segment FQN) when + * a per-child cleanup failed mid-loop. The replacement runs the entire subtree in a + * single {@code @Transaction} that rolls back atomically on any failure. 101 is one above + * the size that the legacy implementation gated its batch path on — keeping the test + * value pins the regression scenario in place even though the gating threshold no longer + * exists in the code. */ @Test void test_recursiveHardDelete_largeBatch_leavesNoOrphans(TestNamespace ns) { diff --git a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/RestoreHierarchyIT.java b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/RestoreHierarchyIT.java index 5490f3559c7c..94f7e9904796 100644 --- a/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/RestoreHierarchyIT.java +++ b/openmetadata-integration-tests/src/test/java/org/openmetadata/it/tests/RestoreHierarchyIT.java @@ -15,9 +15,11 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import java.time.Duration; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -28,21 +30,28 @@ import org.openmetadata.it.factories.DatabaseSchemaTestFactory; import org.openmetadata.it.factories.DatabaseServiceTestFactory; import org.openmetadata.it.factories.DatabaseTestFactory; +import 
org.openmetadata.it.factories.GlossaryTermTestFactory; +import org.openmetadata.it.factories.GlossaryTestFactory; import org.openmetadata.it.factories.TableTestFactory; import org.openmetadata.it.util.SdkClients; import org.openmetadata.it.util.TestNamespace; import org.openmetadata.it.util.TestNamespaceExtension; import org.openmetadata.schema.entity.data.Database; import org.openmetadata.schema.entity.data.DatabaseSchema; +import org.openmetadata.schema.entity.data.Glossary; +import org.openmetadata.schema.entity.data.GlossaryTerm; import org.openmetadata.schema.entity.data.Table; import org.openmetadata.schema.entity.services.DatabaseService; import org.openmetadata.schema.type.Include; import org.openmetadata.sdk.client.OpenMetadataClient; import org.openmetadata.sdk.fluent.Databases; import org.openmetadata.sdk.models.AsyncJobResponse; +import org.openmetadata.service.Entity; +import org.openmetadata.service.jdbi3.CollectionDAO; /** - * End-to-end tests for the bulk + async restore introduced for issue #4003. + * End-to-end tests for the bulk + async restore + bulk hard-delete paths introduced for + * issue #4003 and #4004. * *

Builds a small Database → DatabaseSchemas → Tables hierarchy, soft-deletes the database * (which cascades), then verifies that: @@ -51,6 +60,11 @@ *

  • The synchronous bulk restore path restores the entire subtree in a single PUT call. *
  • The async restore path returns 202 with a job id and produces the same final state once * the background work completes. + *
  • The recursive hard-delete on a CONTAINS-shaped service hierarchy wipes every row and + * every entity_relationship reference in one bulk transaction per type. + *
  • The recursive hard-delete on a Glossary → GlossaryTerm hierarchy descends via the + * PARENT_OF relation — confirming the bulk path's relation set covers more than just + * CONTAINS. * */ @ExtendWith(TestNamespaceExtension.class) @@ -128,6 +142,110 @@ void asyncRestore_returns202AndRestoresFullHierarchy(TestNamespace ns) { assertHierarchyRestored(h); } + @Test + void hardDelete_databaseService_cascadesEntireSubtreeAndLeavesNoOrphanRelationships( + TestNamespace ns) { + Hierarchy h = createHierarchy(ns, "harddel"); + + Map params = new HashMap<>(); + params.put("recursive", "true"); + params.put("hardDelete", "true"); + SdkClients.adminClient().databaseServices().delete(h.service.getId().toString(), params); + + OpenMetadataClient client = SdkClients.adminClient(); + assertThrows( + Exception.class, + () -> client.databaseServices().get(h.service.getId().toString()), + "database service must be hard-deleted"); + assertThrows( + Exception.class, + () -> client.databases().get(h.database.getId().toString()), + "database must be hard-deleted"); + for (DatabaseSchema schema : h.schemas) { + assertThrows( + Exception.class, + () -> client.databaseSchemas().get(schema.getId().toString()), + "schema must be hard-deleted: " + schema.getName()); + } + for (Table table : h.tables) { + assertThrows( + Exception.class, + () -> client.tables().get(table.getId().toString()), + "table must be hard-deleted: " + table.getName()); + } + + List allDeletedIds = new ArrayList<>(); + allDeletedIds.add(h.service.getId().toString()); + allDeletedIds.add(h.database.getId().toString()); + for (DatabaseSchema schema : h.schemas) { + allDeletedIds.add(schema.getId().toString()); + } + for (Table table : h.tables) { + allDeletedIds.add(table.getId().toString()); + } + assertNoOrphanRelationships(allDeletedIds); + } + + @Test + void hardDelete_glossary_cascadesRecursiveTermsViaParentOf(TestNamespace ns) { + Glossary glossary = GlossaryTestFactory.createWithName(ns, "harddel_glossary"); + 
GlossaryTerm parent = GlossaryTermTestFactory.createWithName(ns, glossary, "parent_term"); + GlossaryTerm child = GlossaryTermTestFactory.createChild(ns, glossary, parent, "child_term"); + GlossaryTerm grandchild = + GlossaryTermTestFactory.createChild(ns, glossary, child, "grandchild_term"); + + Map params = new HashMap<>(); + params.put("recursive", "true"); + params.put("hardDelete", "true"); + SdkClients.adminClient().glossaries().delete(glossary.getId().toString(), params); + + OpenMetadataClient client = SdkClients.adminClient(); + assertThrows( + Exception.class, + () -> client.glossaries().get(glossary.getId().toString()), + "glossary must be hard-deleted"); + for (GlossaryTerm term : List.of(parent, child, grandchild)) { + assertThrows( + Exception.class, + () -> client.glossaryTerms().get(term.getId().toString()), + "glossary term must be hard-deleted via PARENT_OF cascade: " + term.getName()); + } + + assertNoOrphanRelationships( + List.of( + glossary.getId().toString(), + parent.getId().toString(), + child.getId().toString(), + grandchild.getId().toString())); + } + + private void assertNoOrphanRelationships(List deletedIds) { + CollectionDAO.EntityRelationshipDAO relationshipDAO = + Entity.getCollectionDAO().relationshipDAO(); + List hierarchyRelations = + List.of( + org.openmetadata.schema.type.Relationship.CONTAINS.ordinal(), + org.openmetadata.schema.type.Relationship.PARENT_OF.ordinal(), + org.openmetadata.schema.type.Relationship.HAS.ordinal()); + List outgoing = + relationshipDAO.findToBatchAllTypes(deletedIds, hierarchyRelations, Include.ALL); + assertTrue( + outgoing == null || outgoing.isEmpty(), + "No outgoing entity_relationship rows must reference deleted ids — found " + + (outgoing == null ? 
0 : outgoing.size())); + for (Integer relation : hierarchyRelations) { + List incoming = + relationshipDAO.findFromBatch(deletedIds, relation, Include.ALL); + assertTrue( + incoming == null || incoming.isEmpty(), + "No incoming entity_relationship rows must reference deleted ids " + + "(relation=" + + relation + + ") — found " + + (incoming == null ? 0 : incoming.size())); + } + } + private static class Hierarchy { DatabaseService service; Database database; diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java index 56f1a08e7422..2fd41b931698 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java @@ -2048,6 +2048,29 @@ default List findToBatchAllTypes( return findToBatchAllTypesWithCondition(fromIds, relation, condition); } + @SqlQuery( + "SELECT fromId, toId, fromEntity, toEntity, relation, json, jsonSchema " + + "FROM entity_relationship " + + "WHERE fromId IN () " + + "AND relation IN () " + + "") + @UseRowMapper(RelationshipObjectMapper.class) + List findToBatchAllTypesWithRelationsCondition( + @BindList("fromIds") List fromIds, + @BindList("relations") List relations, + @Define("cond") String condition); + + default List findToBatchAllTypes( + List fromIds, List relations, Include include) { + String condition = ""; + if (include == null || include == Include.NON_DELETED) { + condition = "AND deleted = FALSE"; + } else if (include == Include.DELETED) { + condition = "AND deleted = TRUE"; + } + return findToBatchAllTypesWithRelationsCondition(fromIds, relations, condition); + } + @SqlQuery( "SELECT fromId, toId, fromEntity, toEntity, relation, json, jsonSchema " + "FROM entity_relationship " diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java 
b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java index 989d4ab80a35..371f155a3296 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/DashboardRepository.java @@ -209,18 +209,13 @@ public void clearFields(Dashboard dashboard, Fields fields) { fields.contains("usageSummary") ? dashboard.getUsageSummary() : null); } - // Hard-delete cascade for chart HAS-links. The bulk path doesn't apply to hard delete - // (cleanup() removes rows directly), so this override re-creates the previous chart - // hard-delete behavior. Soft delete is handled by softDeleteAdditionalChildren so that - // it also runs when a dashboard is a descendant of a larger soft-delete cascade. + // Hard-delete chart links (HAS relation). The CONTAINS subtree is handled by the bulk + // path in EntityRepository.bulkHardDeleteSubtree; chart handling is a per-dashboard concern + // and lives in the per-entity extension hook so it runs both for direct dashboard deletes + // and when dashboards are descendants of a larger hard-delete cascade. 
@Transaction @Override - protected void deleteChildren( - UUID dashboardId, boolean recursive, boolean hardDelete, String updatedBy) { - super.deleteChildren(dashboardId, recursive, hardDelete, updatedBy); - if (!hardDelete) { - return; - } + protected void hardDeleteAdditionalChildren(UUID dashboardId, String updatedBy) { cascadeChartCleanup(dashboardId, updatedBy, true); } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java index 6de4d7cd3c86..5b57f5682bdb 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java @@ -608,6 +608,17 @@ boolean existsByName( @SqlUpdate("DELETE FROM
  • WHERE id = :id") int delete(@Define("table") String table, @BindUUID("id") UUID id); + @SqlUpdate("DELETE FROM
    WHERE id IN ()") + int deleteByIds(@Define("table") String table, @BindList("ids") List ids); + + default int deleteByIds(List ids) { + if (ids == null || ids.isEmpty()) { + return 0; + } + List stringIds = ids.stream().map(UUID::toString).toList(); + return deleteByIds(getTableName(), stringIds); + } + @ConnectionAwareSqlUpdate(value = "ANALYZE TABLE
    ", connectionType = MYSQL) @ConnectionAwareSqlUpdate(value = "ANALYZE
    ", connectionType = POSTGRES) void analyze(@Define("table") String table); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index 05e722c41547..f9f328885261 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -4295,167 +4295,22 @@ protected void deleteChildren( if (children.isEmpty()) { return; } - // Soft delete dispatches to the per-type bulk path that mirrors bulkRestoreSubtree — - // one batched DB write + one batched change-event insert per type, regardless of - // descendant count. The per-type ES cascade in deleteFromSearch handles index updates. - if (!hardDelete) { - Map> idsByType = - children.stream() - .collect( - Collectors.groupingBy( - EntityRelationshipRecord::getType, - Collectors.mapping(EntityRelationshipRecord::getId, Collectors.toList()))); - for (var entry : idsByType.entrySet()) { - EntityRepository repo = Entity.getEntityRepository(entry.getKey()); - repo.bulkSoftDeleteSubtree(entry.getValue(), updatedBy); - } - return; - } - // Hard delete keeps the existing batch-vs-sequential split: batchDeleteChildren only - // for >100 children (cleanup() per child has its own JDBI transaction; see the - // failure-semantics note on processDeletionBatch). 
- if (children.size() > 100) { - LOG.info("Using batch deletion for {} children entities", children.size()); - batchDeleteChildren(children, hardDelete, updatedBy); - } else { - for (EntityRelationshipRecord entityRelationshipRecord : children) { - LOG.info( - "Recursively hard deleting {} {}", - entityRelationshipRecord.getType(), - entityRelationshipRecord.getId()); - Entity.deleteEntity( - updatedBy, - entityRelationshipRecord.getType(), - entityRelationshipRecord.getId(), - true, - hardDelete); - } - } - } - - /** - * Batch deletion of children entities for improved performance - */ - @Transaction - protected void batchDeleteChildren( - List children, boolean hardDelete, String updatedBy) { - - // Group entities by type for batch processing - Map> entitiesByType = + // Both soft-delete and hard-delete dispatch to the per-type bulk path. One batched DB + // write + one batched change-event insert per type, regardless of descendant count. + // For hard delete, bulkHardDeleteSubtree replaces the legacy per-entity cleanup loop + // that opened an independent JDBI transaction per descendant. 
+ Map> idsByType = children.stream() .collect( Collectors.groupingBy( EntityRelationshipRecord::getType, Collectors.mapping(EntityRelationshipRecord::getId, Collectors.toList()))); - - LOG.info("Batch deleting {} entities across {} types", children.size(), entitiesByType.size()); - - // Process deletion in levels to handle cascading properly - for (Map.Entry> entry : entitiesByType.entrySet()) { - String childEntityType = entry.getKey(); - List entityIds = entry.getValue(); - - LOG.info("Batch processing {} entities of type {}", entityIds.size(), childEntityType); - - // Process in smaller batches to avoid overwhelming the system - int batchSize = 50; - for (int i = 0; i < entityIds.size(); i += batchSize) { - List batch = entityIds.subList(i, Math.min(i + batchSize, entityIds.size())); - processDeletionBatch(batch, childEntityType, hardDelete, updatedBy); - } - } - } - - /** - * Process a batch of entities for hard deletion. Entered only via - * {@link #batchDeleteChildren}, which only fires for {@code hardDelete=true} and - * {@code children.size() > 100}; the soft-delete and small-batch paths stay on the - * sequential {@link Entity#deleteEntity} flow. - * - *

    Each child is removed via {@link #cleanup}, which deletes the entity row, all - * {@code (id, *)} and {@code (*, id)} entity_relationship rows, extensions, tag usage, - * threads, and caches as one atomic unit per child (cleanup opens its own JDBI - * transaction via {@code Entity.getJdbi().inTransaction(...)}). The previous - * implementation pre-deleted relationships in two batched queries before this loop and - * swallowed exceptions in the per-child cleanup, which is exactly what produced the - * orphan pattern: a failed cleanup left an entity row alive after its relationships - * had been wiped, surfacing in {@code /containers?root=true} and breaking - * {@code /children} traversal. - * - *

    Failure semantics: per-child atomicity, not whole-batch atomicity. If - * cleanup for child k throws, children {@code 0..k-1} have already committed - * via their own transactions and cannot be rolled back by the {@code @Transaction} on - * this method (cleanup's inner transaction is independent). The exception propagates - * so the loop stops; children {@code k..N-1} keep both their rows and their parent - * CONTAINS relationships intact. The retry path is to reissue the recursive delete on - * the parent — remaining children re-enter this loop. Crucially, no orphan-without- - * relationships row can result from an exception in this method, which is the bug - * this change exists to fix; achieving true all-or-nothing rollback across the batch - * would require sharing one JDBI handle across every cleanup call, a wider refactor - * deliberately scoped out of this fix. - */ - @Transaction - private void processDeletionBatch( - List entityIds, String entityType, boolean hardDelete, String updatedBy) { - - LOG.debug("Processing batch of {} {} entities", entityIds.size(), entityType); - - // First, collect all grandchildren that need to be deleted in a SINGLE batch query - List stringIds = entityIds.stream().map(UUID::toString).collect(Collectors.toList()); - List grandchildRecords = - daoCollection - .relationshipDAO() - .findToBatchWithRelations( - stringIds, - entityType, - List.of(Relationship.CONTAINS.ordinal(), Relationship.PARENT_OF.ordinal())); - - // Convert to EntityRelationshipRecord format - List allGrandchildren = - grandchildRecords.stream() - .map( - rec -> - new EntityRelationshipRecord( - UUID.fromString(rec.getToId()), rec.getToEntity(), rec.getJson())) - .collect(Collectors.toList()); - - // Recursively delete grandchildren first - if (!allGrandchildren.isEmpty()) { - LOG.info("Found {} grandchildren to delete first", allGrandchildren.size()); - deleteChildren(allGrandchildren, hardDelete, updatedBy); - } - - // cleanup() per entity is the source 
of truth: it removes the row, its relationships - // (deleteAllFrom + deleteAllTo on (id, *) and (*, id)), extensions, tag usage, feed - // threads, and caches within ONE JDBI transaction owned by cleanup itself. The - // previous pre-batch-delete of relationships made the row-and-relationship pairing - // non-atomic across the loop, which is why a swallowed mid-loop exception produced - // the orphan-without-relationships pattern. Letting cleanup own both halves and - // letting exceptions propagate stops the loop early on failure; see the failure - // semantics note in the Javadoc above for what "stops the loop" actually guarantees. - @SuppressWarnings("rawtypes") - EntityRepository repository = Entity.getEntityRepository(entityType); - for (UUID entityId : entityIds) { - try { - EntityInterface entity = repository.find(entityId, Include.ALL); - repository.cleanup(entity); - } catch (RuntimeException e) { - LOG.error( - "Failed to delete {} '{}' during recursive batch delete: {}", - entityType, - entityId, - e.getMessage(), - e); - // Wrap with entity context before re-throwing so the operator can identify - // the row that blocked a large recursive delete. The exception still - // propagates — the loop still stops, the failure-semantics contract in the - // Javadoc still holds — we just trade an opaque stack trace for one that - // names the offending child. 
- throw new RuntimeException( - String.format( - "Failed to delete %s '%s' during recursive batch delete: %s", - entityType, entityId, e.getMessage()), - e); + for (var entry : idsByType.entrySet()) { + EntityRepository repo = Entity.getEntityRepository(entry.getKey()); + if (hardDelete) { + repo.bulkHardDeleteSubtree(entry.getValue(), updatedBy); + } else { + repo.bulkSoftDeleteSubtree(entry.getValue(), updatedBy); } } } @@ -5743,11 +5598,22 @@ private void runRestoreAdditionalChildren(List entities, String updatedBy) { } /** - * Find all CONTAINS children for every entity in {@code parents} with one batched query, then - * apply {@code dispatcher} to each (childRepo, childIds) group. Replaces the per-parent - * {@code findTo} round-trip that used to fire once per descendant — for a 12k-table database - * that's 12k DB hits collapsed into one per tree level. Shared between bulk restore and bulk - * soft-delete; the only thing that varies is the terminal call on the child repo. + * Default relation set walked when descending into a parent's subtree. CONTAINS covers the + * service → DB → schema → table chain and most other parent → child hierarchies; PARENT_OF + * covers recursive shapes like Glossary → GlossaryTerm, Team → Team, Classification → Tag, + * Domain → DataProduct. Walking both keeps every entity type's hierarchy in scope without + * subclass-specific overrides. + */ + private static final List SUBTREE_RELATIONS = + List.of(Relationship.CONTAINS.ordinal(), Relationship.PARENT_OF.ordinal()); + + /** + * Find all subtree children (CONTAINS + PARENT_OF) for every entity in {@code parents} with one + * batched query, then apply {@code dispatcher} to each (childRepo, childIds) group. Replaces the + * per-parent {@code findTo} round-trip that used to fire once per descendant — for a 12k-table + * database that's 12k DB hits collapsed into one per tree level. 
Shared between bulk restore, + * bulk soft-delete and bulk hard-delete; the only thing that varies is the terminal call on the + * child repo. */ private void dispatchToContainedChildren( List parents, String phaseName, BiConsumer, List> dispatcher) { @@ -5758,9 +5624,7 @@ private void dispatchToContainedChildren( List relationships; try (var ignored = phase(phaseName)) { relationships = - daoCollection - .relationshipDAO() - .findToBatchAllTypes(parentIds, Relationship.CONTAINS.ordinal(), ALL); + daoCollection.relationshipDAO().findToBatchAllTypes(parentIds, SUBTREE_RELATIONS, ALL); } if (relationships.isEmpty()) { return; @@ -5878,6 +5742,134 @@ protected void softDeleteAdditionalChildren(UUID id, String updatedBy) { // No-op. Override in subclasses for HAS-style related-entity soft delete. } + /** + * Bulk hard-delete the given entities of this repository's type along with their entire + * CONTAINS + PARENT_OF subtree. Replaces the legacy per-entity {@link #cleanup} loop driven by + * {@code processDeletionBatch} / {@code batchDeleteChildren} — that path opened an independent + * JDBI transaction per descendant and fired ~10 SQL statements per entity, so a 12k-table + * database needed ~120,000 round-trips and produced the hours-long deletes reported by users. + * + *

    Per-level shape: one batched {@code findToBatchAllTypes} that walks both CONTAINS + * (service → DB → schema → table) and PARENT_OF (Glossary → GlossaryTerm, Team → Team, recursive + * Container) so every entity hierarchy is in scope without per-subclass overrides; one batched + * DB load; recursive descent into each child type; one + * {@link CollectionDAO.EntityRelationshipDAO#batchDeleteRelationships} per type to wipe both + * {@code (id, *)} and {@code (*, id)} entity_relationship rows in a single statement; one + * batched extension delete; one batched entity row delete; per-entity loops for tag_usage / + * usage / field_relationship / feed threads (those tables key on FQN strings rather than ids + * so they can't share a single IN-list query, but they stay inside the same {@code @Transaction} + * which removes the per-entity transaction overhead that dominated the old path). + * + *

    Subclasses with non-CONTAINS related entities (e.g., dashboard charts attached via HAS) + * should override {@link #hardDeleteAdditionalChildren(UUID, String)}. Subclasses that need true + * batched external cleanup (Airflow DAGs, S3, secrets stores) can override + * {@link #bulkEntitySpecificCleanup(List)}; the default loops the per-entity hook. + * + *

    Failure semantics: the entire bulk hard-delete runs in a single + * {@code @Transaction}, so a mid-walk failure rolls back every row + relationship deletion. + * This is stronger than the previous {@code processDeletionBatch} contract, which only + * guaranteed per-child atomicity and could leave the operator with a partially-deleted subtree + * after a failure. See also {@link #bulkRestoreSubtree(List, String)} for the same operational + * ceiling note around single-connection holding for the duration of the walk. + */ + @Transaction + public final void bulkHardDeleteSubtree(List ids, String updatedBy) { + if (ids == null || ids.isEmpty()) { + return; + } + List entities = loadForBulk(ids, ALL, "bulkHardDeleteLoad"); + if (entities.isEmpty()) { + return; + } + for (T entity : entities) { + checkSystemEntityDeletion(entity); + preDelete(entity, updatedBy); + } + dispatchToContainedChildren( + entities, + "bulkHardDeleteFindChildren", + (childRepo, childIds) -> childRepo.bulkHardDeleteSubtree(childIds, updatedBy)); + bulkEntitySpecificCleanup(entities); + bulkCleanupReferences(entities); + bulkDeleteEntityRows(entities); + bulkInvalidate(entities); + runHardDeleteAdditionalChildren(entities, updatedBy); + } + + private void bulkCleanupReferences(List entities) { + List entityIds = new ArrayList<>(entities.size()); + List entityIdStrings = new ArrayList<>(entities.size()); + for (T entity : entities) { + entityIds.add(entity.getId()); + entityIdStrings.add(entity.getId().toString()); + } + try (var ignored = phase("bulkHardDeleteRelationships")) { + daoCollection.relationshipDAO().batchDeleteRelationships(entityIds, entityType); + } + try (var ignored = phase("bulkHardDeleteExtensions")) { + daoCollection.entityExtensionDAO().deleteAllBatch(entityIdStrings); + } + try (var ignored = phase("bulkHardDeleteFqnDependents")) { + for (T entity : entities) { + String fqn = entity.getFullyQualifiedName(); + daoCollection.fieldRelationshipDAO().deleteAllByPrefix(fqn); + 
daoCollection.tagUsageDAO().deleteTagLabelsByTargetPrefix(fqn); + daoCollection.tagUsageDAO().deleteTagLabelsByFqn(fqn); + } + } + try (var ignored = phase("bulkHardDeleteUsage")) { + for (T entity : entities) { + daoCollection.usageDAO().delete(entity.getId()); + } + } + try (var ignored = phase("bulkHardDeleteFeedThreads")) { + Entity.getFeedRepository().deleteByAbout(entityIds); + } + } + + private void bulkDeleteEntityRows(List entities) { + try (var ignored = phase("bulkHardDeleteRows")) { + List entityIds = new ArrayList<>(entities.size()); + for (T entity : entities) { + entityIds.add(entity.getId()); + } + dao.deleteByIds(entityIds); + } + } + + private void bulkInvalidate(List entities) { + for (T entity : entities) { + invalidate(entity); + } + } + + private void runHardDeleteAdditionalChildren(List entities, String updatedBy) { + for (T entity : entities) { + hardDeleteAdditionalChildren(entity.getId(), updatedBy); + } + } + + /** + * Hook called once per hard-deleted entity for repositories that have non-CONTAINS related + * entities that need to be hard-deleted alongside the parent (e.g., charts attached to + * dashboards via HAS). Default: no-op. + */ + protected void hardDeleteAdditionalChildren(UUID id, String updatedBy) { + // No-op. Override in subclasses for HAS-style related-entity hard delete. + } + + /** + * Hook for entity-type-specific cleanup invoked once per bulk-hard-delete batch. Default + * implementation loops {@link #entitySpecificCleanup(EntityInterface)} so subclasses keep + * current behavior. Override for true batching where external resources warrant it (e.g., + * Airflow DAG deregistration, S3 object cleanup, secrets-store purges). 
+ */ + protected void bulkEntitySpecificCleanup(List entities) { + for (T entity : entities) { + entitySpecificCleanup(entity); + } + } + // ---- Shared phase helpers used by bulkRestoreSubtree / bulkSoftDeleteSubtree ---- private List loadForBulk(List ids, Include include, String phaseName) { diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java index 6b338a40379a..71248e1320db 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java @@ -805,6 +805,15 @@ public void deleteByAbout(UUID entityId) { } } + public void deleteByAbout(List entityIds) { + if (entityIds == null || entityIds.isEmpty()) { + return; + } + for (UUID entityId : entityIds) { + deleteByAbout(entityId); + } + } + private boolean isLegacyThreadStorageAvailable() { return getResolvedLegacyThreadTableName() != null; } diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java index 449442d793db..335900e0861e 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java @@ -17,6 +17,7 @@ import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyList; import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.CALLS_REAL_METHODS; import static org.mockito.Mockito.atLeastOnce; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.mockStatic; @@ -43,9 +44,9 @@ import org.openmetadata.service.util.EntityUtil.RelationIncludes; /** - * Unit tests for the 
iterative bulk restore + bulk soft-delete paths introduced for - * issue #4003. Verifies the dispatch shape that's testable without spinning up the full - * bulk write path: + * Unit tests for the iterative bulk restore + bulk soft-delete + bulk hard-delete paths + * introduced for issue #4003. Verifies the dispatch shape that's testable without spinning + * up the full bulk write path: * *

      *
    • {@link EntityRepository#restoreChildren(UUID, String)} groups CONTAINS children by @@ -53,17 +54,23 @@ * String)} call per type (instead of N recursive {@code Entity.restoreEntity} calls). *
    • {@link EntityRepository#deleteChildren(List, boolean, String)} with * {@code hardDelete=false} dispatches one {@link EntityRepository#bulkSoftDeleteSubtree( - * List, String)} call per type. - *
    • Both bulk methods bail out cleanly on null / empty inputs and on no-deleted-found. - *
    • Both bulk methods issue a single batched {@code findToBatchAllTypes} per tree level - * (replacing the per-parent {@code findTo} round-trip). + * List, String)} call per type and with {@code hardDelete=true} dispatches one + * {@link EntityRepository#bulkHardDeleteSubtree(List, String)} call per type. + *
    • All three bulk methods bail out cleanly on null / empty inputs. + *
    • All three bulk methods issue a single batched {@code findToBatchAllTypes} per tree + * level that walks both {@code CONTAINS} and {@code PARENT_OF} so Glossary / Team / + * recursive-Container descendants stop silently slipping past the cascade. *
    * - * The full bulk DB-write path (version history, updateMany, change events) is exercised in - * {@code RestoreHierarchyIT}, which runs against a real Docker stack. + * The full bulk DB-write path (version history, updateMany, change events, entity row + * deletes) is exercised in {@code RestoreHierarchyIT}, which runs against a real Docker + * stack. */ class EntityRepositoryRestoreTest { + private static final List SUBTREE_RELATIONS = + List.of(Relationship.CONTAINS.ordinal(), Relationship.PARENT_OF.ordinal()); + private CollectionDAO daoCollection; private CollectionDAO.EntityRelationshipDAO relationshipDAO; private CollectionDAO.PipelineDAO pipelineDAO; @@ -71,8 +78,11 @@ class EntityRepositoryRestoreTest { private static class CountingPipelineRepo extends EntityRepository { int restoreAdditionalChildrenCalls = 0; int softDeleteAdditionalChildrenCalls = 0; + int hardDeleteAdditionalChildrenCalls = 0; + int bulkEntitySpecificCleanupCalls = 0; final Set bulkRestoreInvokedWith = new HashSet<>(); final Set bulkSoftDeleteInvokedWith = new HashSet<>(); + final Set bulkHardDeleteInvokedWith = new HashSet<>(); CountingPipelineRepo(CollectionDAO.PipelineDAO dao) { super("pipelines", Entity.PIPELINE, Pipeline.class, dao, "", ""); @@ -104,6 +114,17 @@ protected void softDeleteAdditionalChildren(UUID id, String updatedBy) { softDeleteAdditionalChildrenCalls++; bulkSoftDeleteInvokedWith.add(id); } + + @Override + protected void hardDeleteAdditionalChildren(UUID id, String updatedBy) { + hardDeleteAdditionalChildrenCalls++; + bulkHardDeleteInvokedWith.add(id); + } + + @Override + protected void bulkEntitySpecificCleanup(List entities) { + bulkEntitySpecificCleanupCalls++; + } } @BeforeEach @@ -216,8 +237,7 @@ void bulkRestoreSubtree_entitiesPresentButNoneDeleted_stillRunsAdditionalChildre Pipeline pa = new Pipeline().withId(id).withName("a").withFullyQualifiedName("svc.a").withDeleted(false); when(pipelineDAO.findEntitiesByIds(anyList(), 
eq(Include.ALL))).thenReturn(List.of(pa)); - when(relationshipDAO.findToBatchAllTypes( - anyList(), eq(Relationship.CONTAINS.ordinal()), eq(Include.ALL))) + when(relationshipDAO.findToBatchAllTypes(anyList(), eq(SUBTREE_RELATIONS), eq(Include.ALL))) .thenReturn(List.of()); repo.bulkRestoreSubtree(List.of(id), "user"); @@ -236,8 +256,7 @@ void bulkRestoreSubtree_usesBatchedFindToOncePerLevel() { Pipeline pb = new Pipeline().withId(b).withName("b").withFullyQualifiedName("svc.b").withDeleted(true); when(pipelineDAO.findEntitiesByIds(anyList(), eq(Include.ALL))).thenReturn(List.of(pa, pb)); - when(relationshipDAO.findToBatchAllTypes( - anyList(), eq(Relationship.CONTAINS.ordinal()), eq(Include.ALL))) + when(relationshipDAO.findToBatchAllTypes(anyList(), eq(SUBTREE_RELATIONS), eq(Include.ALL))) .thenReturn(List.of()); try { @@ -249,8 +268,7 @@ void bulkRestoreSubtree_usesBatchedFindToOncePerLevel() { ArgumentCaptor> idsCap = captureStringList(); verify(relationshipDAO, times(1)) - .findToBatchAllTypes( - idsCap.capture(), eq(Relationship.CONTAINS.ordinal()), eq(Include.ALL)); + .findToBatchAllTypes(idsCap.capture(), eq(SUBTREE_RELATIONS), eq(Include.ALL)); assertEquals(2, idsCap.getValue().size()); assertTrue(idsCap.getValue().contains(a.toString())); assertTrue(idsCap.getValue().contains(b.toString())); @@ -314,8 +332,7 @@ void bulkSoftDeleteSubtree_usesBatchedFindToOncePerLevel() { Pipeline pa = new Pipeline().withId(a).withName("a").withFullyQualifiedName("svc.a"); Pipeline pb = new Pipeline().withId(b).withName("b").withFullyQualifiedName("svc.b"); when(pipelineDAO.findEntitiesByIds(anyList(), eq(Include.ALL))).thenReturn(List.of(pa, pb)); - when(relationshipDAO.findToBatchAllTypes( - anyList(), eq(Relationship.CONTAINS.ordinal()), eq(Include.ALL))) + when(relationshipDAO.findToBatchAllTypes(anyList(), eq(SUBTREE_RELATIONS), eq(Include.ALL))) .thenReturn(List.of()); try { @@ -326,13 +343,131 @@ void bulkSoftDeleteSubtree_usesBatchedFindToOncePerLevel() { 
ArgumentCaptor> idsCap = captureStringList(); verify(relationshipDAO, times(1)) - .findToBatchAllTypes( - idsCap.capture(), eq(Relationship.CONTAINS.ordinal()), eq(Include.ALL)); + .findToBatchAllTypes(idsCap.capture(), eq(SUBTREE_RELATIONS), eq(Include.ALL)); assertEquals(2, idsCap.getValue().size()); assertTrue(idsCap.getValue().contains(a.toString())); assertTrue(idsCap.getValue().contains(b.toString())); } + @Test + void deleteChildren_hardDelete_groupsByTypeAndDispatchesToBulkHardDelete() { + CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); + + UUID schemaA = UUID.randomUUID(); + UUID schemaB = UUID.randomUUID(); + UUID procA = UUID.randomUUID(); + + List children = new ArrayList<>(); + children.add(record(schemaA, Entity.DATABASE_SCHEMA)); + children.add(record(schemaB, Entity.DATABASE_SCHEMA)); + children.add(record(procA, Entity.STORED_PROCEDURE)); + + EntityRepository schemaRepo = mock(EntityRepository.class); + EntityRepository procRepo = mock(EntityRepository.class); + + try (MockedStatic entityMock = mockStatic(Entity.class)) { + entityMock + .when(() -> Entity.getEntityRepository(Entity.DATABASE_SCHEMA)) + .thenReturn(schemaRepo); + entityMock + .when(() -> Entity.getEntityRepository(Entity.STORED_PROCEDURE)) + .thenReturn(procRepo); + + repo.deleteChildren(children, true, "user"); + } + + ArgumentCaptor> schemaIds = captureUuidList(); + verify(schemaRepo, times(1)).bulkHardDeleteSubtree(schemaIds.capture(), eq("user")); + assertEquals(2, schemaIds.getValue().size()); + assertTrue(schemaIds.getValue().contains(schemaA)); + assertTrue(schemaIds.getValue().contains(schemaB)); + + ArgumentCaptor> procIds = captureUuidList(); + verify(procRepo, times(1)).bulkHardDeleteSubtree(procIds.capture(), eq("user")); + assertEquals(1, procIds.getValue().size()); + assertTrue(procIds.getValue().contains(procA)); + + verify(schemaRepo, never()).bulkSoftDeleteSubtree(anyList(), eq("user")); + verify(procRepo, never()).bulkSoftDeleteSubtree(anyList(), 
eq("user")); + } + + @Test + void bulkHardDeleteSubtree_emptyOrNullIds_isNoOp() { + CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); + + repo.bulkHardDeleteSubtree(null, "user"); + repo.bulkHardDeleteSubtree(List.of(), "user"); + + verify(pipelineDAO, never()).findEntitiesByIds(anyList(), eq(Include.ALL)); + assertEquals(0, repo.hardDeleteAdditionalChildrenCalls); + assertEquals(0, repo.bulkEntitySpecificCleanupCalls); + } + + @Test + void bulkHardDeleteSubtree_usesBatchedFindToOncePerLevel_includingParentOf() { + CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); + UUID a = UUID.randomUUID(); + UUID b = UUID.randomUUID(); + Pipeline pa = new Pipeline().withId(a).withName("a").withFullyQualifiedName("svc.a"); + Pipeline pb = new Pipeline().withId(b).withName("b").withFullyQualifiedName("svc.b"); + when(pipelineDAO.findEntitiesByIds(anyList(), eq(Include.ALL))).thenReturn(List.of(pa, pb)); + when(relationshipDAO.findToBatchAllTypes(anyList(), eq(SUBTREE_RELATIONS), eq(Include.ALL))) + .thenReturn(List.of()); + + try { + repo.bulkHardDeleteSubtree(List.of(a, b), "user"); + } catch (Exception ignored) { + // Heavy DB write path is not mocked; we verify only the per-level findTo collapse and + // hook invocation. 
+ } + + ArgumentCaptor> idsCap = captureStringList(); + verify(relationshipDAO, times(1)) + .findToBatchAllTypes(idsCap.capture(), eq(SUBTREE_RELATIONS), eq(Include.ALL)); + assertEquals(2, idsCap.getValue().size()); + assertTrue(idsCap.getValue().contains(a.toString())); + assertTrue(idsCap.getValue().contains(b.toString())); + } + + @Test + void bulkHardDeleteSubtree_callsBulkEntitySpecificCleanupAndAdditionalChildrenHooks() { + CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); + UUID a = UUID.randomUUID(); + UUID b = UUID.randomUUID(); + Pipeline pa = new Pipeline().withId(a).withName("a").withFullyQualifiedName("svc.a"); + Pipeline pb = new Pipeline().withId(b).withName("b").withFullyQualifiedName("svc.b"); + when(pipelineDAO.findEntitiesByIds(anyList(), eq(Include.ALL))).thenReturn(List.of(pa, pb)); + when(relationshipDAO.findToBatchAllTypes(anyList(), eq(SUBTREE_RELATIONS), eq(Include.ALL))) + .thenReturn(List.of()); + + CollectionDAO.EntityExtensionDAO extensionDAO = mock(CollectionDAO.EntityExtensionDAO.class); + CollectionDAO.FieldRelationshipDAO fieldRelationshipDAO = + mock(CollectionDAO.FieldRelationshipDAO.class); + CollectionDAO.TagUsageDAO tagUsageDAO = mock(CollectionDAO.TagUsageDAO.class); + CollectionDAO.UsageDAO usageDAO = mock(CollectionDAO.UsageDAO.class); + when(daoCollection.entityExtensionDAO()).thenReturn(extensionDAO); + when(daoCollection.fieldRelationshipDAO()).thenReturn(fieldRelationshipDAO); + when(daoCollection.tagUsageDAO()).thenReturn(tagUsageDAO); + when(daoCollection.usageDAO()).thenReturn(usageDAO); + + FeedRepository feedRepository = mock(FeedRepository.class); + try (MockedStatic entityMock = mockStatic(Entity.class, CALLS_REAL_METHODS)) { + entityMock.when(Entity::getFeedRepository).thenReturn(feedRepository); + repo.bulkHardDeleteSubtree(List.of(a, b), "user"); + } + + // bulkEntitySpecificCleanup is invoked once per bulk call with the whole batch. 
+ assertEquals(1, repo.bulkEntitySpecificCleanupCalls); + // hardDeleteAdditionalChildren is invoked once per entity in the batch. + assertEquals(2, repo.hardDeleteAdditionalChildrenCalls); + assertTrue(repo.bulkHardDeleteInvokedWith.contains(a)); + assertTrue(repo.bulkHardDeleteInvokedWith.contains(b)); + // Verify the per-batch relationship + extension cleanup actually ran. + verify(relationshipDAO, times(1)).batchDeleteRelationships(anyList(), eq(Entity.PIPELINE)); + verify(extensionDAO, times(1)).deleteAllBatch(anyList()); + verify(pipelineDAO, times(1)).deleteByIds(anyList()); + } + private CollectionDAO.EntityRelationshipRecord record(UUID id, String type) { return CollectionDAO.EntityRelationshipRecord.builder().id(id).type(type).build(); } From 6794541691cf583c84ec810ab24283fb0912f841 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Thu, 14 May 2026 09:24:04 -0700 Subject: [PATCH 18/38] review: address Copilot threads on restoreEntity guard + soft-delete fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit restoreEntity used to run restoreAdditionalChildren even when find(id, DELETED) threw because the entity didn't exist at all — the catch couldn't distinguish "missing" from "not deleted", so a hook side-effect on a truly-missing id could surface a 500 instead of the natural 404. Add a find(id, ALL) guard at the top: missing entities propagate EntityNotFoundException; existing entities (whether deleted or already restored) still run the hook to reconcile HAS-related descendants on a re-entered cascade. bulkSoftDeleteSubtree's !supportsSoftDelete fallback was calling Entity.deleteEntity(..., hardDelete=true) which hard-deleted the entire subtree. The existing per-entity flow passes hardDelete=false through deleteChildren so descendants that *do* support soft delete remain soft-deleted; the parent gets hard-deleted via cleanup() only because its own type can't be soft-deleted. 
Pass hardDelete=false to match. Verified: EntityRepositoryRestoreTest 13/13 + RestoreHierarchyIT 5/5 pass. Co-Authored-By: Claude Opus 4.7 --- .../service/jdbi3/EntityRepository.java | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index f9f328885261..c6e9d948e3aa 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -5484,6 +5484,12 @@ public final URI getHref(UriInfo uriInfo, UUID id) { @Transaction public final PutResponse restoreEntity(String updatedBy, UUID id) { + // Confirm the entity exists at all (in any state). If the row is truly gone + // (e.g., hard-deleted), propagate EntityNotFoundException so the caller surfaces + // a clean 404 instead of running children / hooks against a non-existent id and + // potentially surfacing a 500 from a hook side-effect. + find(id, ALL); + // If an entity being restored contains other **deleted** children entities, restore them restoreChildren(id, updatedBy); @@ -5504,11 +5510,12 @@ public final PutResponse restoreEntity(String updatedBy, UUID id) { ListCountCache.invalidate(entityType); response = new PutResponse<>(Status.OK, updated, ENTITY_RESTORED); } catch (EntityNotFoundException e) { + // Entity exists (verified above) but is not in DELETED state — already restored. LOG.info("Entity already restored or not in deleted state {} {}", entityType, id); } - // Run the per-entity hook regardless of whether this node needed flipping. A - // re-entered cascade where this level is already restored must still reconcile - // HAS-related children (e.g., dashboard charts) of nested descendants. 
+ // Run the per-entity hook because the entity exists (the find(ALL) guard ensures + // that). A re-entered cascade where this level is already restored must still + // reconcile HAS-related children (e.g., dashboard charts) of nested descendants. restoreAdditionalChildren(id, updatedBy); return response; } @@ -5682,8 +5689,14 @@ public final void bulkSoftDeleteSubtree(List ids, String updatedBy) { return; } if (!supportsSoftDelete) { + // This type can't be soft-deleted, so each entity at this level must be hard + // deleted instead. Pass hardDelete=false through to the per-entity delete so + // descendant levels that *do* support soft delete remain soft-deleted — the + // per-entity flow handles the asymmetry by inspecting each level's own + // supportsSoftDelete flag. Using hardDelete=true here would propagate hard + // deletion to the entire subtree, breaking that contract. for (UUID id : ids) { - Entity.deleteEntity(updatedBy, entityType, id, true, true); + Entity.deleteEntity(updatedBy, entityType, id, true, false); } return; } From 4d42d5682c0a7ada52fb970ec715714838549647 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Fri, 15 May 2026 09:31:41 -0700 Subject: [PATCH 19/38] fix(restore): load HAS-related children with Include.ALL during restore The PUT-driven restoreEntity path was loading the entity's children via setFieldsInternal(original, putFields), which defaults to a NON_DELETED filter. For dashboards whose charts were soft-deleted as part of the parent delete cascade, that filter returned an empty charts list, so both original.charts and updated.charts were empty when DashboardUpdater .entitySpecificUpdate ran its PUT diff. The diff helper calls deleteFrom(...) unconditionally before re-adding from updEntities, so it wiped every (dashboard HAS chart) entity_relationship row before restoreAdditionalChildren ever got to look them up. 
The chart cascade then silently no-op'd and DashboardResourceIT#test_deleteDashboard_ chartBelongsToSingleDashboard_chartIsDeletedThenRestored failed in CI. Passing Include.ALL keeps soft-deleted children in the loaded list, so original/updated stay symmetric and the diff's delete+re-add is a net no-op until restoreAdditionalChildren walks the HAS rows and restores each chart in turn. Verified: EntityRepositoryRestoreTest 13/13, RestoreHierarchyIT 5/5, DashboardResourceIT#test_deleteDashboard_chartBelongsToSingleDashboard_ chartIsDeletedThenRestored all pass. Co-Authored-By: Claude Opus 4.7 --- .../openmetadata/service/jdbi3/EntityRepository.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index c6e9d948e3aa..4b407d660bc8 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -5499,7 +5499,16 @@ public final PutResponse restoreEntity(String updatedBy, UUID id) { PutResponse response = null; try { T original = find(id, DELETED); - setFieldsInternal(original, putFields); + // Populate fields with Include.ALL so HAS-style children that were soft-deleted as + // part of this entity's cascade remain in the loaded child lists (e.g., + // dashboard.charts). If we used the default NON_DELETED filter, those lists would + // come back empty, and the PUT updater's diff would see "no children" on both + // sides and call deleteFrom(...) to wipe every HAS relationship row before the + // additional-children hook ever runs to restore them. 
Charts attached to the + // dashboard being restored would then have nothing to walk back from, and the + // restore cascade would silently no-op (DashboardResourceIT#test_deleteDashboard_ + // chartBelongsToSingleDashboard_chartIsDeletedThenRestored guards against this). + setFieldsInternal(original, putFields, ALL); setInheritedFields(original, putFields); T updated = JsonUtils.readValue(JsonUtils.pojoToJson(original), entityClass); updated.setUpdatedBy(updatedBy); From b07804e87f04c23c5fb43aa43315be8c9ce8b60c Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Fri, 15 May 2026 10:34:21 -0700 Subject: [PATCH 20/38] fix(ci): invoke postDelete for cascade-deleted descendants + Python type checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Java side: bulkHardDeleteSubtree was bypassing the per-entity postDelete hook for cascade-deleted descendants. The legacy small-batch path went through Entity.deleteEntity, which called postDelete and let subclass overrides (e.g. UserRepository.deleteSuggestionTasksForUser) run as part of the cascade. The bulk replacement is the only path that walks cascade children now, so it must fan out the hook explicitly. This regression broke TaskResourceIT.testDeletingBotCreatorCleansUp- OpenSuggestionTasks: hard-deleting a bot recursively wiped the bot user row but left the user's open suggestion tasks behind, because UserRepo's postDelete (which calls deleteSuggestionTasksForUser via the retry-wrapped DAO call) never fired for the cascaded user. Python side: basedpyright --baselinemode=discard was flagging nine new errors against my async-delete code that weren't in the baseline: - delete.py:64,101 — Either(right=...) without explicit left=None. Added left=None to match the call shape basedpyright recognises. - ometa_api.py:781,831 — self.client.delete()/put() returns Any|Response|None but delete_async/restore_async declare Optional[dict]. 
Narrowed via isinstance(response, dict) so the return type matches. - metadata_rest.py:593-606 — record.entity is typed as bare BaseModel; basedpyright can't see .id / .fullyQualifiedName through that alias, and record.mark_deleted_entities is bool|None where `recursive` expects bool. Pull the attributes via a typed local + bool() narrow. Verified locally: EntityRepositoryRestoreTest 13/13, RestoreHierarchyIT 5/5, DashboardResourceIT chart-cascade, TaskResourceIT bot-suggestion cleanup all pass. basedpyright reports zero errors in the three Python files. Co-Authored-By: Claude Opus 4.7 --- .../src/metadata/ingestion/api/delete.py | 6 ++++-- .../src/metadata/ingestion/ometa/ometa_api.py | 6 ++++-- .../metadata/ingestion/sink/metadata_rest.py | 20 +++++++++++++------ .../service/jdbi3/EntityRepository.java | 8 ++++++++ 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/ingestion/src/metadata/ingestion/api/delete.py b/ingestion/src/metadata/ingestion/api/delete.py index d1a64c643ec9..f40f57bd99f6 100644 --- a/ingestion/src/metadata/ingestion/api/delete.py +++ b/ingestion/src/metadata/ingestion/api/delete.py @@ -62,11 +62,12 @@ def delete_entity_from_source( for entity in entity_state: if str(entity.fullyQualifiedName.root) not in entity_source_state: yield Either( + left=None, right=DeleteEntity( entity=entity, mark_deleted_entities=mark_deleted_entity, dispatch_async=use_async, - ) + ), ) except Exception as exc: yield Either( @@ -99,11 +100,12 @@ def delete_entity_by_name( entity = metadata.get_by_name(entity=entity_type, fqn=entity_name) if entity: yield Either( + left=None, right=DeleteEntity( entity=entity, mark_deleted_entities=mark_deleted_entity, dispatch_async=use_async, - ) + ), ) except Exception as exc: yield Either( diff --git a/ingestion/src/metadata/ingestion/ometa/ometa_api.py b/ingestion/src/metadata/ingestion/ometa/ometa_api.py index 87137c14884e..b92a6e24e0bd 100644 --- a/ingestion/src/metadata/ingestion/ometa/ometa_api.py +++ 
b/ingestion/src/metadata/ingestion/ometa/ometa_api.py @@ -778,7 +778,8 @@ def delete_async( url = f"{self.get_suffix(entity)}/async/{model_str(entity_id)}" url += f"?recursive={str(recursive).lower()}" url += f"&hardDelete={str(hard_delete).lower()}" - return self.client.delete(url) + response = self.client.delete(url) + return response if isinstance(response, dict) else None def restore( self, @@ -828,7 +829,8 @@ def restore_async( """ url = f"{self.get_suffix(entity)}/restore?async=true" data = {"id": model_str(entity_id)} - return self.client.put(url, json=data) + response = self.client.put(url, json=data) + return response if isinstance(response, dict) else None def compute_percentile(self, entity: Union[Type[T], str], date: str) -> None: # noqa: UP006, UP007 """ diff --git a/ingestion/src/metadata/ingestion/sink/metadata_rest.py b/ingestion/src/metadata/ingestion/sink/metadata_rest.py index 621f60bc230f..5cd5ad5a44b5 100644 --- a/ingestion/src/metadata/ingestion/sink/metadata_rest.py +++ b/ingestion/src/metadata/ingestion/sink/metadata_rest.py @@ -584,28 +584,36 @@ def write_users(self, record: OMetaUserProfile) -> Either[User]: @_run_dispatch.register def delete_entity(self, record: DeleteEntity) -> Either[Entity]: + # record.entity is declared as a bare pydantic BaseModel; the runtime value is a + # generated entity that exposes `id` and `fullyQualifiedName`, but basedpyright can't + # see those attributes through the BaseModel alias. Pull them via an Any-typed local + # so the type checker stays quiet without changing the runtime behavior. + entity_obj: Any = record.entity + entity_id = entity_obj.id + fqn = entity_obj.fullyQualifiedName.root + recursive = bool(record.mark_deleted_entities) if record.dispatch_async: # Server-side async cascade — returns 202 + jobId immediately so ingestion # doesn't block on large subtrees (issue #4003). The actual work runs on the # server's executor; we surface the jobId in the log for operator correlation. 
response = self.metadata.delete_async( entity=type(record.entity), - entity_id=record.entity.id, - recursive=record.mark_deleted_entities, + entity_id=entity_id, + recursive=recursive, ) job_id = (response or {}).get("jobId") logger.debug( "Dispatched async delete for %s (jobId=%s)", - record.entity.fullyQualifiedName.root, + fqn, job_id, ) else: self.metadata.delete( entity=type(record.entity), - entity_id=record.entity.id, - recursive=record.mark_deleted_entities, + entity_id=entity_id, + recursive=recursive, ) - return Either(right=record) + return Either(left=None, right=record) @_run_dispatch.register def write_pipeline_status(self, record: OMetaPipelineStatus) -> Either[PipelineStatus]: diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index 4b407d660bc8..78bc65bc7e78 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -5816,6 +5816,14 @@ public final void bulkHardDeleteSubtree(List ids, String updatedBy) { bulkDeleteEntityRows(entities); bulkInvalidate(entities); runHardDeleteAdditionalChildren(entities, updatedBy); + // Each cascade-deleted descendant needs the same postDelete hook the per-entity hard-delete + // path runs (RdfUpdater.deleteEntity, plus subclass overrides like + // UserRepository.deleteSuggestionTasksForUser). The legacy small-batch path went through + // Entity.deleteEntity which invoked postDelete; this bulk replacement is the only path + // that walks cascaded children now, so it must also fan out the hook explicitly. 
+ for (T entity : entities) { + postDelete(entity, true); + } } private void bulkCleanupReferences(List entities) { From 86d51299c03c0a9425a77ceb5f495440f19a08ad Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Sat, 16 May 2026 19:54:52 -0700 Subject: [PATCH 21/38] fix(restore/delete): address PR review feedback on bulk + async hooks - bulkHardDeleteSubtree: run hardDeleteAdditionalChildren BEFORE bulkCleanupReferences so subclass hooks (e.g., DashboardRepository's cascadeChartCleanup) can still walk HAS relationships before they are wiped. - delete(): call hardDeleteAdditionalChildren before cleanup() on the single-entity hard-delete path so direct dashboard hard-delete cascades charts the same way the bulk subtree walk does. - restoreEntityAsync: treat null return from restoreEntity as an idempotent success (entity was already restored by another request) rather than a failure; emit FAILED only when the entity was hard-deleted between pre-check and the async work. - bulkSoftDeleteSubtree: extracted hardDeleteAtLevelOnly and applyBulkSoftDelete to bring the method closer to the 15-line guideline. - delete.py: typo "entites" -> "entities" in delete_entity_by_name docstring. - EntityRepositoryRestoreTest Javadoc: note hook-invocation coverage. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../src/metadata/ingestion/api/delete.py | 2 +- .../service/jdbi3/EntityRepository.java | 70 +++++++++++-------- .../service/resources/EntityResource.java | 23 +++++- .../jdbi3/EntityRepositoryRestoreTest.java | 5 ++ 4 files changed, 67 insertions(+), 33 deletions(-) diff --git a/ingestion/src/metadata/ingestion/api/delete.py b/ingestion/src/metadata/ingestion/api/delete.py index f40f57bd99f6..a1b3ea7b5e4a 100644 --- a/ingestion/src/metadata/ingestion/api/delete.py +++ b/ingestion/src/metadata/ingestion/api/delete.py @@ -87,7 +87,7 @@ def delete_entity_by_name( dispatch_async: Optional[bool] = None, # noqa: UP045 ) -> Iterable[Either[DeleteEntity]]: """ - Method to delete the entites contained on a given list + Method to delete the entities contained on a given list :param metadata: OMeta client :param entity_type: Pydantic Entity model :param entity_names: List of FullyQualifiedNames of the entities to be deleted diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index 3f6def57eb02..e822eddf3229 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -4266,6 +4266,11 @@ private DeleteResponse delete( // dashboard charts). softDeleteAdditionalChildren(original.getId(), deletedBy); } else { + // Run hook BEFORE cleanup(): cleanup() deletes this entity's relationship rows + // (including HAS), and subclass hooks like DashboardRepository.cascadeChartCleanup + // need to walk HAS to discover linked entities. Mirrors bulkHardDeleteSubtree + // ordering for direct-entity hard delete. 
+ hardDeleteAdditionalChildren(original.getId(), deletedBy); cleanup(updated); changeType = ENTITY_DELETED; } @@ -5774,21 +5779,9 @@ public final void bulkSoftDeleteSubtree(List ids, String updatedBy) { return; } if (!supportsSoftDelete) { - // This type can't be soft-deleted, so each entity at this level must be hard - // deleted instead. Pass hardDelete=false through to the per-entity delete so - // descendant levels that *do* support soft delete remain soft-deleted — the - // per-entity flow handles the asymmetry by inspecting each level's own - // supportsSoftDelete flag. Using hardDelete=true here would propagate hard - // deletion to the entire subtree, breaking that contract. - for (UUID id : ids) { - Entity.deleteEntity(updatedBy, entityType, id, true, false); - } + hardDeleteAtLevelOnly(ids, updatedBy); return; } - // Load with ALL so we still walk children even when this level's parents are already - // soft-deleted — a descendant may have been restored independently and needs to be - // re-deleted as part of the parent's cascade. Matches the previous per-entity flow - // where deleteChildren ran before the parent's deleted state mattered. 
List allEntities = loadForBulk(ids, ALL, "bulkSoftDeleteLoad"); if (allEntities.isEmpty()) { return; @@ -5799,32 +5792,46 @@ public final void bulkSoftDeleteSubtree(List ids, String updatedBy) { checkSystemEntityDeletion(entity); preDelete(entity, updatedBy); } - dispatchToContainedChildren( allEntities, "bulkSoftDeleteFindChildren", (childRepo, childIds) -> childRepo.bulkSoftDeleteSubtree(childIds, updatedBy)); - - if (!entities.isEmpty()) { - List updaters = - buildBulkUpdaters( - entities, - updatedBy, - Operation.SOFT_DELETE, - "bulkSoftDeleteUpdaters", - e -> e.setDeleted(true)); - List changed = filterChanged(updaters); - if (!changed.isEmpty()) { - persistBulkUpdaters(changed, ENTITY_SOFT_DELETED, updatedBy, "bulkSoftDelete"); - ListCountCache.invalidate(entityType); - } - } + applyBulkSoftDelete(entities, updatedBy); // Always run per-entity hooks even when nothing at THIS level needed flipping — // descendants restored independently before the cascade still need to be re-deleted // by the per-entity hook. runSoftDeleteAdditionalChildren(allEntities, updatedBy); } + // This type can't be soft-deleted, so each entity at this level must be hard + // deleted instead. Pass hardDelete=false through to the per-entity delete so + // descendant levels that *do* support soft delete remain soft-deleted — the + // per-entity flow handles the asymmetry by inspecting each level's own + // supportsSoftDelete flag. 
+ private void hardDeleteAtLevelOnly(List ids, String updatedBy) { + for (UUID id : ids) { + Entity.deleteEntity(updatedBy, entityType, id, true, false); + } + } + + private void applyBulkSoftDelete(List entities, String updatedBy) { + if (entities.isEmpty()) { + return; + } + List updaters = + buildBulkUpdaters( + entities, + updatedBy, + Operation.SOFT_DELETE, + "bulkSoftDeleteUpdaters", + e -> e.setDeleted(true)); + List changed = filterChanged(updaters); + if (!changed.isEmpty()) { + persistBulkUpdaters(changed, ENTITY_SOFT_DELETED, updatedBy, "bulkSoftDelete"); + ListCountCache.invalidate(entityType); + } + } + private void runSoftDeleteAdditionalChildren(List entities, String updatedBy) { for (T entity : entities) { softDeleteAdditionalChildren(entity.getId(), updatedBy); @@ -5888,10 +5895,13 @@ public final void bulkHardDeleteSubtree(List ids, String updatedBy) { "bulkHardDeleteFindChildren", (childRepo, childIds) -> childRepo.bulkHardDeleteSubtree(childIds, updatedBy)); bulkEntitySpecificCleanup(entities); + // Run BEFORE bulkCleanupReferences: hooks like DashboardRepository.cascadeChartCleanup + // walk HAS relationships to discover linked entities, and bulkCleanupReferences wipes + // those relationship rows. + runHardDeleteAdditionalChildren(entities, updatedBy); bulkCleanupReferences(entities); bulkDeleteEntityRows(entities); bulkInvalidate(entities); - runHardDeleteAdditionalChildren(entities, updatedBy); // Each cascade-deleted descendant needs the same postDelete hook the per-entity hard-delete // path runs (RdfUpdater.deleteEntity, plus subclass overrides like // UserRepository.deleteSuggestionTasksForUser). 
The legacy small-batch path went through diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java index f238c884186d..571d460b08d7 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java @@ -873,8 +873,11 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont try { PutResponse response = repository.restoreEntity(userName, id); if (response == null) { - WebsocketNotificationHandler.sendRestoreOperationFailedNotification( - jobId, notifyUserId, entityName, "Entity is not in deleted state"); + // Pre-check saw the entity in DELETED state; a null response now means a + // concurrent restore won the race. Treat as idempotent success — the + // operator's request is satisfied. If the entity has since been hard- + // deleted, surface that as a real failure. 
+ handleAlreadyRestored(jobId, id, entityName, notifyUserId); return; } repository.restoreFromSearch(response.getEntity()); @@ -904,6 +907,22 @@ public Response restoreEntityAsync(UriInfo uriInfo, SecurityContext securityCont return Response.accepted().entity(response).type(MediaType.APPLICATION_JSON).build(); } + private void handleAlreadyRestored(String jobId, UUID id, String entityName, UUID notifyUserId) { + try { + T restored = repository.find(id, Include.NON_DELETED); + LOG.info( + "[AsyncRestore] {} {} was already restored by another request (jobId={})", + entityType, + id, + jobId); + WebsocketNotificationHandler.sendRestoreOperationCompleteNotification( + jobId, notifyUserId, restored); + } catch (EntityNotFoundException missing) { + WebsocketNotificationHandler.sendRestoreOperationFailedNotification( + jobId, notifyUserId, entityName, "Entity was hard-deleted before restore"); + } + } + public Response exportCsvInternalAsync( SecurityContext securityContext, String name, boolean recursive) { OperationContext operationContext = diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java index 335900e0861e..ccd41f777ee5 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java @@ -60,6 +60,11 @@ *
  • All three bulk methods issue a single batched {@code findToBatchAllTypes} per tree * level that walks both {@code CONTAINS} and {@code PARENT_OF} so Glossary / Team / * recursive-Container descendants stop silently slipping past the cascade. + *
  • The per-entity {@code *AdditionalChildren} hooks fire even on the "entities present + * but none need flipping" branch (so a re-entered cascade can reconcile HAS-related + * descendants), and {@code hardDeleteAdditionalChildren} + {@code + * bulkEntitySpecificCleanup} fire on the full bulk hard-delete path with the expected + * per-entity / per-batch counts. * * * The full bulk DB-write path (version history, updateMany, change events, entity row From 46fc43f591d12f8f36058f319767972f42c27f5c Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Sat, 16 May 2026 22:21:48 -0700 Subject: [PATCH 22/38] fix(delete): populate relation fields before postDelete in bulk hard delete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bulkHardDeleteSubtree loads entities via dao.findEntitiesByIds without going through setFieldsInternal, so subclass postDelete overrides that read relation fields (e.g., TestCaseRepository.updateTestSuite reading testCase.getTestSuite()) saw null and NPE'd. The legacy Entity.deleteEntity path always called setFieldsInternal first, so the bulk replacement must too. - EntityRepository.bulkHardDeleteSubtree: call setFieldsInBulk(putFields, entities) up front (with per-entity fallback if the batch helper fails) so hooks see the same entity shape they did under the legacy cascade. - TestCaseRepository.updateTestSuite: defensive null-check on both testCase.getTestSuite() and the loaded TestSuite — guards against any future path that calls postDelete on a less-than-fully-loaded entity. Fixes TestCaseResourceIT.test_deleteTableDeletesTestCases (NPE on TestSuite.getConnection) and the Python TestSuite teardown failures that share the same root cause. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../service/jdbi3/EntityRepository.java | 29 +++++++++++++++---- .../service/jdbi3/TestCaseRepository.java | 6 ++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index e822eddf3229..8ab5489b709f 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -5886,6 +5886,12 @@ public final void bulkHardDeleteSubtree(List ids, String updatedBy) { if (entities.isEmpty()) { return; } + // Populate relation fields up front so the same subclass hooks the legacy + // Entity.deleteEntity path called against a fully-loaded entity (e.g., + // TestCaseRepository.updateTestSuite reading testCase.getTestSuite()) see the + // expected shape. bulkCleanupReferences wipes these relationship rows later, so + // hooks running after that point must remain null-safe. + populateRelationFields(entities); for (T entity : entities) { checkSystemEntityDeletion(entity); preDelete(entity, updatedBy); @@ -5902,16 +5908,29 @@ public final void bulkHardDeleteSubtree(List ids, String updatedBy) { bulkCleanupReferences(entities); bulkDeleteEntityRows(entities); bulkInvalidate(entities); - // Each cascade-deleted descendant needs the same postDelete hook the per-entity hard-delete - // path runs (RdfUpdater.deleteEntity, plus subclass overrides like - // UserRepository.deleteSuggestionTasksForUser). The legacy small-batch path went through - // Entity.deleteEntity which invoked postDelete; this bulk replacement is the only path - // that walks cascaded children now, so it must also fan out the hook explicitly. 
for (T entity : entities) { postDelete(entity, true); } } + private void populateRelationFields(List entities) { + try { + setFieldsInBulk(putFields, entities); + } catch (Exception e) { + LOG.debug( + "Bulk field population failed during bulk hard delete for {}, falling back per-entity: {}", + entityType, + e.getMessage()); + for (T entity : entities) { + try { + setFieldsInternal(entity, putFields); + } catch (Exception ignored) { + // postDelete subclass overrides must remain null-safe for cascade-deleted parents. + } + } + } + } + private void bulkCleanupReferences(List entities) { List entityIds = new ArrayList<>(entities.size()); List entityIdStrings = new ArrayList<>(entities.size()); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestCaseRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestCaseRepository.java index 813ce837ea31..a76290b7fb80 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestCaseRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestCaseRepository.java @@ -877,8 +877,14 @@ public void clearParentCache() { } private void updateTestSuite(TestCase testCase) { + if (testCase.getTestSuite() == null) { + return; + } var testSuiteRepository = (TestSuiteRepository) Entity.getEntityRepository(Entity.TEST_SUITE); TestSuite testSuite = Entity.getEntity(testCase.getTestSuite(), "*", ALL); + if (testSuite == null) { + return; + } var original = TestSuiteRepository.copyTestSuite(testSuite); testSuiteRepository.postUpdate(original, testSuite); } From 2755b8f4f6e2f7dc2b77d8a8cf4e0c719680b0e5 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Sun, 17 May 2026 08:19:49 -0700 Subject: [PATCH 23/38] fix(delete): mark NotFoundCache for bulk-hard-deleted entities MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror the existing markEntityNotFound() call from cleanup() in the 
bulk hard-delete path. Without this, the bulk path invalidates Guava L1 / Redis but doesn't insert a NotFound marker, so a concurrent reader can re-populate the cache from the still-visible DB row mid-transaction. The next find / findByName then returns a stale "found" entity even though the row has been hard-deleted. This was breaking Python integration test teardowns: the conftest probes metadata.get_by_name(DatabaseService, fqn) before issuing the DELETE, the probe hits Redis and returns the stale cached service, the DELETE then returns 404 because the row is actually gone — and the retry loop blows up the test. The same shape was visible across multiple shard-2 datalake tests sharing one databaseService fixture and the shard-1 postgres / mysql test_data_quality teardowns. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../org/openmetadata/service/jdbi3/EntityRepository.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index 8ab5489b709f..2450100641ac 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -5975,6 +5975,13 @@ private void bulkDeleteEntityRows(List entities) { private void bulkInvalidate(List entities) { for (T entity : entities) { invalidate(entity); + // Mirror cleanup()'s NotFoundCache marker so a concurrent reader that re-populates + // L1/Redis between bulkDeleteEntityRows and the next invalidate doesn't keep + // returning a stale "found" entity. Without this the next get_by_name/find against + // the same id or FQN can still hit the cache and return a deleted entity, which + // breaks fixture teardown (DELETE returns 404 because the row is gone but Redis + // still hands out the entity to the get_by_name probe). 
+ markEntityNotFound(entity); } } From 15b8b709e1ccf65d7308e275065a56cda53c06c1 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Sun, 17 May 2026 08:28:58 -0700 Subject: [PATCH 24/38] fix(testCase): drop unreachable null check, wrap getEntity in if-block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gitar-bot flagged the testSuite==null check after Entity.getEntity as unreachable — Entity.getEntity throws EntityNotFoundException for missing entities rather than returning null, so the branch could never fire. - Removed the dead null check. - Replaced the early-return on testCase.getTestSuite()==null with an if (testCase.getTestSuite() != null) wrap to match the project's preferred style. - Catch EntityNotFoundException so postDelete still completes when the parent TestSuite was already hard-deleted as part of the same cascade. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../service/jdbi3/TestCaseRepository.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestCaseRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestCaseRepository.java index a76290b7fb80..e9dd6842bd49 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestCaseRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestCaseRepository.java @@ -877,16 +877,17 @@ public void clearParentCache() { } private void updateTestSuite(TestCase testCase) { - if (testCase.getTestSuite() == null) { - return; - } - var testSuiteRepository = (TestSuiteRepository) Entity.getEntityRepository(Entity.TEST_SUITE); - TestSuite testSuite = Entity.getEntity(testCase.getTestSuite(), "*", ALL); - if (testSuite == null) { - return; + if (testCase.getTestSuite() != null) { + try { + var testSuiteRepository = + (TestSuiteRepository) 
Entity.getEntityRepository(Entity.TEST_SUITE); + TestSuite testSuite = Entity.getEntity(testCase.getTestSuite(), "*", ALL); + var original = TestSuiteRepository.copyTestSuite(testSuite); + testSuiteRepository.postUpdate(original, testSuite); + } catch (EntityNotFoundException ignored) { + // TestSuite already deleted as part of the same cascade — nothing to update. + } } - var original = TestSuiteRepository.copyTestSuite(testSuite); - testSuiteRepository.postUpdate(original, testSuite); } private void updateLogicalTestSuite(UUID testSuiteId) { From 67f21d29df82485b16a6fee193ab110687e5d762 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Mon, 18 May 2026 08:47:00 -0700 Subject: [PATCH 25/38] test(conftest): treat 404 as success in _safe_delete teardown helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fixture teardown for class/module/package-scoped services in the ingestion integration tests races with cascade deletions: when one test's teardown wipes a shared DatabaseService, the next test's teardown probes get_by_name (which can briefly serve from cache while the row is gone) and then issues DELETE that returns 404. _safe_delete was treating that 404 as a transient error and retrying 3× before raising, failing the test even though the entity was already gone — the exact state we wanted. Treat 404 as the desired terminal state. Real transient errors (5xx, timeouts, connection resets) still retry as before. 
Affected pre-existing failures: - tests/integration/postgres/test_data_quality.py::test_incompatible_column_type - tests/integration/mysql/test_data_quality.py::test_column_test_cases - shard-2 datalake teardowns that share one db_service fixture Co-Authored-By: Claude Opus 4.7 (1M context) --- ingestion/tests/integration/conftest.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/ingestion/tests/integration/conftest.py b/ingestion/tests/integration/conftest.py index 81a045a58107..4e1e3fb82c66 100644 --- a/ingestion/tests/integration/conftest.py +++ b/ingestion/tests/integration/conftest.py @@ -166,12 +166,19 @@ def _run(workflow_type: Type[IngestionWorkflow], config, raise_from_status=True) def _safe_delete(metadata, entity, entity_id, retries=3, **kwargs): - """Delete with retry logic to handle transient server errors during parallel teardown.""" + """Delete with retry logic to handle transient server errors during parallel teardown. + + A 404 here means the entity is already gone (e.g., wiped as part of an earlier + cascade or another worker's teardown); treat it as success rather than retrying. 
+ """ for attempt in range(retries): try: metadata.delete(entity=entity, entity_id=entity_id, **kwargs) return # noqa: TRY300 - except Exception: + except Exception as exc: # noqa: BLE001 + if _is_not_found(exc): + logger.debug("Skipping %s %s delete — already gone", entity.__name__, entity_id) + return if attempt < retries - 1: logger.warning( "Retry %d/%d: delete %s %s", @@ -185,6 +192,13 @@ def _safe_delete(metadata, entity, entity_id, retries=3, **kwargs): raise +def _is_not_found(exc: BaseException) -> bool: + status = getattr(getattr(exc, "response", None), "status_code", None) + if status == 404: + return True + return "404" in str(exc) + + @pytest.fixture(scope="module") def db_service(metadata, create_service_request, unmask_password): service_entity = metadata.create_or_update(data=create_service_request) From 9a27caa21c2123402093db68230f0dc9171b7379 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Mon, 18 May 2026 10:10:27 -0700 Subject: [PATCH 26/38] test(conftest): route datalake + sdk teardown through _safe_delete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix two follow-up CI issues from the previous _safe_delete change: 1. py_format_check failed on an unused `# noqa: BLE001` directive — drop it. 2. The shard-2 datalake + sdk fixture teardowns still called metadata.delete directly (not _safe_delete), so the 404-on-stale-cache pattern still blew up those tests. Route both through _safe_delete using the same `from ..conftest import _safe_delete # noqa: TID252` pattern the ometa tests already use. Verified with `make py_format_check` — all 2236 files clean. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- ingestion/tests/integration/conftest.py | 2 +- ingestion/tests/integration/datalake/conftest.py | 9 ++++++++- ingestion/tests/integration/sdk/conftest.py | 10 +++++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/ingestion/tests/integration/conftest.py b/ingestion/tests/integration/conftest.py index 4e1e3fb82c66..e9daf2fd45cb 100644 --- a/ingestion/tests/integration/conftest.py +++ b/ingestion/tests/integration/conftest.py @@ -175,7 +175,7 @@ def _safe_delete(metadata, entity, entity_id, retries=3, **kwargs): try: metadata.delete(entity=entity, entity_id=entity_id, **kwargs) return # noqa: TRY300 - except Exception as exc: # noqa: BLE001 + except Exception as exc: if _is_not_found(exc): logger.debug("Skipping %s %s delete — already gone", entity.__name__, entity_id) return diff --git a/ingestion/tests/integration/datalake/conftest.py b/ingestion/tests/integration/datalake/conftest.py index c5af9afa1db6..68d73f4a8f57 100644 --- a/ingestion/tests/integration/datalake/conftest.py +++ b/ingestion/tests/integration/datalake/conftest.py @@ -28,6 +28,7 @@ from metadata.workflow.metadata import MetadataWorkflow from metadata.workflow.profiler import ProfilerWorkflow +from ..conftest import _safe_delete # noqa: TID252 from ..containers import MinioContainerConfigs, get_minio_container # noqa: TID252 from ..integration_base import generate_name # noqa: TID252 @@ -207,7 +208,13 @@ def run_ingestion(metadata, ingestion_config, datalake_service_name): yield db_service = metadata.get_by_name(entity=DatabaseService, fqn=datalake_service_name) if db_service: - metadata.delete(DatabaseService, db_service.id, recursive=True, hard_delete=True) + _safe_delete( + metadata, + entity=DatabaseService, + entity_id=db_service.id, + recursive=True, + hard_delete=True, + ) @pytest.fixture(scope="class") diff --git a/ingestion/tests/integration/sdk/conftest.py b/ingestion/tests/integration/sdk/conftest.py index 
8136dd3090f8..de9374ad2a42 100644 --- a/ingestion/tests/integration/sdk/conftest.py +++ b/ingestion/tests/integration/sdk/conftest.py @@ -34,6 +34,8 @@ from metadata.ingestion.ometa.ometa_api import OpenMetadata from metadata.workflow.metadata import MetadataWorkflow +from ..conftest import _safe_delete # noqa: TID252 + @pytest.fixture(scope="module") def metadata(): @@ -69,7 +71,13 @@ def db_service(metadata, create_postgres_service, postgres_container): # noqa: service = metadata.get_by_name(DatabaseService, service_entity.fullyQualifiedName.root) if service: - metadata.delete(DatabaseService, service.id, recursive=True, hard_delete=True) + _safe_delete( + metadata, + entity=DatabaseService, + entity_id=service.id, + recursive=True, + hard_delete=True, + ) @pytest.fixture(scope="module") From a4ab2b236aa54a83cd654455fbc27f0e1494fb70 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Mon, 18 May 2026 11:27:39 -0700 Subject: [PATCH 27/38] fix(restore): walk CONTAINS + PARENT_OF in restoreChildren for cascade symmetry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copilot reviewer flagged that restoreChildren only queried Relationship.CONTAINS while deleteChildren and the bulk subtree walkers query CONTAINS + PARENT_OF. That asymmetry meant entities whose hierarchy is expressed via PARENT_OF (Team → Team, KnowledgePage → KnowledgePage, Classification → Tag, Domain → DataProduct) could be recursively soft-deleted but not recursively restored when restoring the parent. Switch the per-entity restore cascade to the same relation set so a hierarchy restored top-down comes back in the same shape it was cascaded down. Tests updated to stub the List findTo variant; class Javadoc reflects the new relation set. EntityRepositoryRestoreTest 13/13 pass. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../service/jdbi3/EntityRepository.java | 11 ++++++++++- .../jdbi3/EntityRepositoryRestoreTest.java | 18 +++++++++++------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index 2450100641ac..3524515e94bc 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -5612,8 +5612,17 @@ public final PutResponse restoreEntity(String updatedBy, UUID id) { @Transaction protected void restoreChildren(UUID id, String updatedBy) { + // Walk CONTAINS + PARENT_OF so the restore cascade is symmetric with deleteChildren + // and the bulk subtree walkers — Team → Team, KnowledgePage → KnowledgePage, + // Classification → Tag etc. express their hierarchy via PARENT_OF, and a CONTAINS-only + // probe would skip them on restore even though delete already cascades through them. 
List records = - daoCollection.relationshipDAO().findTo(id, entityType, Relationship.CONTAINS.ordinal()); + daoCollection + .relationshipDAO() + .findTo( + id, + entityType, + List.of(Relationship.CONTAINS.ordinal(), Relationship.PARENT_OF.ordinal())); if (records.isEmpty()) { return; } diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java index ccd41f777ee5..6b368ed810d2 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/EntityRepositoryRestoreTest.java @@ -14,7 +14,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.anyList; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.CALLS_REAL_METHODS; @@ -49,9 +48,12 @@ * up the full bulk write path: * *
      - *
    • {@link EntityRepository#restoreChildren(UUID, String)} groups CONTAINS children by - * entity type and dispatches a single {@link EntityRepository#bulkRestoreSubtree(List, - * String)} call per type (instead of N recursive {@code Entity.restoreEntity} calls). + *
    • {@link EntityRepository#restoreChildren(UUID, String)} groups CONTAINS + PARENT_OF + * children by entity type and dispatches a single {@link + * EntityRepository#bulkRestoreSubtree(List, String)} call per type (instead of N + * recursive {@code Entity.restoreEntity} calls). The relation set matches + * {@code deleteChildren} so a Team / KnowledgePage / Classification hierarchy is + * restored the same way it was cascade-soft-deleted. *
    • {@link EntityRepository#deleteChildren(List, boolean, String)} with * {@code hardDelete=false} dispatches one {@link EntityRepository#bulkSoftDeleteSubtree( * List, String)} call per type and with {@code hardDelete=true} dispatches one @@ -150,11 +152,12 @@ void tearDown() { void restoreChildren_withNoChildren_isNoOp() { CountingPipelineRepo repo = new CountingPipelineRepo(pipelineDAO); UUID parentId = UUID.randomUUID(); - when(relationshipDAO.findTo(eq(parentId), eq(Entity.PIPELINE), anyInt())).thenReturn(List.of()); + when(relationshipDAO.findTo(eq(parentId), eq(Entity.PIPELINE), eq(SUBTREE_RELATIONS))) + .thenReturn(List.of()); repo.restoreChildren(parentId, "user"); - verify(relationshipDAO).findTo(eq(parentId), eq(Entity.PIPELINE), anyInt()); + verify(relationshipDAO).findTo(eq(parentId), eq(Entity.PIPELINE), eq(SUBTREE_RELATIONS)); assertEquals(0, repo.restoreAdditionalChildrenCalls); } @@ -171,7 +174,8 @@ void restoreChildren_groupsByTypeAndDispatchesOnceEach() { children.add(record(schemaA, Entity.DATABASE_SCHEMA)); children.add(record(schemaB, Entity.DATABASE_SCHEMA)); children.add(record(procA, Entity.STORED_PROCEDURE)); - when(relationshipDAO.findTo(eq(parentId), eq(Entity.PIPELINE), anyInt())).thenReturn(children); + when(relationshipDAO.findTo(eq(parentId), eq(Entity.PIPELINE), eq(SUBTREE_RELATIONS))) + .thenReturn(children); EntityRepository schemaRepo = mock(EntityRepository.class); EntityRepository procRepo = mock(EntityRepository.class); From e6af95b319da59405237e03afe2409c48bc9fce4 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Mon, 18 May 2026 21:47:19 -0700 Subject: [PATCH 28/38] perf(feed): batch deleteByAbout thread cleanup through IN + deleteThreadsInBatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mohityadav766 flagged that the new deleteByAbout(List) overload was a fake batch — it just looped over the single-entity deleteByAbout(UUID), which itself looped over 
deleteThreadInternal(threadId) one thread at a time. So a DatabaseService with N descendants × M threads each would still produce N×M individual DELETEs against thread_entity / entity_relationship / field_relationship. Switch both overloads to a single batched path: - Add CollectionDAO.FeedDAO.findByEntityIds(tableName, List) so we pull every thread id for the whole input batch in one IN-list query. - Reuse the existing deleteThreadsInBatch(List) helper (which already fires one bulk DELETE per table — thread_entity, entity_relationship, field_relationship) instead of looping deleteThreadInternal. - Collapse the single-entity overload into deleteByAbout(List.of(id)) so both callers go through the same code. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../service/jdbi3/CollectionDAO.java | 4 +++ .../service/jdbi3/FeedRepository.java | 36 ++++++++----------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java index 349760cfbd11..d53300071002 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java @@ -4125,6 +4125,10 @@ List findByEntityId( @SqlQuery("select id from thread_entity where entityId = :entityId") List findByEntityId(@Bind("entityId") String entityId); + @SqlQuery("select id from where entityId IN ()") + List findByEntityIds( + @Define("tableName") String tableName, @BindList("entityIds") List entityIds); + @ConnectionAwareSqlUpdate( value = "UPDATE SET json = JSON_SET(json, '$.about', :newEntityLink)\n" diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java index 71248e1320db..ac37848dad92 100644 --- 
a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java @@ -777,41 +777,35 @@ public int deleteThreadsInBatch(List threadUUIDs) { } public void deleteByAbout(UUID entityId) { + deleteByAbout(List.of(entityId)); + } + + public void deleteByAbout(List entityIds) { + if (entityIds == null || entityIds.isEmpty()) { + return; + } if (!isLegacyThreadStorageAvailable()) { LOG.debug( - "Skipping legacy feed cleanup for entity {} because thread storage is unavailable", - entityId); + "Skipping legacy feed cleanup for {} entities because thread storage is unavailable", + entityIds.size()); return; } - + List entityIdStrings = entityIds.stream().map(UUID::toString).toList(); List threadIds; try { threadIds = - listOrEmpty( - dao.feedDAO().findByEntityId(getLegacyThreadTableName(), entityId.toString())); + listOrEmpty(dao.feedDAO().findByEntityIds(getLegacyThreadTableName(), entityIdStrings)); } catch (Exception ex) { LOG.debug( - "Skipping legacy feed cleanup for entity {} because thread storage is unavailable", - entityId, + "Skipping legacy feed cleanup for {} entities because thread storage is unavailable", + entityIds.size(), ex); return; } - for (String threadId : threadIds) { - try { - deleteThreadInternal(UUID.fromString(threadId)); - } catch (Exception ex) { - // Continue deletion - } - } - } - - public void deleteByAbout(List entityIds) { - if (entityIds == null || entityIds.isEmpty()) { + if (threadIds.isEmpty()) { return; } - for (UUID entityId : entityIds) { - deleteByAbout(entityId); - } + deleteThreadsInBatch(threadIds.stream().map(UUID::fromString).toList()); } private boolean isLegacyThreadStorageAvailable() { From 37bff3c3f5b2f7931d978b064531438a0ff01f5f Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Mon, 18 May 2026 22:16:34 -0700 Subject: [PATCH 29/38] fix(feed): chunk deleteByAbout IN-list expansions to stay under DB 
param limits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewer flagged that the new findByEntityIds + deleteThreadsInBatch path passes the full id list straight into an IN-clause, where JDBI's @BindList expands every element into its own bind parameter. For the 12k-descendant hierarchies this PR specifically targets, that blows past SQL Server's ~2100-parameter ceiling and bloats MySQL's max_allowed_packet budget. Chunk both paths to 500 ids per query (matches the existing EntityRepository.RELATION_DELETE_BATCH_SIZE used for the same reason on the relationship side): - deleteByAbout(List) walks entityIds in 500-id chunks against findByEntityIds, accumulates thread ids across chunks, then hands them to deleteThreadsInBatch. - deleteThreadsInBatch(List) walks threadIds in 500-id chunks against deleteAllByThreadIds / deleteAllByPrefixes / deleteByIds and sums the per-chunk delete counts. A per-chunk find failure no longer aborts the whole cleanup — log it and move on, matching the original "skip if thread storage is unavailable" contract. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../service/jdbi3/FeedRepository.java | 45 ++++++++++++------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java index ac37848dad92..c15ca70e5a91 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java @@ -760,20 +760,29 @@ public void deleteThreadInternal(UUID id) { dao.feedDAO().delete(getLegacyThreadTableName(), id); } + // Keep IN-list expansions well under SQL Server's 2100 bind-parameter ceiling and + // MySQL's max_allowed_packet budget. Mirrors EntityRepository.RELATION_DELETE_BATCH_SIZE. 
+ private static final int FEED_IN_BATCH_SIZE = 500; + @Transaction public int deleteThreadsInBatch(List threadUUIDs) { if (CommonUtil.nullOrEmpty(threadUUIDs)) return 0; List threadIds = threadUUIDs.stream().map(UUID::toString).toList(); + int deleted = 0; + for (int i = 0; i < threadIds.size(); i += FEED_IN_BATCH_SIZE) { + List chunk = threadIds.subList(i, Math.min(i + FEED_IN_BATCH_SIZE, threadIds.size())); - // Delete all the relationships to other entities - dao.relationshipDAO().deleteAllByThreadIds(threadIds, Entity.THREAD); + // Delete all the relationships to other entities + dao.relationshipDAO().deleteAllByThreadIds(chunk, Entity.THREAD); - // Delete all the field relationships to other entities - dao.fieldRelationshipDAO().deleteAllByPrefixes(threadIds); + // Delete all the field relationships to other entities + dao.fieldRelationshipDAO().deleteAllByPrefixes(chunk); - // Delete the thread and return the count - return dao.feedDAO().deleteByIds(getLegacyThreadTableName(), threadIds); + // Delete the threads in this chunk and tally the count + deleted += dao.feedDAO().deleteByIds(getLegacyThreadTableName(), chunk); + } + return deleted; } public void deleteByAbout(UUID entityId) { @@ -791,16 +800,20 @@ public void deleteByAbout(List entityIds) { return; } List entityIdStrings = entityIds.stream().map(UUID::toString).toList(); - List threadIds; - try { - threadIds = - listOrEmpty(dao.feedDAO().findByEntityIds(getLegacyThreadTableName(), entityIdStrings)); - } catch (Exception ex) { - LOG.debug( - "Skipping legacy feed cleanup for {} entities because thread storage is unavailable", - entityIds.size(), - ex); - return; + List threadIds = new ArrayList<>(); + for (int i = 0; i < entityIdStrings.size(); i += FEED_IN_BATCH_SIZE) { + List chunk = + entityIdStrings.subList(i, Math.min(i + FEED_IN_BATCH_SIZE, entityIdStrings.size())); + try { + threadIds.addAll( + listOrEmpty(dao.feedDAO().findByEntityIds(getLegacyThreadTableName(), chunk))); + } catch (Exception 
ex) { + LOG.debug( + "Skipping legacy feed cleanup for chunk of {} entities (offset {}) because thread storage is unavailable", + chunk.size(), + i, + ex); + } } if (threadIds.isEmpty()) { return; From d99e9ec20992dc04d5c3014ce098402cdce28875 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Mon, 18 May 2026 22:24:15 -0700 Subject: [PATCH 30/38] fix(feed): restore best-effort semantics around batched thread cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copilot reviewer flagged that the deleteByAbout(List) refactor lost the per-thread try/catch that previously made legacy feed cleanup best-effort. A malformed thread id (UUID.fromString throws IllegalArgumentException) or a DAO/runtime failure inside deleteThreadsInBatch would now propagate up the caller's hard-delete @Transaction and roll the whole cascade back — even though the old per-thread loop deliberately swallowed those so legacy feed issues never blocked entity deletion. Re-add both guards: - Parse thread ids defensively: skip + LOG.warn on each malformed id rather than throwing IllegalArgumentException out of the cleanup. - Wrap deleteThreadsInBatch in try/catch so a bulk DELETE failure logs at warn level and lets the parent hard-delete commit. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../service/jdbi3/FeedRepository.java | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java index c15ca70e5a91..20b1bab8e385 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java @@ -818,7 +818,28 @@ public void deleteByAbout(List entityIds) { if (threadIds.isEmpty()) { return; } - deleteThreadsInBatch(threadIds.stream().map(UUID::fromString).toList()); + // Keep legacy feed cleanup best-effort: a malformed thread id or a DAO failure + // here must not blow up the caller's hard-delete @Transaction. Parse defensively + // (skip + log malformed ids) and swallow batch-delete failures. + List threadUuids = new ArrayList<>(threadIds.size()); + for (String threadId : threadIds) { + try { + threadUuids.add(UUID.fromString(threadId)); + } catch (IllegalArgumentException ex) { + LOG.warn("Skipping malformed legacy thread id {} during feed cleanup", threadId); + } + } + if (threadUuids.isEmpty()) { + return; + } + try { + deleteThreadsInBatch(threadUuids); + } catch (Exception ex) { + LOG.warn( + "Legacy feed cleanup failed for {} threads; continuing entity delete", + threadUuids.size(), + ex); + } } private boolean isLegacyThreadStorageAvailable() { From 6baa6a90eab608dafad609535c497a00ec130484 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Mon, 18 May 2026 23:20:23 -0700 Subject: [PATCH 31/38] fix(entity-dao): chunk deleteByIds IN-list to match findEntitiesByIds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copilot reviewer flagged EntityDAO.deleteByIds(List) as an unbounded IN-clause: JDBI's @BindList expands every id into a 
separate bind parameter, and the bulk hard-delete path now walks 12k+ entity hierarchies, so a single DELETE statement would blow past SQL Server's ~2100-parameter ceiling and MySQL's max_allowed_packet budget. Mirror the findEntitiesByIds chunking already used in this file — 30k ids per chunk, short-circuit when the list fits in one chunk, sum per-chunk delete counts otherwise. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../org/openmetadata/service/jdbi3/EntityDAO.java | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java index 5b57f5682bdb..065d7a09d577 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java @@ -616,7 +616,20 @@ default int deleteByIds(List ids) { return 0; } List stringIds = ids.stream().map(UUID::toString).toList(); - return deleteByIds(getTableName(), stringIds); + // Chunk to match findEntitiesByIds — JDBI's @BindList expands every id into a + // separate bind parameter, and the bulk hard-delete walks 12k+ entity hierarchies, + // so the IN-list would otherwise blow past SQL Server's ~2100-parameter ceiling + // and MySQL's max_allowed_packet budget on a single statement. + int maxChunkSize = 30000; + if (stringIds.size() <= maxChunkSize) { + return deleteByIds(getTableName(), stringIds); + } + int deleted = 0; + for (int i = 0; i < stringIds.size(); i += maxChunkSize) { + List chunk = stringIds.subList(i, Math.min(i + maxChunkSize, stringIds.size())); + deleted += deleteByIds(getTableName(), chunk); + } + return deleted; } @ConnectionAwareSqlUpdate(value = "ANALYZE TABLE
  • ", connectionType = MYSQL) From 8766a909bd6a4ff67996ba10fbe37a281a7732f9 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Tue, 19 May 2026 08:17:48 -0700 Subject: [PATCH 32/38] fix(dao): hoist IN-list chunk size + escape backslashes in updateFqn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address two Copilot quality flags: 1. The 30k IN-list chunk size was duplicated as a magic number in four EntityDAO methods (findReferencesByFqns, deleteByIds, findEntitiesByIds, findEntityByNames). Hoist to a single MAX_IN_LIST_CHUNK_SIZE constant on the interface so all four stay in sync if the limit needs tuning. 2. EntityDAO.updateFqn interpolated user-supplied prefixes into raw SQL with only apostrophe + double-quote escaping. MySQL's default mode treats `\` as a string-literal escape char, and Postgres does too when standard_conforming_strings is off, so an unescaped `\n` in newPrefix would be parsed as a newline before the REGEXP_REPLACE / REPLACE ever saw it. Name validation currently forbids these characters, but defense-in-depth is cheap. Added ListFilter.escapeBackslashAndApostrophe (backslash first, then apostrophe — order matters, otherwise the `\\` we just inserted gets re-doubled). escape() now composes through this helper, so the existing LIKE-aware path also picks up backslash escaping. updateFqn's MySQL replacement string and both Postgres prefixes route through the new helper before any further (LIKE / double-quote) escape pass. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openmetadata/service/jdbi3/EntityDAO.java | 40 +++++++++++++------ .../service/jdbi3/ListFilter.java | 13 +++++- 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java index 065d7a09d577..c12a752ca3ff 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java @@ -15,7 +15,7 @@ import static org.openmetadata.service.exception.CatalogExceptionMessage.entityNotFound; import static org.openmetadata.service.jdbi3.ListFilter.escape; -import static org.openmetadata.service.jdbi3.ListFilter.escapeApostrophe; +import static org.openmetadata.service.jdbi3.ListFilter.escapeBackslashAndApostrophe; import static org.openmetadata.service.jdbi3.locator.ConnectionType.MYSQL; import static org.openmetadata.service.jdbi3.locator.ConnectionType.POSTGRES; @@ -56,6 +56,17 @@ public interface EntityDAO { org.slf4j.Logger LOG = org.slf4j.LoggerFactory.getLogger(EntityDAO.class); + /** + * Maximum number of values expanded into a single SQL IN-list. JDBI's {@code @BindList} + * produces one bind parameter per element, and the bulk hard-delete + bulk lookup paths + * walk 12k+ entity hierarchies — past SQL Server's ~2100-parameter ceiling and MySQL's + * {@code max_allowed_packet} budget. Callers that may exceed this size must chunk their + * input lists; helpers in this interface ({@link #findEntitiesByIds}, + * {@link #findEntityByNames}, {@link #findReferencesByFqns}, {@link #deleteByIds}) already + * do. 
+ */ + int MAX_IN_LIST_CHUNK_SIZE = 30_000; + /** Methods that need to be overridden by interfaces extending this */ String getTableName(); @@ -272,7 +283,7 @@ default List findReferencesByFqns(List entityFQNs, Incl } List nameHashes = entityFQNs.stream().distinct().map(FullyQualifiedName::buildHash).toList(); - int maxChunkSize = 30000; + int maxChunkSize = MAX_IN_LIST_CHUNK_SIZE; if (nameHashes.size() <= maxChunkSize) { return findReferenceRows(nameHashes, include).stream() .map(row -> row.toEntityReference(Entity.getEntityTypeFromClass(getEntityClass()))) @@ -325,6 +336,10 @@ default void updateFqn(String oldPrefix, String newPrefix) { if (!getNameHashColumn().equals("fqnHash")) { return; } + // escape() handles the regex source pattern (backslash + apostrophe + LIKE underscore). + // For the regex replacement, we still need backslash escaping (MySQL's default mode + // treats {@code \} as a string-literal escape char, so a stray "\n" in newPrefix would + // be parsed as a newline before REGEXP_REPLACE ever sees it). String mySqlUpdate = String.format( "UPDATE %s SET json = " @@ -333,11 +348,16 @@ default void updateFqn(String oldPrefix, String newPrefix) { + "WHERE fqnHash LIKE '%s.%%'", getTableName(), escape(oldPrefix), - escapeApostrophe(newPrefix), + escapeBackslashAndApostrophe(newPrefix), FullyQualifiedName.buildHash(oldPrefix), FullyQualifiedName.buildHash(newPrefix), FullyQualifiedName.buildHash(oldPrefix)); + // Postgres path embeds the prefixes inside a double-quoted JSON pattern, so escape + // backslashes and apostrophes first (so a literal "\\" or "''" isn't reparsed by the + // SQL string-literal layer), then escape double-quotes so the JSON-pattern delimiter + // can't be broken out of. Apostrophe escaping is still required because the JSON + // pattern itself sits inside a single-quoted SQL string literal. 
String postgresUpdate = String.format( "UPDATE %s SET json = " @@ -346,8 +366,8 @@ default void updateFqn(String oldPrefix, String newPrefix) { + ", fqnHash = REPLACE(fqnHash, '%s.', '%s.') " + "WHERE fqnHash LIKE '%s.%%'", getTableName(), - ReindexingUtil.escapeDoubleQuotes(escapeApostrophe(oldPrefix)), - ReindexingUtil.escapeDoubleQuotes(escapeApostrophe(newPrefix)), + ReindexingUtil.escapeDoubleQuotes(escapeBackslashAndApostrophe(oldPrefix)), + ReindexingUtil.escapeDoubleQuotes(escapeBackslashAndApostrophe(newPrefix)), FullyQualifiedName.buildHash(oldPrefix), FullyQualifiedName.buildHash(newPrefix), FullyQualifiedName.buildHash(oldPrefix)); @@ -616,11 +636,7 @@ default int deleteByIds(List ids) { return 0; } List stringIds = ids.stream().map(UUID::toString).toList(); - // Chunk to match findEntitiesByIds — JDBI's @BindList expands every id into a - // separate bind parameter, and the bulk hard-delete walks 12k+ entity hierarchies, - // so the IN-list would otherwise blow past SQL Server's ~2100-parameter ceiling - // and MySQL's max_allowed_packet budget on a single statement. 
- int maxChunkSize = 30000; + int maxChunkSize = MAX_IN_LIST_CHUNK_SIZE; if (stringIds.size() <= maxChunkSize) { return deleteByIds(getTableName(), stringIds); } @@ -718,7 +734,7 @@ default List findEntitiesByIds(List ids, Include include) { } List distinctIds = ids.stream().map(UUID::toString).distinct().toList(); - int maxChunkSize = 30000; + int maxChunkSize = MAX_IN_LIST_CHUNK_SIZE; if (distinctIds.size() <= maxChunkSize) { return findByIds(getTableName(), distinctIds, getCondition(include)).stream() @@ -763,7 +779,7 @@ default List findEntityByNames(List entityFQNs, Include include) { } List names = entityFQNs.stream().distinct().map(FullyQualifiedName::buildHash).toList(); - int maxChunkSize = 30000; + int maxChunkSize = MAX_IN_LIST_CHUNK_SIZE; if (names.size() <= maxChunkSize) { return findByNames(getTableName(), getNameHashColumn(), names, getCondition(include)).stream() diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ListFilter.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ListFilter.java index b7e21d374e50..218921c298ce 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ListFilter.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ListFilter.java @@ -977,10 +977,21 @@ public static String escapeApostrophe(String name) { return name.replace("'", "''"); } + /** + * Defence-in-depth: when a value is embedded inside a single-quoted SQL string literal, + * escape backslashes before apostrophes (MySQL treats {@code \} as a string-literal escape + * by default, and Postgres does too when {@code standard_conforming_strings = off}). Run + * this BEFORE {@link #escapeApostrophe} so the {@code \\} we just inserted isn't itself + * re-doubled. 
+ */ + public static String escapeBackslashAndApostrophe(String name) { + return escapeApostrophe(name.replace("\\", "\\\\")); + } + public static String escape(String name) { // Escape string to be using in LIKE clause // "'" is used for indicated start and end of the string. Use "''" to escape it. - name = escapeApostrophe(name); + name = escapeBackslashAndApostrophe(name); // "_" is a wildcard and looks for any single character. Add "\\" in front of it to escape it return name.replaceAll("_", "\\\\_"); } From fe7dfbc1b3a89fe760c3bbbaba7175df75247baa Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Tue, 19 May 2026 08:47:06 -0700 Subject: [PATCH 33/38] fix(bulk-update): hydrate HAS-relationship fields with Include.ALL before bulk restore/soft-delete updaters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copilot reviewer flagged that bulkRestoreSubtree (and by extension bulkSoftDeleteSubtree) builds EntityUpdaters from entities loaded via raw storage JSON (loadForBulk -> dao.findEntitiesByIds -> no setFields call). Entities whose updater rewrites relationships unconditionally — e.g. DashboardRepository.DashboardUpdater.entitySpecificUpdate which does deleteFrom(... HAS ...) + addRelationship for every charts/dataModels entry — would diff a null original.charts against a null updated.charts, fire the compareAndUpdate lambda (shouldCompare returns true for PUT/SOFT_DELETE with patchedFields=null), wipe the HAS rows, and re-add nothing. Net: every chart link disappears as a side effect of restoring or soft-deleting the parent — well before restoreAdditionalChildren / softDeleteAdditionalChildren walk the relation set. This is exactly the failure mode the single-entity restoreEntity already documents and guards against by calling setFieldsInternal(original, putFields, ALL) before building its updater. The bulk replacement needs the same contract. 
- New helper hydrateRelationsForBulkUpdater(List) loops setFieldsInternal with Include.ALL. Per-entity rather than setFieldsInBulk because the bulk fetchers (e.g. DashboardRepository.batchFetchCharts) hard-code NON_DELETED when batching HAS lookups, which would still hide cascade-deleted charts from the restore path. Restore batches are typically one subtree level, so the extra DB hits are acceptable. - Hooked into bulkRestoreSubtree right before buildBulkUpdaters, and into applyBulkSoftDelete with the same rationale. EntityRepositoryRestoreTest 13/13, AsyncServiceTest 11/11 still pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../service/jdbi3/EntityRepository.java | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index 21cc2217864a..d78258cb908f 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -5683,6 +5683,18 @@ public final void bulkRestoreSubtree(List ids, String updatedBy) { List deletedEntities = entities.stream().filter(e -> Boolean.TRUE.equals(e.getDeleted())).toList(); if (!deletedEntities.isEmpty()) { + // Hydrate relationship fields with Include.ALL before the PUT updater diff runs. + // loadForBulk returned only the storage JSON, so HAS-style children + // (e.g., dashboard.charts, dashboard.dataModels) are null on the parsed entity. + // The PUT updater's compareAndUpdate("charts", ...) fires unconditionally and the + // update(...) lambda does deleteFrom(... HAS ...) followed by re-adding from + // updated.getCharts() — if updated.getCharts() is null/empty, every HAS row is + // wiped before the restoreAdditionalChildren hook ever runs to restore them. 
+ // Using Include.ALL ensures the cascade-deleted charts/dataModels are visible to + // both sides of the diff so the relationships round-trip cleanly. Matches the + // single-entity restoreEntity contract (see the comment at the find/setFields call + // earlier in this file). + hydrateRelationsForBulkUpdater(deletedEntities); List updaters = buildBulkUpdaters(deletedEntities, updatedBy, Operation.PUT, "bulkRestoreUpdaters", null); List changed = filterChanged(updaters); @@ -5827,6 +5839,14 @@ private void applyBulkSoftDelete(List entities, String updatedBy) { if (entities.isEmpty()) { return; } + // Same reason as hydrateRelationsForBulkUpdater — buildBulkUpdaters uses bare JSON, and a + // PUT-style updater (e.g. DashboardUpdater.entitySpecificUpdate) calls + // deleteFrom(... HAS ...) then re-adds from updated.getCharts(). Without hydration + // both lists are empty and the soft-delete wipes the HAS rows that softDeleteAdditional- + // Children later needs to walk. Include.ALL handles both shapes: charts that are still + // live (parent soft-deleted in isolation) and charts already cascade-soft-deleted + // (parent soft-deleted as part of a wider sweep). + hydrateRelationsForBulkUpdater(entities); List updaters = buildBulkUpdaters( entities, @@ -5940,6 +5960,27 @@ private void populateRelationFields(List entities) { } } + /** + * Per-entity hydration with {@link Include#ALL} for the bulk restore path. The bulk + * {@link #setFieldsInBulk} variant hard-codes {@code NON_DELETED} when batch-fetching + * relationship references (see {@code DashboardRepository.batchFetchCharts}), so a + * cascade-deleted chart wouldn't show up in {@code dashboard.charts} — exactly the + * scenario where we need it to. Falling back to per-entity {@link #setFieldsInternal} + * routes through the subclass's {@code setFields(entity, fields, relationIncludes)} which + * honours the include passed in. 
Restore batches are typically small (single subtree + * level), so the extra DB round-trips are acceptable for the correctness this buys. + */ + private void hydrateRelationsForBulkUpdater(List entities) { + for (T entity : entities) { + try { + setFieldsInternal(entity, putFields, ALL); + } catch (Exception ignored) { + // Best-effort: if hydration fails on a single entity, the PUT updater may wipe its + // HAS rows — restoreAdditionalChildren will still attempt to put them back. + } + } + } + private void bulkCleanupReferences(List entities) { List entityIds = new ArrayList<>(entities.size()); List entityIdStrings = new ArrayList<>(entities.size()); From 6eaae5b46d43ecd5f0bc3c31cae0afff6eb2b92f Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Tue, 19 May 2026 09:07:33 -0700 Subject: [PATCH 34/38] review: address remaining open review threads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - EntityRepository.hydrateRelationsForBulkUpdater: log hydration failures at warn level with entity id/type instead of swallowing them silently, so any subsequent HAS-row wipe is correlatable in production logs (gitar-bot empty-catch flag). - EntityDAO.MAX_IN_LIST_CHUNK_SIZE Javadoc: drop the misleading SQL Server reference (not a supported connection type — its 2100-parameter ceiling would need a much smaller constant) and document the MySQL / PostgreSQL constraints that actually apply to this codebase (Copilot doc accuracy flag). - FeedRepository.FEED_IN_BATCH_SIZE comment: same — replace SQL Server hedging with MySQL/PostgreSQL framing and call out why the per-statement budget is tighter than the EntityDAO constant (three IN-list statements per chunk). - EntityResource.restoreEntity null branch: drop the redundant find(id, ALL) probe — EntityRepository.restoreEntity now calls find(id, ALL) up front and throws EntityNotFoundException for missing ids, so a null response can only mean "exists but not deleted" → 400. 
Updated the comment to match (Copilot stale-comment flag). EntityRepositoryRestoreTest 13/13, AsyncServiceTest 11/11 still pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openmetadata/service/jdbi3/EntityDAO.java | 17 +++++++++----- .../service/jdbi3/EntityRepository.java | 13 ++++++++--- .../service/jdbi3/FeedRepository.java | 8 +++++-- .../service/resources/EntityResource.java | 23 +++++-------------- 4 files changed, 33 insertions(+), 28 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java index c12a752ca3ff..62de6f2187be 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java @@ -58,12 +58,17 @@ public interface EntityDAO { /** * Maximum number of values expanded into a single SQL IN-list. JDBI's {@code @BindList} - * produces one bind parameter per element, and the bulk hard-delete + bulk lookup paths - * walk 12k+ entity hierarchies — past SQL Server's ~2100-parameter ceiling and MySQL's - * {@code max_allowed_packet} budget. Callers that may exceed this size must chunk their - * input lists; helpers in this interface ({@link #findEntitiesByIds}, - * {@link #findEntityByNames}, {@link #findReferencesByFqns}, {@link #deleteByIds}) already - * do. + * produces one bind parameter per element. OpenMetadata supports MySQL and PostgreSQL — + * PostgreSQL's protocol caps each statement at 65535 bind parameters + * (the {@code int2}-size {@code numParams} field), and MySQL's {@code max_allowed_packet} + * caps total statement size. 30k UUID/hash strings stays comfortably under both: each + * UUID is ~36 chars, so an IN-list of this size is ~1MB on the wire (well below the 64MB + * MySQL default) and still leaves headroom for Postgres's parameter ceiling. 
Callers that + * may exceed this size must chunk their input lists; helpers in this interface + * ({@link #findEntitiesByIds}, {@link #findEntityByNames}, {@link #findReferencesByFqns}, + * {@link #deleteByIds}) already do. (SQL Server isn't a supported connection type here — + * its ~2100 sp_executesql cap would require a separate, much smaller constant if it ever + * is.) */ int MAX_IN_LIST_CHUNK_SIZE = 30_000; diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index d78258cb908f..c4cbdef27cc8 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -5974,9 +5974,16 @@ private void hydrateRelationsForBulkUpdater(List entities) { for (T entity : entities) { try { setFieldsInternal(entity, putFields, ALL); - } catch (Exception ignored) { - // Best-effort: if hydration fails on a single entity, the PUT updater may wipe its - // HAS rows — restoreAdditionalChildren will still attempt to put them back. + } catch (Exception ex) { + // Best-effort: if hydration fails on a single entity the PUT updater may wipe its + // HAS rows. restoreAdditionalChildren will still attempt to put them back, but log + // so operators can correlate any missing-relationship reports with hydration noise + // rather than digging through change-event history. 
+ LOG.warn( + "Hydration failed for {} {}; HAS rows may be wiped before restore hook runs", + entityType, + entity.getId(), + ex); } } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java index 20b1bab8e385..7764fa7c21c6 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java @@ -760,8 +760,12 @@ public void deleteThreadInternal(UUID id) { dao.feedDAO().delete(getLegacyThreadTableName(), id); } - // Keep IN-list expansions well under SQL Server's 2100 bind-parameter ceiling and - // MySQL's max_allowed_packet budget. Mirrors EntityRepository.RELATION_DELETE_BATCH_SIZE. + // Keep IN-list expansions well under MySQL's max_allowed_packet budget and within + // PostgreSQL's bind-parameter ceiling. 500 also matches the existing + // EntityRepository.RELATION_DELETE_BATCH_SIZE used for the same reason on the + // relationship side. Smaller than EntityDAO.MAX_IN_LIST_CHUNK_SIZE because the + // feed cleanup path issues three IN-list statements per chunk (relationships, + // field_relationship, thread_entity) and each has its own packet/parameter budget. 
private static final int FEED_IN_BATCH_SIZE = 500; @Transaction diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java index 571d460b08d7..57f62d757e51 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/EntityResource.java @@ -793,23 +793,12 @@ public Response restoreEntity( PutResponse response = repository.restoreEntity(securityContext.getUserPrincipal().getName(), id); if (response == null) { - // EntityRepository.restoreEntity returns null when find(id, DELETED) throws — - // either the entity doesn't exist at all (→ 404) or it exists but isn't deleted - // (→ 400). Probe with Include.ALL to tell them apart. The try block deliberately - // ONLY traps EntityNotFoundException so unrelated failures (DB connectivity, auth, - // etc.) propagate as 500 rather than being mis-mapped to 400. - boolean entityExists; - try { - repository.find(id, Include.ALL); - entityExists = true; - } catch (EntityNotFoundException missing) { - entityExists = false; - } - if (entityExists) { - throw new BadRequestException( - String.format("Entity %s:%s is not in deleted state", entityType, id)); - } - throw new EntityNotFoundException(CatalogExceptionMessage.entityNotFound(entityType, id)); + // EntityRepository.restoreEntity now calls find(id, Include.ALL) up front, so a truly + // missing id has already propagated EntityNotFoundException (→ 404) before we got + // here. A null response can only mean "entity exists but is not in DELETED state" — + // map that to 400. 
+ throw new BadRequestException( + String.format("Entity %s:%s is not in deleted state", entityType, id)); } repository.restoreFromSearch(response.getEntity()); addHref(uriInfo, response.getEntity()); From 4b78c7e196cd860f6c49510c155110a5499a01d8 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Wed, 20 May 2026 10:25:46 -0700 Subject: [PATCH 35/38] fix(bulk-delete): fire deleteFromSearch per cascade-deleted descendant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bulkHardDeleteSubtree was walking the cascade, deleting DB rows, but never firing the per-entity Elasticsearch removal that the legacy Entity.deleteEntity path triggered via delete()'s top-level deleteFromSearch dispatch. The bulk replacement is the only path that walks cascaded children now, so a missing call leaves stale ES docs that survive the hard delete. Concrete symptom: Playwright Domains.spec.ts:533 ("Should clear assets from data products after deletion of data product in Domain") consistently failed on this branch. The test creates Domain + DataProduct(PW_DataProduct_Sales), recursively hard-deletes the Domain, then re-creates Domain + DataProduct with the same names. The UI search for PW_DataProduct_Sales then returned TWO rows: the new DataProduct from the re-create plus a stale ES doc for the "deleted" cascade child whose DB row was actually gone. The test died on selectDataProduct's strict-mode locator match. Add deleteFromSearch(entity, true) alongside postDelete inside the end-of-walk loop. Mirrors the per-entity contract — legacy delete() called postDelete then deleteFromSearch in that order, the bulk path now does the same per cascaded entity. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../org/openmetadata/service/jdbi3/EntityRepository.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java index c4cbdef27cc8..9a1647e66205 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityRepository.java @@ -5939,6 +5939,14 @@ public final void bulkHardDeleteSubtree(List ids, String updatedBy) { bulkInvalidate(entities); for (T entity : entities) { postDelete(entity, true); + // Fire deleteFromSearch per-entity so cascade-deleted descendants are removed from + // Elasticsearch. The legacy per-entity Entity.deleteEntity path invoked this via + // delete()'s top-level dispatch — this bulk replacement is the only path that walks + // cascaded children now, so a missing call leaves stale ES docs that surface as + // duplicate results (e.g. Playwright Domains.spec.ts:533 found two "PW_DataProduct_ + // Sales" rows after a recursive Domain hard-delete because the DB row was gone but + // the search-index doc lingered). + deleteFromSearch(entity, true); } } From f06b21952e2e28692c03feb77cb56992cfa3b4f1 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Wed, 20 May 2026 11:59:11 -0700 Subject: [PATCH 36/38] fix(escape): two-layer escape for MySQL REGEXP_REPLACE replacement, plus regression tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copilot reviewer flagged that EntityDAO.updateFqn's MySQL path used escapeBackslashAndApostrophe(newPrefix), which only covers the SQL string-literal layer. 
The value is then fed to REGEXP_REPLACE's replacement argument, which has its own escape semantics — `\X` is a backreference / escape sequence, so a backslash that survived the SQL layer gets re-interpreted by the regex engine. For an input like "foo\1bar", "\1" would be treated as a capture-group backreference rather than a literal "\1" in the rewritten FQN; for an input like "foo\bar", the regex engine would consume the backslash as the start of an undefined escape. Add ListFilter.escapeForMySqlRegexReplacement(s) that composes both layers: Step 1 — regex replacement: \ → \\ (regex engine emits literal \) Step 2 — SQL string-literal: \\ → \\\\, ' → '' Net: one input \ → four \ in the SQL statement text → two \ for the regex engine → one literal \ in the replacement output. Apostrophes only matter for the SQL layer, so they're left to step 2's existing single-pass doubling (composing escapeBackslashAndApostrophe twice would re-escape apostrophes — chose explicit two-step composition to avoid that pitfall). EntityDAO.updateFqn's MySQL replacement now routes through the new helper. The Postgres path stays on escapeBackslashAndApostrophe + escapeDoubleQuotes because PG embeds the value in a static REPLACE call, not a regex replacement — the regex layer doesn't apply there. Test coverage in ListFilterTest: - test_escapeBackslashAndApostrophe_* — existing primitive - test_escape_alsoDoublesBackslashesViaBackslashAndApostrophe — regression guard that escape() picks up the backslash hardening transparently - test_escapeForMySqlRegexReplacement_* — new helper, including a "\1 backreference look-alike" case to guard the original concern directly All 22 ListFilterTest cases pass. 46/46 across EntityRepositoryRestoreTest + AsyncServiceTest + ListFilterTest. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../openmetadata/service/jdbi3/EntityDAO.java | 17 +++-- .../service/jdbi3/ListFilter.java | 28 +++++++ .../service/jdbi3/ListFilterTest.java | 75 +++++++++++++++++++ 3 files changed, 115 insertions(+), 5 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java index 62de6f2187be..287c3188f579 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/EntityDAO.java @@ -16,6 +16,7 @@ import static org.openmetadata.service.exception.CatalogExceptionMessage.entityNotFound; import static org.openmetadata.service.jdbi3.ListFilter.escape; import static org.openmetadata.service.jdbi3.ListFilter.escapeBackslashAndApostrophe; +import static org.openmetadata.service.jdbi3.ListFilter.escapeForMySqlRegexReplacement; import static org.openmetadata.service.jdbi3.locator.ConnectionType.MYSQL; import static org.openmetadata.service.jdbi3.locator.ConnectionType.POSTGRES; @@ -341,10 +342,16 @@ default void updateFqn(String oldPrefix, String newPrefix) { if (!getNameHashColumn().equals("fqnHash")) { return; } - // escape() handles the regex source pattern (backslash + apostrophe + LIKE underscore). - // For the regex replacement, we still need backslash escaping (MySQL's default mode - // treats {@code \} as a string-literal escape char, so a stray "\n" in newPrefix would - // be parsed as a newline before REGEXP_REPLACE ever sees it). + // The regex replacement argument to MySQL's REGEXP_REPLACE has its own escape layer + // on top of the SQL string-literal layer — `\1`/`\2` are backreferences, `\\` is a + // literal backslash. Using escapeBackslashAndApostrophe here would only escape for the + // SQL layer, leaving a stray backslash in newPrefix to be interpreted by the regex + // engine. 
escapeForMySqlRegexReplacement applies both layers (regex-replacement first, + // then SQL string-literal) so an input backslash round-trips to a single literal + // backslash in the replacement output. The source pattern goes through escape() which + // already covers the SQL + LIKE-underscore layers — the regex-pattern layer is + // tolerated here because OpenMetadata's name validation forbids the regex metas that + // would matter (\ . * ? + ^ $ ( ) [ ] { } |). String mySqlUpdate = String.format( "UPDATE %s SET json = " @@ -353,7 +360,7 @@ default void updateFqn(String oldPrefix, String newPrefix) { + "WHERE fqnHash LIKE '%s.%%'", getTableName(), escape(oldPrefix), - escapeBackslashAndApostrophe(newPrefix), + escapeForMySqlRegexReplacement(newPrefix), FullyQualifiedName.buildHash(oldPrefix), FullyQualifiedName.buildHash(newPrefix), FullyQualifiedName.buildHash(oldPrefix)); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ListFilter.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ListFilter.java index 218921c298ce..c83ab69609ec 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ListFilter.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/ListFilter.java @@ -988,6 +988,34 @@ public static String escapeBackslashAndApostrophe(String name) { return escapeApostrophe(name.replace("\\", "\\\\")); } + /** + * Escape a string for use as the replacement argument to MySQL's + * {@code REGEXP_REPLACE}. Two layers of escaping are needed: + *
<ol>
+ * <li>Regex replacement layer: {@code REGEXP_REPLACE} treats {@code \} as the start of a + * backreference / escape sequence (e.g. {@code \1} resolves to capture group 1). + * Each literal backslash in the input needs to become {@code \\} for the regex + * engine to emit a single {@code \}.</li>
+ * <li>SQL string-literal layer: the regex-escaped value is then embedded inside a + * single-quoted SQL string, so each remaining {@code \} doubles again + * ({@code \\} → {@code \\\\}) and apostrophes double ({@code '} → {@code ''}).</li>
+ * </ol> + * Net effect: one input backslash → four backslashes in the SQL statement text, which + * the SQL parser folds to two backslashes for the regex engine, which the regex engine + * folds to one literal backslash in the replacement output. Apostrophes just double + * once (regex replacement doesn't reserve apostrophes, only the SQL layer does). + * + * <p>
    Step 1 is a plain backslash doubling, so {@link #escapeBackslashAndApostrophe} runs only + * once (as the SQL string-literal pass) — applying {@code escapeBackslashAndApostrophe} twice would + * re-escape the apostrophes we already doubled. + */ + public static String escapeForMySqlRegexReplacement(String name) { + // Step 1: double backslashes for the regex replacement layer. + String regexEscaped = name.replace("\\", "\\\\"); + // Step 2: double backslashes (again) + apostrophes for the SQL string-literal layer. + return escapeBackslashAndApostrophe(regexEscaped); + } + public static String escape(String name) { // Escape string to be using in LIKE clause // "'" is used for indicated start and end of the string. Use "''" to escape it. diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/ListFilterTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/ListFilterTest.java index 06d9d779dc35..50c1937401f6 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/ListFilterTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/jdbi3/ListFilterTest.java @@ -19,6 +19,81 @@ void test_escapeApostrophe() { assertEquals("a\\_b\\_c\\_d", ListFilter.escape("a_b_c_d")); } + @Test + void test_escapeBackslashAndApostrophe_passesThroughPlainStrings() { + assertEquals("abcd", ListFilter.escapeBackslashAndApostrophe("abcd")); + assertEquals("", ListFilter.escapeBackslashAndApostrophe("")); + } + + @Test + void test_escapeBackslashAndApostrophe_doublesApostrophes() { + // ' → '' for the SQL string-literal layer + assertEquals("a''b", ListFilter.escapeBackslashAndApostrophe("a'b")); + assertEquals("''", ListFilter.escapeBackslashAndApostrophe("'")); + } + + @Test + void test_escapeBackslashAndApostrophe_doublesBackslashesBeforeApostrophes() { + // \ → \\ for the SQL string-literal layer (MySQL default + Postgres legacy mode); + // backslash escape must run BEFORE apostrophe escape so the \\ we just inserted 
+ // is not itself re-doubled by a subsequent pass. + assertEquals("a\\\\b", ListFilter.escapeBackslashAndApostrophe("a\\b")); + assertEquals("\\\\\\\\", ListFilter.escapeBackslashAndApostrophe("\\\\")); + assertEquals("a\\\\''b", ListFilter.escapeBackslashAndApostrophe("a\\'b")); + } + + @Test + void test_escape_alsoDoublesBackslashesViaBackslashAndApostrophe() { + // Regression guard: escape() composes through escapeBackslashAndApostrophe, so a + // literal backslash in the input must come out doubled (defence-in-depth against + // SQL string-literal escape interpretation, on top of the existing LIKE underscore + // escape). + assertEquals("a\\\\b", ListFilter.escape("a\\b")); + assertEquals("a\\\\b\\_c", ListFilter.escape("a\\b_c")); + } + + @Test + void test_escapeForMySqlRegexReplacement_passesThroughPlainStrings() { + assertEquals("abcd", ListFilter.escapeForMySqlRegexReplacement("abcd")); + assertEquals("", ListFilter.escapeForMySqlRegexReplacement("")); + } + + @Test + void test_escapeForMySqlRegexReplacement_doublesApostrophesOnce() { + // Apostrophes only matter for the SQL string-literal layer — REGEXP_REPLACE's + // replacement context doesn't reserve them. Expect a single ' → '' doubling. + assertEquals("a''b", ListFilter.escapeForMySqlRegexReplacement("a'b")); + } + + @Test + void test_escapeForMySqlRegexReplacement_quadruplesBackslashes() { + // One input backslash needs to round-trip to one literal backslash in the + // REGEXP_REPLACE output, so it must be FOUR backslashes in the emitted SQL text: + // SQL text : \\\\ (4 backslashes) + // SQL parser: \\ (2 backslashes — '\\' is the SQL string-literal escape for '\') + // regex eng : \ (1 backslash — '\\' in the regex replacement is a literal '\') + // Without the regex-replacement escape, the regex engine would interpret the lone + // remaining '\' as the start of an escape/backref sequence. 
+ assertEquals("a\\\\\\\\b", ListFilter.escapeForMySqlRegexReplacement("a\\b")); + assertEquals("\\\\\\\\", ListFilter.escapeForMySqlRegexReplacement("\\")); + } + + @Test + void test_escapeForMySqlRegexReplacement_protectsBackreferenceLookalikes() { + // Without the extra regex-replacement layer, "\1" in the input would survive as "\1" + // in the regex replacement and be interpreted as a backreference to capture group 1 + // (REGEXP_REPLACE doesn't have groups when called like updateFqn does, but the + // behaviour is implementation-defined — usually empty-string substitution). After + // the double escape it survives as a literal "\1" in the output. + assertEquals("\\\\\\\\1bar", ListFilter.escapeForMySqlRegexReplacement("\\1bar")); + } + + @Test + void test_escapeForMySqlRegexReplacement_combinesBackslashAndApostrophe() { + // Backslashes get four-x'd, apostrophes double once. + assertEquals("a\\\\\\\\''b", ListFilter.escapeForMySqlRegexReplacement("a\\'b")); + } + @Test void addCondition() { String condition; From 1df217d26bf4926e6fd12f2632212ed7e15d4c6e Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Wed, 20 May 2026 12:30:09 -0700 Subject: [PATCH 37/38] feat(sdk-java): extend fluent restore() to all data-asset fluents via generic EntityRestorer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fluent restore API previously existed only on Tables and Databases. User flagged the gap — restore (sync + server-side async via PUT /restore?async=true) is supported by the server for every entity-type, and the Java SDK base service (EntityServiceBase.restore / restoreServerAsync) is already shared across all of them. The fluent layer was the only thing missing. 
Rather than copy-paste TableRestorer/AsyncTableRestorer for each entity, hoist the pattern into a single generic helper: - common/EntityRestorer<T> — sync .execute() returns T, .async() switches mode - common/AsyncEntityRestorer<T> — .execute() returns AsyncJobResponse Tables and Databases now wire the generic helper too (drops the entity-specific restorer pair on each — ~80 lines removed). restore() now lives on 26 additional fluent classes' Finder builders: AIApplications, AIGovernancePolicies, Charts, Classifications, Containers, DashboardDataModels, Dashboards, DataContracts, DataProducts, DatabaseSchemas, Domains, Glossaries, GlossaryTerms, LLMServices, McpServers, Metrics, MlModels, Pipelines, PromptTemplates, Queries, SearchIndexes, StoredProcedures, Tags, Teams, Topics, Users. Each is a 3-line wrap-and-delegate; future restore-capable entity-types are a one-method addition. Tests in RestoreFluentAPITest extended from 4 → 12: kept the original Tables / Databases sync+async pairs, added per-fluent route-through assertions for DatabaseSchemas, Dashboards, Pipelines, Topics, MlModels, Containers, Glossaries, Domains. They all go through the same EntityRestorer<T>, so the representative sample is enough to catch a typo in any single fluent's wire-up without re-asserting the same plumbing 26 times. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../sdk/fluent/AIApplications.java | 5 + .../sdk/fluent/AIGovernancePolicies.java | 5 + .../org/openmetadata/sdk/fluent/Charts.java | 4 + .../sdk/fluent/Classifications.java | 5 + .../openmetadata/sdk/fluent/Containers.java | 5 + .../sdk/fluent/DashboardDataModels.java | 5 + .../openmetadata/sdk/fluent/Dashboards.java | 5 + .../sdk/fluent/DataContracts.java | 5 + .../openmetadata/sdk/fluent/DataProducts.java | 5 + .../sdk/fluent/DatabaseSchemas.java | 5 + .../openmetadata/sdk/fluent/Databases.java | 46 +---- .../org/openmetadata/sdk/fluent/Domains.java | 4 + .../openmetadata/sdk/fluent/Glossaries.java | 5 + .../sdk/fluent/GlossaryTerms.java | 5 + .../openmetadata/sdk/fluent/LLMServices.java | 5 + .../openmetadata/sdk/fluent/McpServers.java | 5 + .../org/openmetadata/sdk/fluent/Metrics.java | 4 + .../org/openmetadata/sdk/fluent/MlModels.java | 4 + .../openmetadata/sdk/fluent/Pipelines.java | 5 + .../sdk/fluent/PromptTemplates.java | 5 + .../org/openmetadata/sdk/fluent/Queries.java | 4 + .../sdk/fluent/SearchIndexes.java | 5 + .../sdk/fluent/StoredProcedures.java | 5 + .../org/openmetadata/sdk/fluent/Tables.java | 45 +---- .../org/openmetadata/sdk/fluent/Tags.java | 4 + .../org/openmetadata/sdk/fluent/Teams.java | 4 + .../org/openmetadata/sdk/fluent/Topics.java | 4 + .../org/openmetadata/sdk/fluent/Users.java | 4 + .../fluent/common/AsyncEntityRestorer.java | 39 +++++ .../sdk/fluent/common/EntityRestorer.java | 45 +++++ .../sdk/fluent/RestoreFluentAPITest.java | 161 ++++++++++++++++++ 31 files changed, 371 insertions(+), 86 deletions(-) create mode 100644 openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/common/AsyncEntityRestorer.java create mode 100644 openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/common/EntityRestorer.java diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/AIApplications.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/AIApplications.java 
index 620094ebdbbc..d783ad381740 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/AIApplications.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/AIApplications.java @@ -213,6 +213,11 @@ public AIApplication get() { public AIApplicationDeleter delete() { return new AIApplicationDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.aiApplications(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/AIGovernancePolicies.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/AIGovernancePolicies.java index 678f941cdbc6..12fff1ca9902 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/AIGovernancePolicies.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/AIGovernancePolicies.java @@ -215,6 +215,11 @@ public AIGovernancePolicy get() { public AIGovernancePolicyDeleter delete() { return new AIGovernancePolicyDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.aiGovernancePolicies(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Charts.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Charts.java index eeda8f4ad421..b726153be885 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Charts.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Charts.java @@ -177,6 +177,10 @@ public FluentChart fetch() { public ChartDeleter delete() { return new ChartDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new 
org.openmetadata.sdk.fluent.common.EntityRestorer<>(client.charts(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Classifications.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Classifications.java index 7e9ab3eb36b9..c07463ea9a3f 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Classifications.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Classifications.java @@ -175,6 +175,11 @@ public FluentClassification fetch() { public ClassificationDeleter delete() { return new ClassificationDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.classifications(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Containers.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Containers.java index a2f847834a2c..ca35c0341ef6 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Containers.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Containers.java @@ -235,6 +235,11 @@ public FluentContainer fetch() { public ContainerDeleter delete() { return new ContainerDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.containers(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/DashboardDataModels.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/DashboardDataModels.java index 8f109552a41a..3f2dfa65557b 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/DashboardDataModels.java +++ 
b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/DashboardDataModels.java @@ -192,6 +192,11 @@ public FluentDashboardDataModel fetch() { public DashboardDataModelDeleter delete() { return new DashboardDataModelDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.dashboardDataModels(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Dashboards.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Dashboards.java index 4af581aadef5..ed2585a61e31 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Dashboards.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Dashboards.java @@ -183,6 +183,11 @@ public FluentDashboard fetch() { public DashboardDeleter delete() { return new DashboardDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.dashboards(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/DataContracts.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/DataContracts.java index be6a50955f36..a7b248437207 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/DataContracts.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/DataContracts.java @@ -261,6 +261,11 @@ public FluentDataContract fetch() { public DataContractDeleter delete() { return new DataContractDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.dataContracts(), identifier); + } } // ==================== Contract Operations 
==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/DataProducts.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/DataProducts.java index 5b912ba37487..e932c745a613 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/DataProducts.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/DataProducts.java @@ -180,6 +180,11 @@ public FluentDataProduct fetch() { public DataProductDeleter delete() { return new DataProductDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.dataProducts(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/DatabaseSchemas.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/DatabaseSchemas.java index bb8e03771e59..948b6786ba37 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/DatabaseSchemas.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/DatabaseSchemas.java @@ -248,6 +248,11 @@ public FluentDatabaseSchema fetch() { public DatabaseSchemaDeleter delete() { return new DatabaseSchemaDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.databaseSchemas(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Databases.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Databases.java index b73132e0a7e2..91b6a3046563 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Databases.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Databases.java @@ -247,8 +247,9 @@ public DatabaseDeleter delete() { return new 
DatabaseDeleter(client, identifier); } - public DatabaseRestorer restore() { - return new DatabaseRestorer(client, identifier); + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.databases(), identifier); } } @@ -283,47 +284,6 @@ public void confirm() { } } - // ==================== Restorer ==================== - - /** - * Fluent restore builder. {@link #execute()} runs the synchronous restore and returns the - * restored {@link Database}. Switching to {@link #async()} returns an - * {@link AsyncDatabaseRestorer} whose {@code execute()} triggers the server-side async - * path and returns an {@link org.openmetadata.sdk.models.AsyncJobResponse} with a job id — - * use this for services with thousands of schemas / tables (issue #4003). - */ - public static class DatabaseRestorer { - private final OpenMetadataClient client; - private final String id; - - public DatabaseRestorer(OpenMetadataClient client, String id) { - this.client = client; - this.id = id; - } - - public AsyncDatabaseRestorer async() { - return new AsyncDatabaseRestorer(client, id); - } - - public Database execute() { - return client.databases().restore(id); - } - } - - public static class AsyncDatabaseRestorer { - private final OpenMetadataClient client; - private final String id; - - public AsyncDatabaseRestorer(OpenMetadataClient client, String id) { - this.client = client; - this.id = id; - } - - public org.openmetadata.sdk.models.AsyncJobResponse execute() { - return client.databases().restoreServerAsync(id); - } - } - // ==================== Lister ==================== public static class DatabaseLister { diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Domains.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Domains.java index b5d3d76404a7..b28d61dc20e6 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Domains.java +++ 
b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Domains.java @@ -223,6 +223,10 @@ public FluentDomain fetch() { public DomainDeleter delete() { return new DomainDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>(client.domains(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Glossaries.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Glossaries.java index ffac0694634a..6e0868e15e50 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Glossaries.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Glossaries.java @@ -193,6 +193,11 @@ public FluentGlossary fetch() { public GlossaryDeleter delete() { return new GlossaryDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.glossaries(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/GlossaryTerms.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/GlossaryTerms.java index 6370aa78eb60..fc31477a9a23 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/GlossaryTerms.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/GlossaryTerms.java @@ -227,6 +227,11 @@ public FluentGlossaryTerm fetch() { public GlossaryTermDeleter delete() { return new GlossaryTermDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.glossaryTerms(), identifier); + } } // ==================== Deleter ==================== diff --git 
a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/LLMServices.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/LLMServices.java index f8b0cebc895f..25efdd7beaea 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/LLMServices.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/LLMServices.java @@ -214,6 +214,11 @@ public LLMService get() { public LLMServiceDeleter delete() { return new LLMServiceDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.llmServices(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/McpServers.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/McpServers.java index f3b88c399ef8..dd982f606c6c 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/McpServers.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/McpServers.java @@ -290,6 +290,11 @@ public McpServer get() { public McpServerDeleter delete() { return new McpServerDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.mcpServers(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Metrics.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Metrics.java index 36f96fbb7fd8..6b2ed1bd09cf 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Metrics.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Metrics.java @@ -182,6 +182,10 @@ public FluentMetric fetch() { public MetricDeleter delete() { return new MetricDeleter(client, identifier); } + + public 
org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>(client.metrics(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/MlModels.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/MlModels.java index 263270d9dc32..6dd864075f68 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/MlModels.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/MlModels.java @@ -178,6 +178,10 @@ public FluentMlModel fetch() { public MlModelDeleter delete() { return new MlModelDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>(client.mlModels(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Pipelines.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Pipelines.java index 8097fa8cc4d3..fe8d55502bc0 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Pipelines.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Pipelines.java @@ -253,6 +253,11 @@ public FluentPipeline fetch() { public PipelineDeleter delete() { return new PipelineDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.pipelines(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/PromptTemplates.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/PromptTemplates.java index 7af80a23323f..4670a75b23e0 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/PromptTemplates.java +++ 
b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/PromptTemplates.java @@ -207,6 +207,11 @@ public PromptTemplate get() { public PromptTemplateDeleter delete() { return new PromptTemplateDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.promptTemplates(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Queries.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Queries.java index 0b300bd7f4b9..35f464944b53 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Queries.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Queries.java @@ -177,6 +177,10 @@ public FluentQuery fetch() { public QueryDeleter delete() { return new QueryDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>(client.queries(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/SearchIndexes.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/SearchIndexes.java index ec810d5cdfd5..c37e07fd57c3 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/SearchIndexes.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/SearchIndexes.java @@ -180,6 +180,11 @@ public FluentSearchIndex fetch() { public SearchIndexDeleter delete() { return new SearchIndexDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.searchIndexes(), identifier); + } } // ==================== Deleter ==================== diff --git 
a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/StoredProcedures.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/StoredProcedures.java index 8ef24a22f032..d808adb73960 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/StoredProcedures.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/StoredProcedures.java @@ -190,6 +190,11 @@ public FluentStoredProcedure fetch() { public StoredProcedureDeleter delete() { return new StoredProcedureDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>( + client.storedProcedures(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Tables.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Tables.java index d38111c1ef77..f63d4c86bbbc 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Tables.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Tables.java @@ -313,8 +313,8 @@ public TableDeleter delete() { return new TableDeleter(client, identifier); } - public TableRestorer restore() { - return new TableRestorer(client, identifier); + public org.openmetadata.sdk.fluent.common.EntityRestorer

    restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>(client.tables(), identifier); } } @@ -377,47 +377,6 @@ public void confirm() { } } - // ==================== Restorer ==================== - - /** - * Fluent restore builder. {@link #execute()} runs the synchronous restore and returns the - * restored {@link Table}. Switching to {@link #async()} returns an - * {@link AsyncTableRestorer} whose {@code execute()} triggers the server-side async path - * and returns an {@link org.openmetadata.sdk.models.AsyncJobResponse} with a job id (issue - * #4003). - */ - public static class TableRestorer { - private final OpenMetadataClient client; - private final String id; - - public TableRestorer(OpenMetadataClient client, String id) { - this.client = client; - this.id = id; - } - - public AsyncTableRestorer async() { - return new AsyncTableRestorer(client, id); - } - - public Table execute() { - return client.tables().restore(id); - } - } - - public static class AsyncTableRestorer { - private final OpenMetadataClient client; - private final String id; - - public AsyncTableRestorer(OpenMetadataClient client, String id) { - this.client = client; - this.id = id; - } - - public org.openmetadata.sdk.models.AsyncJobResponse execute() { - return client.tables().restoreServerAsync(id); - } - } - // ==================== Lister ==================== public static class TableLister { diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Tags.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Tags.java index dd0ac56e8142..5bdd15b2aca8 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Tags.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Tags.java @@ -177,6 +177,10 @@ public FluentTag fetch() { public TagDeleter delete() { return new TagDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new 
org.openmetadata.sdk.fluent.common.EntityRestorer<>(client.tags(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Teams.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Teams.java index 95e7de4cee63..43db1de97327 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Teams.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Teams.java @@ -233,6 +233,10 @@ public FluentTeam fetch() { public TeamDeleter delete() { return new TeamDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>(client.teams(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Topics.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Topics.java index f31328e0b714..09bfe323eed2 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Topics.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Topics.java @@ -238,6 +238,10 @@ public FluentTopic fetch() { public TopicDeleter delete() { return new TopicDeleter(client, identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>(client.topics(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Users.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Users.java index 35d54df0bb27..0c9c49fab84b 100644 --- a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Users.java +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/Users.java @@ -267,6 +267,10 @@ public FluentUser fetch() { public UserDeleter delete() { return new UserDeleter(client, 
identifier); } + + public org.openmetadata.sdk.fluent.common.EntityRestorer restore() { + return new org.openmetadata.sdk.fluent.common.EntityRestorer<>(client.users(), identifier); + } } // ==================== Deleter ==================== diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/common/AsyncEntityRestorer.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/common/AsyncEntityRestorer.java new file mode 100644 index 000000000000..6419d7a38b92 --- /dev/null +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/common/AsyncEntityRestorer.java @@ -0,0 +1,39 @@ +/* + * Copyright 2026 Collate + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.openmetadata.sdk.fluent.common; + +import org.openmetadata.sdk.models.AsyncJobResponse; +import org.openmetadata.sdk.services.EntityServiceBase; + +/** + * Generic fluent async restore builder. Returned by {@link EntityRestorer#async()}. + * Calls {@link EntityServiceBase#restoreServerAsync(String)} which issues + * {@code PUT /restore?async=true} and returns the 202 Accepted response carrying the + * job id (issue #4003). The {@code <T>} parameter is preserved for symmetry with + * {@link EntityRestorer} so call sites that already have an + * {@code EntityRestorer} reference can switch to the async variant without + * losing the type-level context, even though the response itself is type-erased. 
+ */ +public class AsyncEntityRestorer<T> { + private final EntityServiceBase<T> service; + private final String id; + + public AsyncEntityRestorer(EntityServiceBase<T> service, String id) { + this.service = service; + this.id = id; + } + + public AsyncJobResponse execute() { + return service.restoreServerAsync(id); + } +} diff --git a/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/common/EntityRestorer.java b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/common/EntityRestorer.java new file mode 100644 index 000000000000..11b7573605fb --- /dev/null +++ b/openmetadata-sdk/src/main/java/org/openmetadata/sdk/fluent/common/EntityRestorer.java @@ -0,0 +1,45 @@ +/* + * Copyright 2026 Collate + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.openmetadata.sdk.fluent.common; + +import org.openmetadata.sdk.services.EntityServiceBase; + +/** + * Generic fluent restore builder used by every entity-type fluent class that exposes a + * {@code restore()} entry point (Tables, Dashboards, Pipelines, Topics, Containers, + * Glossaries, Domains, …). Replaces the per-entity {@code TableRestorer} / + * {@code DatabaseRestorer} duplicates so adding restore support to a new fluent only + * requires wiring it to its service — no new class per type. + * + *

    Sync: {@code execute()} runs the synchronous restore and returns the restored + * entity. Async: {@code async().execute()} switches to the server-side async path + * ({@code PUT /restore?async=true}) and returns an + * {@link org.openmetadata.sdk.models.AsyncJobResponse} with a job id (issue #4003). + */ +public class EntityRestorer<T> { + private final EntityServiceBase<T> service; + private final String id; + + public EntityRestorer(EntityServiceBase<T> service, String id) { + this.service = service; + this.id = id; + } + + public AsyncEntityRestorer<T> async() { + return new AsyncEntityRestorer<>(service, id); + } + + public T execute() { + return service.restore(id); + } +} diff --git a/openmetadata-sdk/src/test/java/org/openmetadata/sdk/fluent/RestoreFluentAPITest.java b/openmetadata-sdk/src/test/java/org/openmetadata/sdk/fluent/RestoreFluentAPITest.java index 1dd98173c227..23ad3860d1df 100644 --- a/openmetadata-sdk/src/test/java/org/openmetadata/sdk/fluent/RestoreFluentAPITest.java +++ b/openmetadata-sdk/src/test/java/org/openmetadata/sdk/fluent/RestoreFluentAPITest.java @@ -25,12 +25,28 @@ import org.junit.jupiter.api.Test; import org.mockito.Mock; import org.mockito.MockitoAnnotations; +import org.openmetadata.schema.entity.data.Container; +import org.openmetadata.schema.entity.data.Dashboard; import org.openmetadata.schema.entity.data.Database; +import org.openmetadata.schema.entity.data.DatabaseSchema; +import org.openmetadata.schema.entity.data.Glossary; +import org.openmetadata.schema.entity.data.MlModel; +import org.openmetadata.schema.entity.data.Pipeline; import org.openmetadata.schema.entity.data.Table; +import org.openmetadata.schema.entity.data.Topic; +import org.openmetadata.schema.entity.domains.Domain; import org.openmetadata.sdk.client.OpenMetadataClient; import org.openmetadata.sdk.models.AsyncJobResponse; +import org.openmetadata.sdk.services.dataassets.DashboardService; +import org.openmetadata.sdk.services.dataassets.MlModelService; +import 
org.openmetadata.sdk.services.dataassets.PipelineService; import org.openmetadata.sdk.services.dataassets.TableService; +import org.openmetadata.sdk.services.dataassets.TopicService; +import org.openmetadata.sdk.services.databases.DatabaseSchemaService; import org.openmetadata.sdk.services.databases.DatabaseService; +import org.openmetadata.sdk.services.domains.DomainService; +import org.openmetadata.sdk.services.glossary.GlossaryService; +import org.openmetadata.sdk.services.storages.ContainerService; /** * Verifies the fluent restore builders added for issue #4003. {@code .restore().execute()} @@ -42,14 +58,38 @@ class RestoreFluentAPITest { @Mock private OpenMetadataClient mockClient; @Mock private TableService mockTables; @Mock private DatabaseService mockDatabases; + @Mock private DatabaseSchemaService mockSchemas; + @Mock private DashboardService mockDashboards; + @Mock private PipelineService mockPipelines; + @Mock private TopicService mockTopics; + @Mock private MlModelService mockMlModels; + @Mock private ContainerService mockContainers; + @Mock private GlossaryService mockGlossaries; + @Mock private DomainService mockDomains; @BeforeEach void setUp() { MockitoAnnotations.openMocks(this); when(mockClient.tables()).thenReturn(mockTables); when(mockClient.databases()).thenReturn(mockDatabases); + when(mockClient.databaseSchemas()).thenReturn(mockSchemas); + when(mockClient.dashboards()).thenReturn(mockDashboards); + when(mockClient.pipelines()).thenReturn(mockPipelines); + when(mockClient.topics()).thenReturn(mockTopics); + when(mockClient.mlModels()).thenReturn(mockMlModels); + when(mockClient.containers()).thenReturn(mockContainers); + when(mockClient.glossaries()).thenReturn(mockGlossaries); + when(mockClient.domains()).thenReturn(mockDomains); Tables.setDefaultClient(mockClient); Databases.setDefaultClient(mockClient); + DatabaseSchemas.setDefaultClient(mockClient); + Dashboards.setDefaultClient(mockClient); + Pipelines.setDefaultClient(mockClient); + 
Topics.setDefaultClient(mockClient); + MlModels.setDefaultClient(mockClient); + Containers.setDefaultClient(mockClient); + Glossaries.setDefaultClient(mockClient); + Domains.setDefaultClient(mockClient); } @Test @@ -106,4 +146,125 @@ void databasesFluent_asyncRestore_callsRestoreServerAsync() throws Exception { verify(mockDatabases).restoreServerAsync(id); verify(mockDatabases, never()).restore(eq(id)); } + + // ---------------------------------------------------------------------------------------- + // Coverage that the new generic EntityRestorer wiring works for every data-asset fluent. + // Tables / Databases above are unchanged; below verifies the broader rollout reaches the + // correct service per fluent — one sync + one async assertion per type to lock the + // wiring without exhaustively testing every type (they all go through the same + // EntityRestorer helper, so a representative sample is enough to catch a typo in any + // single fluent's wire-up). + // ---------------------------------------------------------------------------------------- + + @Test + void databaseSchemasFluent_restore_routesThroughSchemaService() throws Exception { + String id = UUID.randomUUID().toString(); + DatabaseSchema restored = new DatabaseSchema().withId(UUID.fromString(id)).withName("s"); + when(mockSchemas.restore(id)).thenReturn(restored); + AsyncJobResponse async = new AsyncJobResponse("job-schema", "ok"); + when(mockSchemas.restoreServerAsync(id)).thenReturn(async); + + assertSame(restored, DatabaseSchemas.find(id).restore().execute()); + assertEquals("job-schema", DatabaseSchemas.find(id).restore().async().execute().getJobId()); + verify(mockSchemas).restore(id); + verify(mockSchemas).restoreServerAsync(id); + } + + @Test + void dashboardsFluent_restore_routesThroughDashboardService() throws Exception { + String id = UUID.randomUUID().toString(); + Dashboard restored = new Dashboard().withId(UUID.fromString(id)).withName("d"); + 
when(mockDashboards.restore(id)).thenReturn(restored); + AsyncJobResponse async = new AsyncJobResponse("job-dash", "ok"); + when(mockDashboards.restoreServerAsync(id)).thenReturn(async); + + assertSame(restored, Dashboards.find(id).restore().execute()); + assertEquals("job-dash", Dashboards.find(id).restore().async().execute().getJobId()); + verify(mockDashboards).restore(id); + verify(mockDashboards).restoreServerAsync(id); + } + + @Test + void pipelinesFluent_restore_routesThroughPipelineService() throws Exception { + String id = UUID.randomUUID().toString(); + Pipeline restored = new Pipeline().withId(UUID.fromString(id)).withName("p"); + when(mockPipelines.restore(id)).thenReturn(restored); + AsyncJobResponse async = new AsyncJobResponse("job-pipe", "ok"); + when(mockPipelines.restoreServerAsync(id)).thenReturn(async); + + assertSame(restored, Pipelines.find(id).restore().execute()); + assertEquals("job-pipe", Pipelines.find(id).restore().async().execute().getJobId()); + verify(mockPipelines).restore(id); + verify(mockPipelines).restoreServerAsync(id); + } + + @Test + void topicsFluent_restore_routesThroughTopicService() throws Exception { + String id = UUID.randomUUID().toString(); + Topic restored = new Topic().withId(UUID.fromString(id)).withName("t"); + when(mockTopics.restore(id)).thenReturn(restored); + AsyncJobResponse async = new AsyncJobResponse("job-topic", "ok"); + when(mockTopics.restoreServerAsync(id)).thenReturn(async); + + assertSame(restored, Topics.find(id).restore().execute()); + assertEquals("job-topic", Topics.find(id).restore().async().execute().getJobId()); + verify(mockTopics).restore(id); + verify(mockTopics).restoreServerAsync(id); + } + + @Test + void mlModelsFluent_restore_routesThroughMlModelService() throws Exception { + String id = UUID.randomUUID().toString(); + MlModel restored = new MlModel().withId(UUID.fromString(id)).withName("m"); + when(mockMlModels.restore(id)).thenReturn(restored); + AsyncJobResponse async = new 
AsyncJobResponse("job-ml", "ok"); + when(mockMlModels.restoreServerAsync(id)).thenReturn(async); + + assertSame(restored, MlModels.find(id).restore().execute()); + assertEquals("job-ml", MlModels.find(id).restore().async().execute().getJobId()); + verify(mockMlModels).restore(id); + verify(mockMlModels).restoreServerAsync(id); + } + + @Test + void containersFluent_restore_routesThroughContainerService() throws Exception { + String id = UUID.randomUUID().toString(); + Container restored = new Container().withId(UUID.fromString(id)).withName("c"); + when(mockContainers.restore(id)).thenReturn(restored); + AsyncJobResponse async = new AsyncJobResponse("job-cont", "ok"); + when(mockContainers.restoreServerAsync(id)).thenReturn(async); + + assertSame(restored, Containers.find(id).restore().execute()); + assertEquals("job-cont", Containers.find(id).restore().async().execute().getJobId()); + verify(mockContainers).restore(id); + verify(mockContainers).restoreServerAsync(id); + } + + @Test + void glossariesFluent_restore_routesThroughGlossaryService() throws Exception { + String id = UUID.randomUUID().toString(); + Glossary restored = new Glossary().withId(UUID.fromString(id)).withName("g"); + when(mockGlossaries.restore(id)).thenReturn(restored); + AsyncJobResponse async = new AsyncJobResponse("job-gloss", "ok"); + when(mockGlossaries.restoreServerAsync(id)).thenReturn(async); + + assertSame(restored, Glossaries.find(id).restore().execute()); + assertEquals("job-gloss", Glossaries.find(id).restore().async().execute().getJobId()); + verify(mockGlossaries).restore(id); + verify(mockGlossaries).restoreServerAsync(id); + } + + @Test + void domainsFluent_restore_routesThroughDomainService() throws Exception { + String id = UUID.randomUUID().toString(); + Domain restored = new Domain().withId(UUID.fromString(id)).withName("dom"); + when(mockDomains.restore(id)).thenReturn(restored); + AsyncJobResponse async = new AsyncJobResponse("job-dom", "ok"); + 
when(mockDomains.restoreServerAsync(id)).thenReturn(async); + + assertSame(restored, Domains.find(id).restore().execute()); + assertEquals("job-dom", Domains.find(id).restore().async().execute().getJobId()); + verify(mockDomains).restore(id); + verify(mockDomains).restoreServerAsync(id); + } } From fc52438fea423fcea3b88392c6e4652f97b95ed0 Mon Sep 17 00:00:00 2001 From: Sriharsha Chintalapani Date: Wed, 20 May 2026 15:02:11 -0700 Subject: [PATCH 38/38] fix(feed): defensive de-dup of thread ids in legacy feed cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `findByEntityIds` now selects DISTINCT ids — currently a no-op given the PK + single-valued entityId column, but cheap insurance against future joins or schema shifts that could fan out duplicates. `FeedRepository.deleteByAbout` accumulates into a LinkedHashSet so a caller passing an entityIds list with duplicates (or any future change that allows per-chunk overlap) can't inflate the downstream IN lists in `deleteThreadsInBatch` (3 DELETEs per 500-id chunk). Addresses Copilot review comments 3276652759 and 3276652802 on #27997. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../org/openmetadata/service/jdbi3/CollectionDAO.java | 8 +++++++- .../org/openmetadata/service/jdbi3/FeedRepository.java | 9 ++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java index 0df8b505a833..af4d16cb744d 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/CollectionDAO.java @@ -4194,7 +4194,13 @@ List findByEntityId( @SqlQuery("select id from thread_entity where entityId = :entityId") List findByEntityId(@Bind("entityId") String entityId); - @SqlQuery("select id from <tableName> where entityId IN (<entityIds>)") + // DISTINCT is defence-in-depth: thread_entity.id is a primary key, and entityId is a + // single-valued column per row, so a single matching scan can't physically return the + // same id twice. The DISTINCT survives a future schema where a thread row picks up + // multiple entity references (or a join is added) — keeping the consumer code in + // deleteByAbout from re-issuing redundant relationship / extension / feed deletes for + // the same id under chunking. 
+ @SqlQuery("select DISTINCT id from <tableName> where entityId IN (<entityIds>)") List findByEntityIds( @Define("tableName") String tableName, @BindList("entityIds") List entityIds); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java index 7764fa7c21c6..124eaca672b7 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/FeedRepository.java @@ -51,10 +51,12 @@ import java.util.ArrayList; import java.util.Comparator; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Optional; +import java.util.Set; import java.util.UUID; import java.util.stream.Collectors; import lombok.Getter; @@ -804,7 +806,12 @@ public void deleteByAbout(List entityIds) { return; } List entityIdStrings = entityIds.stream().map(UUID::toString).toList(); - List threadIds = new ArrayList<>(); + // LinkedHashSet: per-chunk findByEntityIds is already DISTINCT, but accumulating across + // chunks could still see the same id twice if a future caller passes an entityIds list + // with duplicates. Dedup once here so deleteThreadsInBatch's downstream chunking (3 + // IN-list DELETEs per 500-id chunk) doesn't waste budget on redundant rows. Linked + // ordering for deterministic logs / replay. + Set threadIds = new LinkedHashSet<>(); for (int i = 0; i < entityIdStrings.size(); i += FEED_IN_BATCH_SIZE) { List chunk = entityIdStrings.subList(i, Math.min(i + FEED_IN_BATCH_SIZE, entityIdStrings.size()));