From b2fc95b9fcac6351136b29d79d9b49b206510363 Mon Sep 17 00:00:00 2001 From: jopemachine Date: Fri, 8 May 2026 15:49:18 +0900 Subject: [PATCH 1/8] refactor(BA-5979): split deployment search into admin and scoped layers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move ``search_model_deployments`` (no-scope, returns ``ModelDeploymentData`` directly from ``EndpointRow``) into a dedicated ``DeploymentAdminRepository`` and route it through a new ``DeploymentAdminService`` / ``DeploymentAdminProcessors`` package. The corresponding action is renamed to ``AdminSearchDeploymentsAction`` so its admin (no-scope) intent is explicit at every layer, mirroring the ``vfolder`` / ``login_client_type`` admin-split convention. The five service paths that previously called ``_convert_deployment_info_to_data`` on a freshly fetched ``DeploymentInfo`` (``create_deployment``, ``update_deployment``, ``get_deployment_by_id``, ``activate_revision``, plus the search path) now read straight from ``EndpointRow.to_model_deployment_data`` via the repository, so revision-id columns flow through the API path unchanged from the DB and the ``model_revisions`` ordering ambiguity that BA-5963 patched out becomes unreachable structurally. The legacy REST handler and the v2 GraphQL adapter both consume the new ``deployment_admin`` processor; the orphaned ``SearchDeploymentsAction`` file is removed. ``DeploymentAdminRepository`` shares the underlying ``DeploymentDBSource``, so SQL-level query logic stays in one place — only the layering above it is split. Resolves BA-5979. 
--- src/ai/backend/common/metrics/metric.py | 1 + .../api/adapters/deployment/adapter.py | 38 ++-- .../manager/api/rest/deployment/handler.py | 17 +- src/ai/backend/manager/api/rest/tree.py | 1 + .../backend/manager/data/deployment/types.py | 14 ++ src/ai/backend/manager/models/endpoint/row.py | 104 +++++++++++ .../repositories/deployment/__init__.py | 2 + .../deployment/admin_repository.py | 77 ++++++++ .../deployment/db_source/db_source.py | 63 +++++++ .../repositories/deployment/repositories.py | 5 +- .../repositories/deployment/repository.py | 20 +++ ...oyments.py => admin_search_deployments.py} | 11 +- .../services/deployment/admin_service.py | 37 ++++ .../manager/services/deployment/processors.py | 40 ++++- .../manager/services/deployment/service.py | 168 ++---------------- src/ai/backend/manager/services/factory.py | 8 +- src/ai/backend/manager/services/processors.py | 7 + tests/component/deployment/test_deployment.py | 4 +- .../unit/manager/models/test_endpoint_row.py | 152 ++++++++++++++++ .../deployment/test_deployment_service.py | 74 -------- 20 files changed, 583 insertions(+), 260 deletions(-) create mode 100644 src/ai/backend/manager/repositories/deployment/admin_repository.py rename src/ai/backend/manager/services/deployment/actions/{search_deployments.py => admin_search_deployments.py} (68%) create mode 100644 src/ai/backend/manager/services/deployment/admin_service.py create mode 100644 tests/unit/manager/models/test_endpoint_row.py diff --git a/src/ai/backend/common/metrics/metric.py b/src/ai/backend/common/metrics/metric.py index 68062786edf..1779e6cd586 100644 --- a/src/ai/backend/common/metrics/metric.py +++ b/src/ai/backend/common/metrics/metric.py @@ -420,6 +420,7 @@ class LayerType(enum.StrEnum): AUDIT_LOG_REPOSITORY = "audit_log_repository" CONTAINER_REGISTRY_REPOSITORY = "container_registry_repository" DEPLOYMENT_REPOSITORY = "deployment_repository" + DEPLOYMENT_ADMIN_REPOSITORY = "deployment_admin_repository" DOMAIN_REPOSITORY = 
"domain_repository" DOTFILE_REPOSITORY = "dotfile_repository" ETCD_CONFIG_REPOSITORY = "etcd_config_repository" diff --git a/src/ai/backend/manager/api/adapters/deployment/adapter.py b/src/ai/backend/manager/api/adapters/deployment/adapter.py index 9508752efc6..fa6cf9e0c52 100644 --- a/src/ai/backend/manager/api/adapters/deployment/adapter.py +++ b/src/ai/backend/manager/api/adapters/deployment/adapter.py @@ -248,6 +248,9 @@ from ai.backend.manager.services.deployment.actions.access_token.search_access_tokens import ( SearchAccessTokensAction, ) +from ai.backend.manager.services.deployment.actions.admin_search_deployments import ( + AdminSearchDeploymentsAction, +) from ai.backend.manager.services.deployment.actions.auto_scaling_rule.bulk_delete_auto_scaling_rules import ( BulkDeleteAutoScalingRulesAction, ) @@ -310,9 +313,6 @@ from ai.backend.manager.services.deployment.actions.route.update_route_traffic_status import ( UpdateRouteTrafficStatusAction, ) -from ai.backend.manager.services.deployment.actions.search_deployments import ( - SearchDeploymentsAction, -) from ai.backend.manager.services.deployment.actions.search_replicas import SearchReplicasAction from ai.backend.manager.services.deployment.actions.sync_replicas import SyncReplicaAction from ai.backend.manager.services.deployment.actions.update_deployment import UpdateDeploymentAction @@ -578,8 +578,10 @@ async def admin_search( ) -> AdminSearchDeploymentsPayload: """Search deployments (admin, no scope).""" querier = self._build_deployment_querier(input) - action_result = await self._processors.deployment.search_deployments.wait_for_complete( - SearchDeploymentsAction(querier=querier) + action_result = ( + await self._processors.deployment_admin.admin_search_deployments.wait_for_complete( + AdminSearchDeploymentsAction(querier=querier) + ) ) return AdminSearchDeploymentsPayload( items=[self._deployment_data_to_dto(item) for item in action_result.data], @@ -618,8 +620,10 @@ def _by_created_user() -> 
sa.sql.expression.ColumnElement[bool]: offset=input.offset, base_conditions=[_by_created_user], ) - action_result = await self._processors.deployment.search_deployments.wait_for_complete( - SearchDeploymentsAction(querier=querier) + action_result = ( + await self._processors.deployment_admin.admin_search_deployments.wait_for_complete( + AdminSearchDeploymentsAction(querier=querier) + ) ) return AdminSearchDeploymentsPayload( items=[self._deployment_data_to_dto(item) for item in action_result.data], @@ -656,8 +660,10 @@ def _by_project_id() -> sa.sql.expression.ColumnElement[bool]: offset=input.offset, base_conditions=[_by_project_id], ) - action_result = await self._processors.deployment.search_deployments.wait_for_complete( - SearchDeploymentsAction(querier=querier) + action_result = ( + await self._processors.deployment_admin.admin_search_deployments.wait_for_complete( + AdminSearchDeploymentsAction(querier=querier) + ) ) return AdminSearchDeploymentsPayload( items=[self._deployment_data_to_dto(item) for item in action_result.data], @@ -1284,7 +1290,13 @@ async def batch_load_by_ids( ) -> list[DeploymentNode | None]: """Batch load deployments by ID for DataLoader use. - Returns DeploymentNode DTOs in the same order as the input deployment_ids list. + Routed through the admin (no-scope) processor — DataLoader runs + under an already-authorised parent query so cross-scope filtering + is unnecessary; the ``by_ids`` condition is itself the bound on + what gets returned. + + Returns ``DeploymentNode`` DTOs in the same order as the input + ``deployment_ids`` list; missing IDs come back as ``None``. 
""" if not deployment_ids: return [] @@ -1292,8 +1304,10 @@ async def batch_load_by_ids( pagination=OffsetPagination(limit=len(deployment_ids)), conditions=[DeploymentConditions.by_ids(deployment_ids)], ) - action_result = await self._processors.deployment.search_deployments.wait_for_complete( - SearchDeploymentsAction(querier=querier) + action_result = ( + await self._processors.deployment_admin.admin_search_deployments.wait_for_complete( + AdminSearchDeploymentsAction(querier=querier) + ) ) deployment_map = { data.id: self._deployment_data_to_dto(data) for data in action_result.data diff --git a/src/ai/backend/manager/api/rest/deployment/handler.py b/src/ai/backend/manager/api/rest/deployment/handler.py index ec4e998008f..6e235e59dc6 100644 --- a/src/ai/backend/manager/api/rest/deployment/handler.py +++ b/src/ai/backend/manager/api/rest/deployment/handler.py @@ -57,6 +57,9 @@ DeploymentUpdaterSpec, ReplicaSpecUpdaterSpec, ) +from ai.backend.manager.services.deployment.actions.admin_search_deployments import ( + AdminSearchDeploymentsAction, +) from ai.backend.manager.services.deployment.actions.create_deployment import ( CreateDeploymentAction, ) @@ -88,13 +91,13 @@ SearchRoutesAction, UpdateRouteTrafficStatusAction, ) -from ai.backend.manager.services.deployment.actions.search_deployments import ( - SearchDeploymentsAction, -) from ai.backend.manager.services.deployment.actions.update_deployment import ( UpdateDeploymentAction, ) -from ai.backend.manager.services.deployment.processors import DeploymentProcessors +from ai.backend.manager.services.deployment.processors import ( + DeploymentAdminProcessors, + DeploymentProcessors, +) from ai.backend.manager.types import OptionalState from .adapter import ( @@ -114,9 +117,11 @@ def __init__( self, *, deployment: DeploymentProcessors, + deployment_admin: DeploymentAdminProcessors, runtime_variant_adapter: RuntimeVariantAdapter, ) -> None: self._deployment = deployment + self._deployment_admin = deployment_admin # 
``RuntimeVariantAdapter`` supplies the id↔name bridge used by # this legacy REST handler: incoming requests arrive with a # name string (legacy contract) but the internal chain is @@ -200,8 +205,8 @@ async def search_deployments( querier = self._deployment_adapter.build_querier(body.parsed) # Call service action - action_result = await self._deployment.search_deployments.wait_for_complete( - SearchDeploymentsAction(querier=querier) + action_result = await self._deployment_admin.admin_search_deployments.wait_for_complete( + AdminSearchDeploymentsAction(querier=querier) ) # Build response diff --git a/src/ai/backend/manager/api/rest/tree.py b/src/ai/backend/manager/api/rest/tree.py index 54beea3d142..b5d5f864eab 100644 --- a/src/ai/backend/manager/api/rest/tree.py +++ b/src/ai/backend/manager/api/rest/tree.py @@ -166,6 +166,7 @@ def build_api_routes( ) deployment_handler = DeploymentAPIHandler( deployment=processors.deployment, + deployment_admin=processors.deployment_admin, runtime_variant_adapter=adapters.runtime_variant, ) domainconfig_handler = DomainConfigHandler(dotfile=processors.dotfile) diff --git a/src/ai/backend/manager/data/deployment/types.py b/src/ai/backend/manager/data/deployment/types.py index ac072072032..db3398e2288 100644 --- a/src/ai/backend/manager/data/deployment/types.py +++ b/src/ai/backend/manager/data/deployment/types.py @@ -1150,6 +1150,20 @@ class DeploymentInfoSearchResult: has_previous_page: bool +@dataclass +class ModelDeploymentDataSearchResult: + """Search result with pagination for the API-shaped ``ModelDeploymentData``. + + Returned by repository methods that project ``EndpointRow`` straight to + ``ModelDeploymentData`` without going through ``DeploymentInfo``. 
+ """ + + items: list[ModelDeploymentData] + total_count: int + has_next_page: bool + has_previous_page: bool + + @dataclass class AutoScalingRuleSearchResult: """Search result with pagination for auto-scaling rules.""" diff --git a/src/ai/backend/manager/models/endpoint/row.py b/src/ai/backend/manager/models/endpoint/row.py index 53422f308bc..050648bdefd 100644 --- a/src/ai/backend/manager/models/endpoint/row.py +++ b/src/ai/backend/manager/models/endpoint/row.py @@ -34,6 +34,10 @@ ) from ai.backend.common.config import model_definition_iv +from ai.backend.common.data.model_deployment.types import ( + DeploymentStrategy, + ModelDeploymentStatus, +) from ai.backend.common.identifier.deployment import DeploymentID from ai.backend.common.identifier.deployment_revision import DeploymentRevisionID from ai.backend.common.identifier.project import ProjectID @@ -72,8 +76,12 @@ DeploymentState, DeploymentSummaryData, ModelDeploymentAutoScalingRuleData, + ModelDeploymentData, + ModelDeploymentMetadataInfo, + ModelRevisionData, ModelRevisionSpec, ReplicaSpec, + ReplicaStateData, ) from ai.backend.manager.data.model_serving.types import ( EndpointAutoScalingRuleData, @@ -108,6 +116,28 @@ from ai.backend.manager.models.routing import RoutingRow from ai.backend.manager.models.user import UserRow + +def _lifecycle_to_status(lifecycle: EndpointLifecycle) -> ModelDeploymentStatus: + """Map the persisted ``EndpointLifecycle`` to the v2 ``ModelDeploymentStatus``. + + The lifecycle axis is monotonic (PENDING → DEPLOYING → READY → DESTROYING + → DESTROYED); v2 exposes replica reconciliation as the orthogonal + ``scaling_state`` field, so legacy ``SCALING`` folds into ``READY`` and + legacy ``CREATED`` (never-deployed) folds into ``PENDING``. 
+ """ + match lifecycle: + case EndpointLifecycle.PENDING | EndpointLifecycle.CREATED: + return ModelDeploymentStatus.PENDING + case EndpointLifecycle.READY | EndpointLifecycle.SCALING: + return ModelDeploymentStatus.READY + case EndpointLifecycle.DEPLOYING: + return ModelDeploymentStatus.DEPLOYING + case EndpointLifecycle.DESTROYING: + return ModelDeploymentStatus.STOPPING + case EndpointLifecycle.DESTROYED: + return ModelDeploymentStatus.STOPPED + + __all__ = ( "EndpointAutoScalingRuleRow", "EndpointLifecycle", @@ -727,6 +757,80 @@ def from_deployment_creator( revision_history_limit=creator.metadata.revision_history_limit, ) + def to_model_deployment_data(self) -> ModelDeploymentData: + """Project this row directly to a ``ModelDeploymentData`` for API responses. + + This bypasses the ``DeploymentInfo`` intermediate so the revision-id + columns flow through the API path unchanged from the DB. Eager-load + requirements: ``EndpointRow.revisions`` (with + ``DeploymentRevisionRow.image_row``) and ``EndpointRow.deployment_policy``. + + ``current_revision_id`` and ``deploying_revision_id`` are read from + the row columns directly. The ``revision`` spec is resolved by + explicit match against ``current_revision``; if no matching row is + loaded (dangling reference, or caller forgot to eager-load + ``revisions``), ``revision`` is ``None`` while ``current_revision_id`` + still surfaces the column value — callers can act on the ID even when + the joined spec is unavailable. + """ + revision: ModelRevisionData | None = None + if self.current_revision is not None: + match = next( + (r for r in self.revisions if r.id == self.current_revision), + None, + ) + if match is None: + log.error( + "Deployment {} has current_revision_id {} but no matching " + "DeploymentRevisionRow was found among loaded revisions; " + "the revision spec will be reported as null. 
This usually " + "means EndpointRow.revisions was not eagerly loaded by the " + "caller.", + self.id, + self.current_revision, + ) + else: + revision = match.to_data() + + desired_count = ( + self.desired_replicas if self.desired_replicas is not None else self.replicas + ) + policy_data = ( + self.deployment_policy.to_data() if self.deployment_policy is not None else None + ) + + return ModelDeploymentData( + id=self.id, + metadata=ModelDeploymentMetadataInfo( + name=self.name, + status=_lifecycle_to_status(self.lifecycle_stage), + tags=[self.tag] if self.tag else [], + project_id=self.project, + domain_name=self.domain, + created_at=self.created_at, + updated_at=self.created_at, + ), + network_access=DeploymentNetworkSpec( + open_to_public=self.open_to_public if self.open_to_public is not None else False, + url=self.url, + ), + revision_history_ids=[self.current_revision] if self.current_revision else [], + revision=revision, + current_revision_id=self.current_revision, + deploying_revision_id=self.deploying_revision, + scaling_rule_ids=[], + replica_state=ReplicaStateData( + desired_replica_count=desired_count, + replica_ids=[], + ), + default_deployment_strategy=DeploymentStrategy.ROLLING, + created_user_id=self.created_user, + options=self.options, + scaling_state=self.scaling_state, + policy=policy_data, + sub_step=self.sub_step, + ) + def to_deployment_info(self) -> DeploymentInfo: """Convert EndpointRow to DeploymentInfo dataclass using revision data.""" policy_data = None diff --git a/src/ai/backend/manager/repositories/deployment/__init__.py b/src/ai/backend/manager/repositories/deployment/__init__.py index aba78cccffc..bc7240657d5 100644 --- a/src/ai/backend/manager/repositories/deployment/__init__.py +++ b/src/ai/backend/manager/repositories/deployment/__init__.py @@ -3,9 +3,11 @@ from ai.backend.manager.models.endpoint.conditions import DeploymentConditions from ai.backend.manager.models.routing.conditions import RouteConditions +from .admin_repository 
import DeploymentAdminRepository from .repository import DeploymentRepository __all__ = [ + "DeploymentAdminRepository", "DeploymentRepository", "DeploymentConditions", "RouteConditions", diff --git a/src/ai/backend/manager/repositories/deployment/admin_repository.py b/src/ai/backend/manager/repositories/deployment/admin_repository.py new file mode 100644 index 00000000000..dc5c68ac840 --- /dev/null +++ b/src/ai/backend/manager/repositories/deployment/admin_repository.py @@ -0,0 +1,77 @@ +"""Admin-only repository for deployment queries that span every scope. + +Holds reads that are not bounded by a project / user / domain scope. The +regular ``DeploymentRepository`` keeps the scoped variants; admin-style +"see everything" queries live here so the layering is explicit and the +GraphQL/REST ``admin_*`` paths have a clearly-named home. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ai.backend.common.exception import BackendAIError +from ai.backend.common.metrics.metric import DomainType, LayerType +from ai.backend.common.resilience.policies.metrics import MetricArgs, MetricPolicy +from ai.backend.common.resilience.policies.retry import BackoffStrategy, RetryArgs, RetryPolicy +from ai.backend.common.resilience.resilience import Resilience +from ai.backend.manager.data.deployment.types import ModelDeploymentDataSearchResult +from ai.backend.manager.repositories.base import BatchQuerier +from ai.backend.manager.repositories.deployment.db_source.db_source import DeploymentDBSource + +if TYPE_CHECKING: + from ai.backend.manager.models.storage import StorageSessionManager + from ai.backend.manager.models.utils import ExtendedAsyncSAEngine + +__all__ = ("DeploymentAdminRepository",) + + +deployment_admin_repository_resilience = Resilience( + policies=[ + MetricPolicy( + MetricArgs( + domain=DomainType.REPOSITORY, + layer=LayerType.DEPLOYMENT_ADMIN_REPOSITORY, + ) + ), + RetryPolicy( + RetryArgs( + max_retries=3, + retry_delay=0.1, + 
backoff_strategy=BackoffStrategy.FIXED, + non_retryable_exceptions=(BackendAIError,), + ) + ), + ] +) + + +class DeploymentAdminRepository: + """Admin (no-scope) reads against the ``endpoints`` table. + + Shares the underlying ``DeploymentDBSource`` with the regular + repository — the SQL-level query stays in one place; the split exists + so the calling layer makes its admin intent explicit. + """ + + _db_source: DeploymentDBSource + + def __init__( + self, + db: ExtendedAsyncSAEngine, + storage_manager: StorageSessionManager, + ) -> None: + self._db_source = DeploymentDBSource(db, storage_manager) + + @deployment_admin_repository_resilience.apply() + async def search_model_deployments( + self, + querier: BatchQuerier, + ) -> ModelDeploymentDataSearchResult: + """Search every deployment without a scope filter. + + Callers may still pass arbitrary ``conditions`` through the + querier — the absence of a scope filter on the repository method + itself is what makes this admin-only. + """ + return await self._db_source.search_model_deployments(querier) diff --git a/src/ai/backend/manager/repositories/deployment/db_source/db_source.py b/src/ai/backend/manager/repositories/deployment/db_source/db_source.py index f1b608bfe0b..1485188b4e7 100644 --- a/src/ai/backend/manager/repositories/deployment/db_source/db_source.py +++ b/src/ai/backend/manager/repositories/deployment/db_source/db_source.py @@ -69,6 +69,8 @@ LegacyRevisionCreateReadBundle, ModelDeploymentAccessTokenData, ModelDeploymentAutoScalingRuleData, + ModelDeploymentData, + ModelDeploymentDataSearchResult, ModelRevisionData, ModelRevisionSpec, RevisionSearchResult, @@ -406,6 +408,37 @@ async def get_endpoint( return row.to_deployment_info() + async def get_model_deployment_data( + self, + endpoint_id: uuid.UUID, + ) -> ModelDeploymentData: + """Fetch a deployment as the API-shaped ``ModelDeploymentData``. 
+ + Bypasses the ``DeploymentInfo`` intermediate so the API path's + revision-id columns flow through unchanged from the DB row. + + Raises: + EndpointNotFound: If the endpoint does not exist. + """ + async with self._begin_readonly_session_read_committed() as db_sess: + query = ( + sa.select(EndpointRow) + .where(EndpointRow.id == endpoint_id) + .options( + selectinload(EndpointRow.revisions).selectinload( + DeploymentRevisionRow.image_row + ), + selectinload(EndpointRow.deployment_policy), + ) + ) + result = await db_sess.execute(query) + row: EndpointRow | None = result.scalar_one_or_none() + + if not row: + raise EndpointNotFound(f"Endpoint {endpoint_id} not found") + + return row.to_model_deployment_data() + async def get_deployments_by_ids( self, deployment_ids: set[DeploymentID], @@ -1154,6 +1187,36 @@ async def search_endpoints( has_previous_page=result.has_previous_page, ) + async def search_model_deployments( + self, + querier: BatchQuerier, + ) -> ModelDeploymentDataSearchResult: + """Search endpoints and project each row directly to ``ModelDeploymentData``. + + Mirrors ``search_endpoints`` but skips the ``DeploymentInfo`` step + so the API path consumes the row's authoritative projection. 
+ """ + async with self._begin_readonly_session_read_committed() as db_sess: + query = sa.select(EndpointRow).options( + selectinload(EndpointRow.revisions).selectinload(DeploymentRevisionRow.image_row), + selectinload(EndpointRow.deployment_policy), + ) + + result = await execute_batch_querier( + db_sess, + query, + querier, + ) + + items = [row.EndpointRow.to_model_deployment_data() for row in result.rows] + + return ModelDeploymentDataSearchResult( + items=items, + total_count=result.total_count, + has_next_page=result.has_next_page, + has_previous_page=result.has_previous_page, + ) + async def search_deployments_in_project( self, querier: BatchQuerier, diff --git a/src/ai/backend/manager/repositories/deployment/repositories.py b/src/ai/backend/manager/repositories/deployment/repositories.py index 49ccf2b4a41..6e75c795737 100644 --- a/src/ai/backend/manager/repositories/deployment/repositories.py +++ b/src/ai/backend/manager/repositories/deployment/repositories.py @@ -5,6 +5,7 @@ from ai.backend.manager.repositories.types import RepositoryArgs +from .admin_repository import DeploymentAdminRepository from .repository import DeploymentRepository @@ -13,6 +14,7 @@ class DeploymentRepositories: """Container for deployment-related repositories.""" repository: DeploymentRepository + admin_repository: DeploymentAdminRepository @classmethod def create(cls, args: RepositoryArgs) -> Self: @@ -24,4 +26,5 @@ def create(cls, args: RepositoryArgs) -> Self: args.valkey_live_client, args.valkey_schedule_client, ) - return cls(repository=repository) + admin_repository = DeploymentAdminRepository(args.db, args.storage_manager) + return cls(repository=repository, admin_repository=admin_repository) diff --git a/src/ai/backend/manager/repositories/deployment/repository.py b/src/ai/backend/manager/repositories/deployment/repository.py index ab0689207fd..e31f2419fbc 100644 --- a/src/ai/backend/manager/repositories/deployment/repository.py +++ 
b/src/ai/backend/manager/repositories/deployment/repository.py @@ -66,6 +66,7 @@ LegacyRevisionCreateReadBundle, ModelDeploymentAccessTokenData, ModelDeploymentAutoScalingRuleData, + ModelDeploymentData, ModelRevisionData, ModelRevisionSpec, RevisionSearchResult, @@ -345,6 +346,25 @@ async def get_endpoint_info( """ return await self._db_source.get_endpoint(endpoint_id) + @deployment_repository_resilience.apply() + async def get_model_deployment_data( + self, + endpoint_id: uuid.UUID, + ) -> ModelDeploymentData: + """Fetch a deployment as the API-shaped ``ModelDeploymentData``. + + Bypasses ``DeploymentInfo`` so the API path's revision-id columns + flow through unchanged from the DB row. Prefer this over + ``get_endpoint_info`` + a service-side conversion when the caller + produces an API response — there is no ordering ambiguity to + resolve and dangling references surface as ``revision=None`` with + the column ID still populated. + + Raises: + EndpointNotFound: If the endpoint does not exist. + """ + return await self._db_source.get_model_deployment_data(endpoint_id) + @deployment_repository_resilience.apply() async def destroy_endpoint( self, diff --git a/src/ai/backend/manager/services/deployment/actions/search_deployments.py b/src/ai/backend/manager/services/deployment/actions/admin_search_deployments.py similarity index 68% rename from src/ai/backend/manager/services/deployment/actions/search_deployments.py rename to src/ai/backend/manager/services/deployment/actions/admin_search_deployments.py index 3160f1c939c..3d2baba3aba 100644 --- a/src/ai/backend/manager/services/deployment/actions/search_deployments.py +++ b/src/ai/backend/manager/services/deployment/actions/admin_search_deployments.py @@ -9,7 +9,14 @@ @dataclass -class SearchDeploymentsAction(DeploymentBaseAction): +class AdminSearchDeploymentsAction(DeploymentBaseAction): + """Search every deployment with no scope (superadmin path). 
+ + Routed through ``DeploymentAdminProcessors`` so callers make the + admin intent explicit; scope-restricted variants live on the regular + ``DeploymentService`` and ``DeploymentRepository``. + """ + querier: BatchQuerier @override @@ -23,7 +30,7 @@ def operation_type(cls) -> ActionOperationType: @dataclass -class SearchDeploymentsActionResult(BaseActionResult): +class AdminSearchDeploymentsActionResult(BaseActionResult): data: list[ModelDeploymentData] total_count: int has_next_page: bool diff --git a/src/ai/backend/manager/services/deployment/admin_service.py b/src/ai/backend/manager/services/deployment/admin_service.py new file mode 100644 index 00000000000..31fc061eb35 --- /dev/null +++ b/src/ai/backend/manager/services/deployment/admin_service.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +from ai.backend.manager.repositories.deployment.admin_repository import ( + DeploymentAdminRepository, +) +from ai.backend.manager.services.deployment.actions.admin_search_deployments import ( + AdminSearchDeploymentsAction, + AdminSearchDeploymentsActionResult, +) + +__all__ = ("DeploymentAdminService",) + + +class DeploymentAdminService: + """Admin (no-scope) service operations for deployments. + + Holds the call sites that are not bounded by a project / user / domain + scope (admin search, DataLoader batch lookups). Scoped operations live + on :class:`DeploymentService`. 
+ """ + + _admin_repository: DeploymentAdminRepository + + def __init__(self, admin_repository: DeploymentAdminRepository) -> None: + self._admin_repository = admin_repository + + async def admin_search_deployments( + self, action: AdminSearchDeploymentsAction + ) -> AdminSearchDeploymentsActionResult: + """Search every deployment without a scope filter.""" + result = await self._admin_repository.search_model_deployments(action.querier) + return AdminSearchDeploymentsActionResult( + data=result.items, + total_count=result.total_count, + has_next_page=result.has_next_page, + has_previous_page=result.has_previous_page, + ) diff --git a/src/ai/backend/manager/services/deployment/processors.py b/src/ai/backend/manager/services/deployment/processors.py index 98f4ab80736..780de2a7b5e 100644 --- a/src/ai/backend/manager/services/deployment/processors.py +++ b/src/ai/backend/manager/services/deployment/processors.py @@ -30,6 +30,10 @@ SearchAccessTokensAction, SearchAccessTokensActionResult, ) +from ai.backend.manager.services.deployment.actions.admin_search_deployments import ( + AdminSearchDeploymentsAction, + AdminSearchDeploymentsActionResult, +) from ai.backend.manager.services.deployment.actions.auto_scaling_rule.bulk_delete_auto_scaling_rules import ( BulkDeleteAutoScalingRulesAction, BulkDeleteAutoScalingRulesActionResult, @@ -116,10 +120,6 @@ UpdateRouteTrafficStatusAction, UpdateRouteTrafficStatusActionResult, ) -from ai.backend.manager.services.deployment.actions.search_deployments import ( - SearchDeploymentsAction, - SearchDeploymentsActionResult, -) from ai.backend.manager.services.deployment.actions.search_deployments_in_project import ( SearchDeploymentsInProjectAction, SearchDeploymentsInProjectActionResult, @@ -138,6 +138,7 @@ ) if TYPE_CHECKING: + from ai.backend.manager.services.deployment.admin_service import DeploymentAdminService from ai.backend.manager.services.deployment.service import DeploymentService @@ -158,7 +159,6 @@ class 
DeploymentProcessors(AbstractProcessorPackage): destroy_deployment: SingleEntityActionProcessor[ DestroyDeploymentAction, DestroyDeploymentActionResult ] - search_deployments: ActionProcessor[SearchDeploymentsAction, SearchDeploymentsActionResult] search_deployments_in_project: ActionProcessor[ SearchDeploymentsInProjectAction, SearchDeploymentsInProjectActionResult ] @@ -248,7 +248,6 @@ def __init__( self.destroy_deployment = SingleEntityActionProcessor( service.destroy_deployment, action_monitors, validators=rbac_single_entity_validators ) - self.search_deployments = ActionProcessor(service.search_deployments, action_monitors) self.search_deployments_in_project = ActionProcessor( service.search_deployments_in_project, action_monitors, @@ -324,7 +323,6 @@ def supported_actions(self) -> list[ActionSpec]: UpdateDeploymentAction.spec(), ReplaceDeploymentOptionsAction.spec(), DestroyDeploymentAction.spec(), - SearchDeploymentsAction.spec(), SearchDeploymentsInProjectAction.spec(), GetDeploymentByIdAction.spec(), GetDeploymentPolicyAction.spec(), @@ -358,3 +356,31 @@ def supported_actions(self) -> list[ActionSpec]: BulkDeleteAccessTokensAction.spec(), SearchAccessTokensAction.spec(), ] + + +class DeploymentAdminProcessors(AbstractProcessorPackage): + """Admin (no-scope) processors for deployments. + + Counterpart of :class:`DeploymentProcessors` that owns the no-scope + queries — admin search and the DataLoader batch path. Scoped reads + stay on :class:`DeploymentProcessors`. 
+ """ + + admin_search_deployments: ActionProcessor[ + AdminSearchDeploymentsAction, AdminSearchDeploymentsActionResult + ] + + def __init__( + self, + service: DeploymentAdminService, + action_monitors: list[ActionMonitor], + ) -> None: + self.admin_search_deployments = ActionProcessor( + service.admin_search_deployments, action_monitors + ) + + @override + def supported_actions(self) -> list[ActionSpec]: + return [ + AdminSearchDeploymentsAction.spec(), + ] diff --git a/src/ai/backend/manager/services/deployment/service.py b/src/ai/backend/manager/services/deployment/service.py index 77ae25b2cdb..aa42681a6fc 100644 --- a/src/ai/backend/manager/services/deployment/service.py +++ b/src/ai/backend/manager/services/deployment/service.py @@ -8,9 +8,7 @@ from ai.backend.common.data.endpoint.types import EndpointLifecycle from ai.backend.common.data.model_deployment.types import ( ActivenessStatus, - DeploymentStrategy, LivenessStatus, - ModelDeploymentStatus, ReadinessStatus, ) from ai.backend.common.data.permission.types import RBACElementType @@ -18,9 +16,6 @@ MintEndpointTokenRequest, ) from ai.backend.common.identifier.deployment import DeploymentID -from ai.backend.common.types import ( - ResourceSlot, -) from ai.backend.logging.utils import BraceStyleAdapter from ai.backend.manager.clients.appproxy.client import AppProxyClientPool from ai.backend.manager.data.deployment.creator import ( @@ -28,18 +23,9 @@ VFolderMountsCreator, ) from ai.backend.manager.data.deployment.types import ( - ClusterConfigData, - DeploymentInfo, ModelDeploymentAccessTokenData, - ModelDeploymentData, - ModelDeploymentMetadataInfo, - ModelMountConfigData, ModelReplicaData, - ModelRevisionData, ModelRevisionSpec, - ModelRuntimeConfigData, - ReplicaStateData, - ResourceConfigData, RevisionRefreshResult, RouteHealthStatus, RouteInfo, @@ -178,10 +164,6 @@ UpdateRouteTrafficStatusAction, UpdateRouteTrafficStatusActionResult, ) -from 
ai.backend.manager.services.deployment.actions.search_deployments import ( - SearchDeploymentsAction, - SearchDeploymentsActionResult, -) from ai.backend.manager.services.deployment.actions.search_deployments_in_project import ( SearchDeploymentsInProjectAction, SearchDeploymentsInProjectActionResult, @@ -204,114 +186,6 @@ log = BraceStyleAdapter(logging.getLogger(__name__)) -def _map_lifecycle_to_status(lifecycle: EndpointLifecycle) -> ModelDeploymentStatus: - """Map EndpointLifecycle to ModelDeploymentStatus for the v2 status surface. - - The lifecycle axis is monotonic (PENDING → DEPLOYING → READY → DESTROYING - → DESTROYED); v2 exposes replica reconciliation as the orthogonal - ``scaling_state`` field on the deployment node. ``SCALING`` is therefore - no longer surfaced through ``ModelDeploymentStatus`` — a legacy - ``lifecycle=SCALING`` row folds into ``READY`` so clients only have to - consult ``scaling_state`` to decide whether a replica reconcile is in - flight. Legacy ``CREATED`` (never-deployed) folds into ``PENDING``. - """ - match lifecycle: - case EndpointLifecycle.PENDING | EndpointLifecycle.CREATED: - return ModelDeploymentStatus.PENDING - case EndpointLifecycle.READY | EndpointLifecycle.SCALING: - return ModelDeploymentStatus.READY - case EndpointLifecycle.DEPLOYING: - return ModelDeploymentStatus.DEPLOYING - case EndpointLifecycle.DESTROYING: - return ModelDeploymentStatus.STOPPING - case EndpointLifecycle.DESTROYED: - return ModelDeploymentStatus.STOPPED - - -def _convert_deployment_info_to_data(info: DeploymentInfo) -> ModelDeploymentData: - """Convert DeploymentInfo to ModelDeploymentData. - - Note: Some fields are set to defaults as DeploymentInfo doesn't have all the data. 
- """ - revision: ModelRevisionData | None = None - rev: ModelRevisionSpec | None = None - if info.current_revision_id is not None: - rev = next( - (r for r in info.model_revisions if r.revision_id == info.current_revision_id), - None, - ) - if rev is None: - log.error( - "Deployment {} has current_revision_id {} but no matching " - "ModelRevisionSpec was found in DeploymentInfo.model_revisions; " - "current_revision will be reported as null. This usually means " - "EndpointRow.revisions was not eagerly loaded by the caller.", - info.id, - info.current_revision_id, - ) - if rev is not None: - if rev.revision_id is None: - raise ValueError(f"ModelRevisionSpec has no revision_id for deployment {info.id}") - revision = ModelRevisionData( - id=rev.revision_id, - cluster_config=ClusterConfigData( - mode=rev.resource_spec.cluster_mode, - size=rev.resource_spec.cluster_size, - ), - resource_config=ResourceConfigData( - resource_group_name=info.metadata.resource_group, - resource_slot=ResourceSlot.from_json(rev.resource_spec.resource_slots), - ), - model_mount_config=ModelMountConfigData( - vfolder_id=rev.mounts.model_vfolder_id, - mount_destination=rev.mounts.model_mount_destination, - definition_path=rev.mounts.model_definition_path or "", - ), - model_runtime_config=ModelRuntimeConfigData( - runtime_variant_id=rev.execution.runtime_variant_id, - inference_runtime_config=rev.execution.inference_runtime_config or {}, - ), - extra_vfolder_mounts=list(rev.mounts.extra_mounts), - image_id=rev.image_id, - created_at=info.metadata.created_at or datetime.now(UTC), - model_definition=rev.model_definition, - revision_preset_id=rev.revision_preset_id, - ) - - desired_count = info.replica_spec.desired_replica_count - if desired_count is None: - desired_count = info.replica_spec.replica_count - - return ModelDeploymentData( - id=info.id, - metadata=ModelDeploymentMetadataInfo( - name=info.metadata.name, - status=_map_lifecycle_to_status(info.state.lifecycle), - 
tags=[info.metadata.tag] if info.metadata.tag else [], - project_id=info.metadata.project, - domain_name=info.metadata.domain, - created_at=info.metadata.created_at or datetime.now(UTC), - updated_at=info.metadata.created_at or datetime.now(UTC), - ), - network_access=info.network, - revision_history_ids=[info.current_revision_id] if info.current_revision_id else [], - revision=revision, - current_revision_id=info.current_revision_id, - deploying_revision_id=info.deploying_revision_id, - scaling_rule_ids=[], # Not available in DeploymentInfo - replica_state=ReplicaStateData( - desired_replica_count=desired_count, - replica_ids=[], # Not available in DeploymentInfo - ), - default_deployment_strategy=DeploymentStrategy.ROLLING, - created_user_id=info.metadata.created_user, - options=info.options, - scaling_state=info.state.scaling_state, - policy=info.policy, - sub_step=info.sub_step, - ) - - _HEALTH_STATUS_TO_READINESS: dict[RouteHealthStatus, ReadinessStatus] = { RouteHealthStatus.HEALTHY: ReadinessStatus.HEALTHY, RouteHealthStatus.UNHEALTHY: ReadinessStatus.UNHEALTHY, @@ -439,12 +313,10 @@ async def create_deployment( revision=action.creator.model_revision, auto_activate=action.auto_activate, ) - updated_deployment_info = await self._deployment_repository.get_endpoint_info( + deployment_data = await self._deployment_repository.get_model_deployment_data( deployment_info.id ) - return CreateDeploymentActionResult( - data=_convert_deployment_info_to_data(updated_deployment_info) - ) + return CreateDeploymentActionResult(data=deployment_data) async def create_legacy_deployment( self, action: CreateLegacyDeploymentAction @@ -484,8 +356,9 @@ async def update_deployment( log.info("Updating deployment with ID: {}", action.updater.pk_value) endpoint_id = cast(UUID, action.updater.pk_value) spec = cast(DeploymentUpdaterSpec, action.updater.spec) - deployment_info = await self._deployment_controller.update_deployment(endpoint_id, spec) - return 
UpdateDeploymentActionResult(data=_convert_deployment_info_to_data(deployment_info)) + await self._deployment_controller.update_deployment(endpoint_id, spec) + deployment_data = await self._deployment_repository.get_model_deployment_data(endpoint_id) + return UpdateDeploymentActionResult(data=deployment_data) async def replace_deployment_options( self, action: ReplaceDeploymentOptionsAction @@ -526,26 +399,6 @@ async def destroy_deployment( await self._deployment_controller.mark_lifecycle_needed(DeploymentLifecycleType.DESTROYING) return DestroyDeploymentActionResult(success=success) - async def search_deployments( - self, action: SearchDeploymentsAction - ) -> SearchDeploymentsActionResult: - """Search deployments with filtering and pagination. - - Args: - action: Action containing BatchQuerier for filtering and pagination - - Returns: - SearchDeploymentsActionResult: Result containing list of deployments and pagination info - """ - result = await self._deployment_repository.search_endpoints(action.querier) - deployments = [_convert_deployment_info_to_data(info) for info in result.items] - return SearchDeploymentsActionResult( - data=deployments, - total_count=result.total_count, - has_next_page=result.has_next_page, - has_previous_page=result.has_previous_page, - ) - async def search_deployments_in_project( self, action: SearchDeploymentsInProjectAction ) -> SearchDeploymentsInProjectActionResult: @@ -574,8 +427,10 @@ async def get_deployment_by_id( Raises: EndpointNotFound: If the deployment does not exist """ - deployment_info = await self._deployment_repository.get_endpoint_info(action.deployment_id) - return GetDeploymentByIdActionResult(data=_convert_deployment_info_to_data(deployment_info)) + deployment_data = await self._deployment_repository.get_model_deployment_data( + action.deployment_id + ) + return GetDeploymentByIdActionResult(data=deployment_data) async def get_deployment_policy( self, action: GetDeploymentPolicyAction @@ -674,8 +529,11 @@ async 
def activate_revision( result = await self._deployment_controller.activate_revision( action.deployment_id, action.revision_id ) + deployment_data = await self._deployment_repository.get_model_deployment_data( + action.deployment_id + ) return ActivateRevisionActionResult( - deployment=_convert_deployment_info_to_data(result.deployment_info), + deployment=deployment_data, previous_revision_id=result.previous_revision_id, activated_revision_id=result.activated_revision_id, deployment_policy=result.deployment_policy, diff --git a/src/ai/backend/manager/services/factory.py b/src/ai/backend/manager/services/factory.py index d9ba5d7e15e..0828f4d289d 100644 --- a/src/ai/backend/manager/services/factory.py +++ b/src/ai/backend/manager/services/factory.py @@ -20,7 +20,11 @@ from ai.backend.manager.services.auth.service import AuthService from ai.backend.manager.services.container_registry.processors import ContainerRegistryProcessors from ai.backend.manager.services.container_registry.service import ContainerRegistryService -from ai.backend.manager.services.deployment.processors import DeploymentProcessors +from ai.backend.manager.services.deployment.admin_service import DeploymentAdminService +from ai.backend.manager.services.deployment.processors import ( + DeploymentAdminProcessors, + DeploymentProcessors, +) from ai.backend.manager.services.deployment.service import DeploymentService from ai.backend.manager.services.deployment_revision_preset.processors import ( DeploymentRevisionPresetProcessors, @@ -389,6 +393,7 @@ def create_services(args: ServiceArgs) -> Services: runtime_variant_preset_repository=repositories.runtime_variant_preset.repository, appproxy_client_pool=args.appproxy_client_pool, ), + deployment_admin=DeploymentAdminService(repositories.deployment.admin_repository), storage_namespace=StorageNamespaceService(repositories.storage_namespace.repository), audit_log=AuditLogService(repositories.audit_log.repository), 
scheduling_history=SchedulingHistoryService(repositories.scheduling_history.repository), @@ -508,6 +513,7 @@ def create_processors( services.artifact_revision, action_monitors, validators ), deployment=DeploymentProcessors(services.deployment, action_monitors, validators), + deployment_admin=DeploymentAdminProcessors(services.deployment_admin, action_monitors), storage_namespace=StorageNamespaceProcessors( services.storage_namespace, action_monitors, validators ), diff --git a/src/ai/backend/manager/services/processors.py b/src/ai/backend/manager/services/processors.py index 14a725c2081..3147e2960fe 100644 --- a/src/ai/backend/manager/services/processors.py +++ b/src/ai/backend/manager/services/processors.py @@ -78,7 +78,11 @@ from ai.backend.manager.services.container_registry.service import ( ContainerRegistryService, ) + from ai.backend.manager.services.deployment.admin_service import ( + DeploymentAdminService, + ) from ai.backend.manager.services.deployment.processors import ( + DeploymentAdminProcessors, DeploymentProcessors, ) from ai.backend.manager.services.deployment.service import ( @@ -404,6 +408,7 @@ class Services: artifact_revision: ArtifactRevisionService artifact_registry: ArtifactRegistryService deployment: DeploymentService + deployment_admin: DeploymentAdminService storage_namespace: StorageNamespaceService audit_log: AuditLogService scheduling_history: SchedulingHistoryService @@ -469,6 +474,7 @@ class Processors(AbstractProcessorPackage): artifact_registry: ArtifactRegistryProcessors artifact_revision: ArtifactRevisionProcessors deployment: DeploymentProcessors + deployment_admin: DeploymentAdminProcessors storage_namespace: StorageNamespaceProcessors audit_log: AuditLogProcessors scheduling_history: SchedulingHistoryProcessors @@ -527,6 +533,7 @@ def supported_actions(self) -> list[ActionSpec]: *self.artifact_revision.supported_actions(), *self.artifact.supported_actions(), *self.deployment.supported_actions(), + 
*self.deployment_admin.supported_actions(), *self.storage_namespace.supported_actions(), *self.audit_log.supported_actions(), *self.scheduling_history.supported_actions(), diff --git a/tests/component/deployment/test_deployment.py b/tests/component/deployment/test_deployment.py index f5b1b4673e9..e80aee63e7f 100644 --- a/tests/component/deployment/test_deployment.py +++ b/tests/component/deployment/test_deployment.py @@ -52,8 +52,8 @@ from ai.backend.common.identifier.vfolder import VFolderUUID from ai.backend.common.types import ClusterMode from ai.backend.manager.api.adapters.deployment.adapter import DeploymentAdapter +from ai.backend.manager.models.endpoint.row import _lifecycle_to_status from ai.backend.manager.services.deployment.processors import DeploymentProcessors -from ai.backend.manager.services.deployment.service import _map_lifecycle_to_status from ai.backend.manager.services.processors import Processors if TYPE_CHECKING: @@ -644,7 +644,7 @@ def test_lifecycle_to_status_mapping(self) -> None: } for lifecycle, expected_status in mapping.items(): - actual_status = _map_lifecycle_to_status(lifecycle) + actual_status = _lifecycle_to_status(lifecycle) assert actual_status == expected_status, ( f"EndpointLifecycle.{lifecycle.name} should map to " f"ModelDeploymentStatus.{expected_status.name}, got {actual_status.name}" diff --git a/tests/unit/manager/models/test_endpoint_row.py b/tests/unit/manager/models/test_endpoint_row.py new file mode 100644 index 00000000000..39002f8a551 --- /dev/null +++ b/tests/unit/manager/models/test_endpoint_row.py @@ -0,0 +1,152 @@ +"""Tests for EndpointRow projection methods.""" + +from __future__ import annotations + +import uuid +from datetime import UTC, datetime +from typing import Any +from unittest.mock import MagicMock + +from ai.backend.common.data.endpoint.types import EndpointLifecycle, ScalingState +from ai.backend.common.data.model_deployment.types import ModelDeploymentStatus +from 
ai.backend.common.identifier.deployment import DeploymentID +from ai.backend.common.identifier.deployment_revision import DeploymentRevisionID +from ai.backend.common.types import ClusterMode, ResourceSlot +from ai.backend.manager.data.deployment.types import ( + ClusterConfigData, + DeploymentOptions, + ModelMountConfigData, + ModelRevisionData, + ModelRuntimeConfigData, + ResourceConfigData, +) +from ai.backend.manager.models.endpoint.row import EndpointRow + + +def _stub_endpoint( + *, + current_revision: DeploymentRevisionID | None, + deploying_revision: DeploymentRevisionID | None, + revisions: list[Any], +) -> Any: + """Build a non-DB stub that satisfies ``to_model_deployment_data`` reads. + + The method is a pure projection — instantiating a real ``EndpointRow`` + against the DB schema would force a session and cascade test setup that + has nothing to do with the projection logic, so a ``MagicMock`` carrying + the columns the method touches is sufficient. + """ + stub = MagicMock() + stub.id = DeploymentID(uuid.uuid4()) + stub.name = "test-deployment" + stub.lifecycle_stage = EndpointLifecycle.DEPLOYING + stub.tag = None + stub.project = uuid.uuid4() + stub.domain = "default" + stub.created_at = datetime(2024, 1, 1, tzinfo=UTC) + stub.open_to_public = False + stub.url = None + stub.current_revision = current_revision + stub.deploying_revision = deploying_revision + stub.revisions = revisions + stub.replicas = 1 + stub.desired_replicas = None + stub.deployment_policy = None + stub.created_user = uuid.uuid4() + stub.options = DeploymentOptions() + stub.scaling_state = ScalingState.STABLE + stub.sub_step = None + return stub + + +def _stub_revision(rev_id: DeploymentRevisionID) -> Any: + """Build a ``DeploymentRevisionRow`` stub whose ``to_data()`` returns a + minimal, well-formed ``ModelRevisionData``. 
+ """ + rev = MagicMock() + rev.id = rev_id + rev.to_data.return_value = ModelRevisionData( + id=rev_id, + cluster_config=ClusterConfigData(mode=ClusterMode.SINGLE_NODE, size=1), + resource_config=ResourceConfigData( + resource_group_name="default", + resource_slot=ResourceSlot({}), + ), + model_runtime_config=ModelRuntimeConfigData( + runtime_variant_id=uuid.uuid4(), # type: ignore[arg-type] + ), + model_mount_config=ModelMountConfigData( + vfolder_id=None, + mount_destination="/models", + definition_path="model-definition.yml", + ), + created_at=datetime(2024, 1, 1, tzinfo=UTC), + image_id=None, + ) + return rev + + +class TestEndpointRowToModelDeploymentData: + """Pin the API-facing projection to direct DB columns (BA-5979). + + The previous code path matched the current revision via + ``info.model_revisions[0]``, which non-deterministically swapped + ``current_revision_id`` with ``deploying_revision_id`` when Postgres + returned the relationship rows in a different physical order. The new + method must source IDs from ``EndpointRow`` columns directly so the + revision-row order is irrelevant. + """ + + def test_current_revision_resolved_by_id_match_not_list_order(self) -> None: + current_id = DeploymentRevisionID(uuid.uuid4()) + deploying_id = DeploymentRevisionID(uuid.uuid4()) + current_rev = _stub_revision(current_id) + deploying_rev = _stub_revision(deploying_id) + + # Deploying revision listed first — the natural-failure case for the + # old ``[0]``-blind lookup. 
+ endpoint = _stub_endpoint( + current_revision=current_id, + deploying_revision=deploying_id, + revisions=[deploying_rev, current_rev], + ) + + data = EndpointRow.to_model_deployment_data(endpoint) + + assert data.current_revision_id == current_id + assert data.deploying_revision_id == deploying_id + assert data.current_revision_id != data.deploying_revision_id + assert data.revision is not None + assert data.revision.id == current_id + + def test_current_revision_id_survives_dangling_reference(self) -> None: + """If the revision row was deleted but the column still points to it, + surface the column ID and report the spec as ``None`` — do not collapse + ``current_revision_id`` to ``None`` together with the missing spec. + """ + dangling_id = DeploymentRevisionID(uuid.uuid4()) + + endpoint = _stub_endpoint( + current_revision=dangling_id, + deploying_revision=None, + revisions=[], + ) + + data = EndpointRow.to_model_deployment_data(endpoint) + + assert data.current_revision_id == dangling_id + assert data.deploying_revision_id is None + assert data.revision is None + + def test_lifecycle_status_mapping(self) -> None: + """Status surface follows the lifecycle column directly.""" + endpoint = _stub_endpoint( + current_revision=None, + deploying_revision=None, + revisions=[], + ) + endpoint.lifecycle_stage = EndpointLifecycle.READY + + data = EndpointRow.to_model_deployment_data(endpoint) + + assert data.metadata.status == ModelDeploymentStatus.READY diff --git a/tests/unit/manager/services/deployment/test_deployment_service.py b/tests/unit/manager/services/deployment/test_deployment_service.py index 6fc9d4c58a2..aacc3574167 100644 --- a/tests/unit/manager/services/deployment/test_deployment_service.py +++ b/tests/unit/manager/services/deployment/test_deployment_service.py @@ -7,7 +7,6 @@ from __future__ import annotations import uuid -from collections.abc import Callable from datetime import UTC, datetime from typing import cast from unittest.mock import AsyncMock, 
MagicMock @@ -22,7 +21,6 @@ ) from ai.backend.common.dto.manager.v2.deployment.types import IntOrPercent from ai.backend.common.identifier.deployment import DeploymentID -from ai.backend.common.identifier.deployment_revision import DeploymentRevisionID from ai.backend.common.identifier.image import ImageID from ai.backend.common.identifier.runtime_variant import RuntimeVariantID from ai.backend.common.identifier.vfolder import VFolderUUID @@ -52,9 +50,7 @@ ExecutionSpec, ModelMountConfigData, ModelRevisionData, - ModelRevisionSpec, ModelRuntimeConfigData, - MountMetadata, ReplicaSpec, ResourceConfigData, ResourceSpec, @@ -82,7 +78,6 @@ from ai.backend.manager.services.deployment.processors import DeploymentProcessors from ai.backend.manager.services.deployment.service import ( DeploymentService, - _convert_deployment_info_to_data, ) from ai.backend.manager.sokovan.deployment import DeploymentController @@ -641,72 +636,3 @@ async def test_persists_coordinator_jwt_instead_of_random( creator = cast(RBACEntityCreator[object], repo_call.args[0]) spec = cast(EndpointTokenCreatorSpec, creator.spec) assert spec.token == sample_coordinator_jwt - - -class TestConvertDeploymentInfoToData: - """Regression test for ``_convert_deployment_info_to_data`` (BA-5963).""" - - @pytest.fixture - def make_revision_spec(self) -> Callable[[], ModelRevisionSpec]: - def make() -> ModelRevisionSpec: - return ModelRevisionSpec( - revision_id=DeploymentRevisionID(uuid.uuid4()), - image_id=ImageID(uuid.uuid4()), - resource_spec=ResourceSpec( - cluster_mode=ClusterMode.SINGLE_NODE, - cluster_size=1, - resource_slots={"cpu": "1"}, - ), - mounts=MountMetadata( - model_vfolder_id=VFolderUUID(uuid.uuid4()), - model_definition_path="model-definition.yaml", - model_mount_destination="/models", - extra_mounts=[], - ), - execution=ExecutionSpec( - runtime_variant_id=RuntimeVariantID(uuid.uuid4()), - ), - ) - - return make - - def test_current_revision_resolved_by_id_match_not_list_order( - self, - 
make_revision_spec: Callable[[], ModelRevisionSpec], - ) -> None: - """Pin: revision lookup must use explicit ``current_revision_id``, not list[0].""" - deploying_spec = make_revision_spec() - current_spec = make_revision_spec() - - deployment_info = DeploymentInfo( - id=DeploymentID(uuid.uuid4()), - metadata=DeploymentMetadata( - name="ba5963-test", - domain="default", - project=uuid.uuid4(), - resource_group="default", - created_user=uuid.uuid4(), - session_owner=uuid.uuid4(), - created_at=datetime(2024, 1, 1, tzinfo=UTC), - revision_history_limit=10, - ), - state=DeploymentState( - lifecycle=EndpointLifecycle.DEPLOYING, - scaling_state=ScalingState.STABLE, - retry_count=0, - ), - replica_spec=ReplicaSpec(replica_count=1), - network=DeploymentNetworkSpec(open_to_public=False), - model_revisions=[deploying_spec, current_spec], - options=DeploymentOptions(), - current_revision_id=current_spec.revision_id, - deploying_revision_id=deploying_spec.revision_id, - ) - - deployment_data = _convert_deployment_info_to_data(deployment_info) - - assert deployment_data.current_revision_id == current_spec.revision_id - assert deployment_data.deploying_revision_id == deploying_spec.revision_id - assert deployment_data.current_revision_id != deployment_data.deploying_revision_id - assert deployment_data.revision is not None - assert deployment_data.revision.id == current_spec.revision_id From 9f0b30ffe299fa9f5d621c07fd5b623ecf765669 Mon Sep 17 00:00:00 2001 From: jopemachine Date: Fri, 8 May 2026 15:52:11 +0900 Subject: [PATCH 2/8] chore: add news fragment for BA-5979 --- changes/11522.enhance.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changes/11522.enhance.md diff --git a/changes/11522.enhance.md b/changes/11522.enhance.md new file mode 100644 index 00000000000..21dc0d7e279 --- /dev/null +++ b/changes/11522.enhance.md @@ -0,0 +1 @@ +Project ``EndpointRow`` directly to ``ModelDeploymentData`` for the API path so deployment responses no longer pass through a 
fragile ``DeploymentInfo`` conversion, and split no-scope deployment reads into a dedicated admin repository/service/processor. From 979b084f1465778fab1c5da56a1bac4868a1102e Mon Sep 17 00:00:00 2001 From: jopemachine Date: Fri, 8 May 2026 16:11:33 +0900 Subject: [PATCH 3/8] refactor: restore legacy DeploymentInfo path under SearchLegacyDeploymentsAction Walks back the over-eager rewiring from the previous BA-5979 commit: - Restore ``_convert_deployment_info_to_data`` (and its ``_map_lifecycle_to_status`` helper) on ``DeploymentService`` for the legacy projection path. - Rename the action to ``SearchLegacyDeploymentsAction`` (file ``services/deployment/actions/search_legacy_deployments.py``) so the legacy intent is explicit; keep the original ``search_endpoints`` -> ``DeploymentInfo`` -> converter -> ``ModelDeploymentData`` pipeline intact in the service handler. - Adapter ``my_search`` / ``project_search`` and the GraphQL ``batch_load_by_ids`` go back to the regular processor's ``search_legacy_deployments``; only ``admin_search`` stays on the new ``DeploymentAdminProcessors.admin_search_deployments``. - The legacy v1 REST handler and ``tree.py`` are reverted to ``origin/main`` except for the import / call-name update forced by the action rename. - Drop the now-unused ``search_model_deployments`` helper from ``DeploymentRepository`` so the no-scope query is owned by ``DeploymentAdminRepository`` alone. 
--- .../api/adapters/deployment/adapter.py | 20 ++- .../manager/api/rest/deployment/handler.py | 17 +- src/ai/backend/manager/api/rest/tree.py | 1 - .../actions/search_legacy_deployments.py | 44 +++++ .../manager/services/deployment/processors.py | 11 ++ .../manager/services/deployment/service.py | 150 ++++++++++++++++++ 6 files changed, 220 insertions(+), 23 deletions(-) create mode 100644 src/ai/backend/manager/services/deployment/actions/search_legacy_deployments.py diff --git a/src/ai/backend/manager/api/adapters/deployment/adapter.py b/src/ai/backend/manager/api/adapters/deployment/adapter.py index fa6cf9e0c52..c31d8739568 100644 --- a/src/ai/backend/manager/api/adapters/deployment/adapter.py +++ b/src/ai/backend/manager/api/adapters/deployment/adapter.py @@ -313,6 +313,9 @@ from ai.backend.manager.services.deployment.actions.route.update_route_traffic_status import ( UpdateRouteTrafficStatusAction, ) +from ai.backend.manager.services.deployment.actions.search_legacy_deployments import ( + SearchLegacyDeploymentsAction, +) from ai.backend.manager.services.deployment.actions.search_replicas import SearchReplicasAction from ai.backend.manager.services.deployment.actions.sync_replicas import SyncReplicaAction from ai.backend.manager.services.deployment.actions.update_deployment import UpdateDeploymentAction @@ -621,8 +624,8 @@ def _by_created_user() -> sa.sql.expression.ColumnElement[bool]: base_conditions=[_by_created_user], ) action_result = ( - await self._processors.deployment_admin.admin_search_deployments.wait_for_complete( - AdminSearchDeploymentsAction(querier=querier) + await self._processors.deployment.search_legacy_deployments.wait_for_complete( + SearchLegacyDeploymentsAction(querier=querier) ) ) return AdminSearchDeploymentsPayload( @@ -661,8 +664,8 @@ def _by_project_id() -> sa.sql.expression.ColumnElement[bool]: base_conditions=[_by_project_id], ) action_result = ( - await 
self._processors.deployment_admin.admin_search_deployments.wait_for_complete( - AdminSearchDeploymentsAction(querier=querier) + await self._processors.deployment.search_legacy_deployments.wait_for_complete( + SearchLegacyDeploymentsAction(querier=querier) ) ) return AdminSearchDeploymentsPayload( @@ -1290,11 +1293,6 @@ async def batch_load_by_ids( ) -> list[DeploymentNode | None]: """Batch load deployments by ID for DataLoader use. - Routed through the admin (no-scope) processor — DataLoader runs - under an already-authorised parent query so cross-scope filtering - is unnecessary; the ``by_ids`` condition is itself the bound on - what gets returned. - Returns ``DeploymentNode`` DTOs in the same order as the input ``deployment_ids`` list; missing IDs come back as ``None``. """ @@ -1305,8 +1303,8 @@ async def batch_load_by_ids( conditions=[DeploymentConditions.by_ids(deployment_ids)], ) action_result = ( - await self._processors.deployment_admin.admin_search_deployments.wait_for_complete( - AdminSearchDeploymentsAction(querier=querier) + await self._processors.deployment.search_legacy_deployments.wait_for_complete( + SearchLegacyDeploymentsAction(querier=querier) ) ) deployment_map = { diff --git a/src/ai/backend/manager/api/rest/deployment/handler.py b/src/ai/backend/manager/api/rest/deployment/handler.py index 6e235e59dc6..060bec8aca0 100644 --- a/src/ai/backend/manager/api/rest/deployment/handler.py +++ b/src/ai/backend/manager/api/rest/deployment/handler.py @@ -57,9 +57,6 @@ DeploymentUpdaterSpec, ReplicaSpecUpdaterSpec, ) -from ai.backend.manager.services.deployment.actions.admin_search_deployments import ( - AdminSearchDeploymentsAction, -) from ai.backend.manager.services.deployment.actions.create_deployment import ( CreateDeploymentAction, ) @@ -91,13 +88,13 @@ SearchRoutesAction, UpdateRouteTrafficStatusAction, ) +from ai.backend.manager.services.deployment.actions.search_legacy_deployments import ( + SearchLegacyDeploymentsAction, +) from 
ai.backend.manager.services.deployment.actions.update_deployment import ( UpdateDeploymentAction, ) -from ai.backend.manager.services.deployment.processors import ( - DeploymentAdminProcessors, - DeploymentProcessors, -) +from ai.backend.manager.services.deployment.processors import DeploymentProcessors from ai.backend.manager.types import OptionalState from .adapter import ( @@ -117,11 +114,9 @@ def __init__( self, *, deployment: DeploymentProcessors, - deployment_admin: DeploymentAdminProcessors, runtime_variant_adapter: RuntimeVariantAdapter, ) -> None: self._deployment = deployment - self._deployment_admin = deployment_admin # ``RuntimeVariantAdapter`` supplies the id↔name bridge used by # this legacy REST handler: incoming requests arrive with a # name string (legacy contract) but the internal chain is @@ -205,8 +200,8 @@ async def search_deployments( querier = self._deployment_adapter.build_querier(body.parsed) # Call service action - action_result = await self._deployment_admin.admin_search_deployments.wait_for_complete( - AdminSearchDeploymentsAction(querier=querier) + action_result = await self._deployment.search_legacy_deployments.wait_for_complete( + SearchLegacyDeploymentsAction(querier=querier) ) # Build response diff --git a/src/ai/backend/manager/api/rest/tree.py b/src/ai/backend/manager/api/rest/tree.py index b5d5f864eab..54beea3d142 100644 --- a/src/ai/backend/manager/api/rest/tree.py +++ b/src/ai/backend/manager/api/rest/tree.py @@ -166,7 +166,6 @@ def build_api_routes( ) deployment_handler = DeploymentAPIHandler( deployment=processors.deployment, - deployment_admin=processors.deployment_admin, runtime_variant_adapter=adapters.runtime_variant, ) domainconfig_handler = DomainConfigHandler(dotfile=processors.dotfile) diff --git a/src/ai/backend/manager/services/deployment/actions/search_legacy_deployments.py b/src/ai/backend/manager/services/deployment/actions/search_legacy_deployments.py new file mode 100644 index 00000000000..4d23fcf9186 --- 
/dev/null +++ b/src/ai/backend/manager/services/deployment/actions/search_legacy_deployments.py @@ -0,0 +1,44 @@ +from dataclasses import dataclass +from typing import override + +from ai.backend.manager.actions.action import BaseActionResult +from ai.backend.manager.actions.types import ActionOperationType +from ai.backend.manager.data.deployment.types import ModelDeploymentData +from ai.backend.manager.repositories.base import BatchQuerier +from ai.backend.manager.services.deployment.actions.base import DeploymentBaseAction + + +@dataclass +class SearchLegacyDeploymentsAction(DeploymentBaseAction): + """Legacy no-scope deployment search. + + Backs the v1 REST handler's ``GET /v3/services`` and the v2 adapter's + scoped (``my_search`` / ``project_search``) paths that piggyback their + scope filter onto a no-scope querier. New admin call sites should use + :class:`AdminSearchDeploymentsAction` (routed through + ``DeploymentAdminProcessors``) instead — this action is preserved so the + legacy contract stays intact. 
+ """ + + querier: BatchQuerier + + @override + def entity_id(self) -> str | None: + return None + + @override + @classmethod + def operation_type(cls) -> ActionOperationType: + return ActionOperationType.SEARCH + + +@dataclass +class SearchLegacyDeploymentsActionResult(BaseActionResult): + data: list[ModelDeploymentData] + total_count: int + has_next_page: bool + has_previous_page: bool + + @override + def entity_id(self) -> str | None: + return None diff --git a/src/ai/backend/manager/services/deployment/processors.py b/src/ai/backend/manager/services/deployment/processors.py index 780de2a7b5e..805a2da547e 100644 --- a/src/ai/backend/manager/services/deployment/processors.py +++ b/src/ai/backend/manager/services/deployment/processors.py @@ -124,6 +124,10 @@ SearchDeploymentsInProjectAction, SearchDeploymentsInProjectActionResult, ) +from ai.backend.manager.services.deployment.actions.search_legacy_deployments import ( + SearchLegacyDeploymentsAction, + SearchLegacyDeploymentsActionResult, +) from ai.backend.manager.services.deployment.actions.search_replicas import ( SearchReplicasAction, SearchReplicasActionResult, @@ -159,6 +163,9 @@ class DeploymentProcessors(AbstractProcessorPackage): destroy_deployment: SingleEntityActionProcessor[ DestroyDeploymentAction, DestroyDeploymentActionResult ] + search_legacy_deployments: ActionProcessor[ + SearchLegacyDeploymentsAction, SearchLegacyDeploymentsActionResult + ] search_deployments_in_project: ActionProcessor[ SearchDeploymentsInProjectAction, SearchDeploymentsInProjectActionResult ] @@ -248,6 +255,9 @@ def __init__( self.destroy_deployment = SingleEntityActionProcessor( service.destroy_deployment, action_monitors, validators=rbac_single_entity_validators ) + self.search_legacy_deployments = ActionProcessor( + service.search_legacy_deployments, action_monitors + ) self.search_deployments_in_project = ActionProcessor( service.search_deployments_in_project, action_monitors, @@ -323,6 +333,7 @@ def 
supported_actions(self) -> list[ActionSpec]: UpdateDeploymentAction.spec(), ReplaceDeploymentOptionsAction.spec(), DestroyDeploymentAction.spec(), + SearchLegacyDeploymentsAction.spec(), SearchDeploymentsInProjectAction.spec(), GetDeploymentByIdAction.spec(), GetDeploymentPolicyAction.spec(), diff --git a/src/ai/backend/manager/services/deployment/service.py b/src/ai/backend/manager/services/deployment/service.py index aa42681a6fc..cc7fb308b4d 100644 --- a/src/ai/backend/manager/services/deployment/service.py +++ b/src/ai/backend/manager/services/deployment/service.py @@ -8,7 +8,9 @@ from ai.backend.common.data.endpoint.types import EndpointLifecycle from ai.backend.common.data.model_deployment.types import ( ActivenessStatus, + DeploymentStrategy, LivenessStatus, + ModelDeploymentStatus, ReadinessStatus, ) from ai.backend.common.data.permission.types import RBACElementType @@ -16,6 +18,7 @@ MintEndpointTokenRequest, ) from ai.backend.common.identifier.deployment import DeploymentID +from ai.backend.common.types import ResourceSlot from ai.backend.logging.utils import BraceStyleAdapter from ai.backend.manager.clients.appproxy.client import AppProxyClientPool from ai.backend.manager.data.deployment.creator import ( @@ -23,9 +26,18 @@ VFolderMountsCreator, ) from ai.backend.manager.data.deployment.types import ( + ClusterConfigData, + DeploymentInfo, ModelDeploymentAccessTokenData, + ModelDeploymentData, + ModelDeploymentMetadataInfo, + ModelMountConfigData, ModelReplicaData, + ModelRevisionData, ModelRevisionSpec, + ModelRuntimeConfigData, + ReplicaStateData, + ResourceConfigData, RevisionRefreshResult, RouteHealthStatus, RouteInfo, @@ -168,6 +180,10 @@ SearchDeploymentsInProjectAction, SearchDeploymentsInProjectActionResult, ) +from ai.backend.manager.services.deployment.actions.search_legacy_deployments import ( + SearchLegacyDeploymentsAction, + SearchLegacyDeploymentsActionResult, +) from ai.backend.manager.services.deployment.actions.search_replicas import ( 
SearchReplicasAction, SearchReplicasActionResult, @@ -186,6 +202,117 @@ log = BraceStyleAdapter(logging.getLogger(__name__)) +def _map_lifecycle_to_status(lifecycle: EndpointLifecycle) -> ModelDeploymentStatus: + """Map EndpointLifecycle to ModelDeploymentStatus for the v2 status surface. + + Kept private to this module — only ``_convert_deployment_info_to_data`` (the + legacy ``DeploymentInfo``-based projection used by ``search_legacy_deployments``) + relies on it. The v2 path projects ``EndpointRow`` directly via its own + private ``_lifecycle_to_status`` helper kept next to that projection. + """ + match lifecycle: + case EndpointLifecycle.PENDING | EndpointLifecycle.CREATED: + return ModelDeploymentStatus.PENDING + case EndpointLifecycle.READY | EndpointLifecycle.SCALING: + return ModelDeploymentStatus.READY + case EndpointLifecycle.DEPLOYING: + return ModelDeploymentStatus.DEPLOYING + case EndpointLifecycle.DESTROYING: + return ModelDeploymentStatus.STOPPING + case EndpointLifecycle.DESTROYED: + return ModelDeploymentStatus.STOPPED + + +def _convert_deployment_info_to_data(info: DeploymentInfo) -> ModelDeploymentData: + """Project a ``DeploymentInfo`` to ``ModelDeploymentData`` for legacy callers. + + Used only by ``DeploymentService.search_legacy_deployments`` (v1 REST and the + v2 adapter's scoped paths that piggyback their scope filter onto a no-scope + querier). The v2 service paths now project from ``EndpointRow`` directly via + a dedicated row-level projection and do not pass through this helper. + + Note: Some fields are set to placeholder defaults because ``DeploymentInfo`` + does not carry them.
+ """ + revision: ModelRevisionData | None = None + rev: ModelRevisionSpec | None = None + if info.current_revision_id is not None: + rev = next( + (r for r in info.model_revisions if r.revision_id == info.current_revision_id), + None, + ) + if rev is None: + log.error( + "Deployment {} has current_revision_id {} but no matching " + "ModelRevisionSpec was found in DeploymentInfo.model_revisions; " + "current_revision will be reported as null. This usually means " + "EndpointRow.revisions was not eagerly loaded by the caller.", + info.id, + info.current_revision_id, + ) + if rev is not None: + if rev.revision_id is None: + raise ValueError(f"ModelRevisionSpec has no revision_id for deployment {info.id}") + revision = ModelRevisionData( + id=rev.revision_id, + cluster_config=ClusterConfigData( + mode=rev.resource_spec.cluster_mode, + size=rev.resource_spec.cluster_size, + ), + resource_config=ResourceConfigData( + resource_group_name=info.metadata.resource_group, + resource_slot=ResourceSlot.from_json(rev.resource_spec.resource_slots), + ), + model_mount_config=ModelMountConfigData( + vfolder_id=rev.mounts.model_vfolder_id, + mount_destination=rev.mounts.model_mount_destination, + definition_path=rev.mounts.model_definition_path or "", + ), + model_runtime_config=ModelRuntimeConfigData( + runtime_variant_id=rev.execution.runtime_variant_id, + inference_runtime_config=rev.execution.inference_runtime_config or {}, + ), + extra_vfolder_mounts=list(rev.mounts.extra_mounts), + image_id=rev.image_id, + created_at=info.metadata.created_at or datetime.now(UTC), + model_definition=rev.model_definition, + revision_preset_id=rev.revision_preset_id, + ) + + desired_count = info.replica_spec.desired_replica_count + if desired_count is None: + desired_count = info.replica_spec.replica_count + + return ModelDeploymentData( + id=info.id, + metadata=ModelDeploymentMetadataInfo( + name=info.metadata.name, + status=_map_lifecycle_to_status(info.state.lifecycle), + 
tags=[info.metadata.tag] if info.metadata.tag else [], + project_id=info.metadata.project, + domain_name=info.metadata.domain, + created_at=info.metadata.created_at or datetime.now(UTC), + updated_at=info.metadata.created_at or datetime.now(UTC), + ), + network_access=info.network, + revision_history_ids=[info.current_revision_id] if info.current_revision_id else [], + revision=revision, + current_revision_id=info.current_revision_id, + deploying_revision_id=info.deploying_revision_id, + scaling_rule_ids=[], # Not available in DeploymentInfo + replica_state=ReplicaStateData( + desired_replica_count=desired_count, + replica_ids=[], # Not available in DeploymentInfo + ), + default_deployment_strategy=DeploymentStrategy.ROLLING, + created_user_id=info.metadata.created_user, + options=info.options, + scaling_state=info.state.scaling_state, + policy=info.policy, + sub_step=info.sub_step, + ) + + _HEALTH_STATUS_TO_READINESS: dict[RouteHealthStatus, ReadinessStatus] = { RouteHealthStatus.HEALTHY: ReadinessStatus.HEALTHY, RouteHealthStatus.UNHEALTHY: ReadinessStatus.UNHEALTHY, @@ -399,6 +526,29 @@ async def destroy_deployment( await self._deployment_controller.mark_lifecycle_needed(DeploymentLifecycleType.DESTROYING) return DestroyDeploymentActionResult(success=success) + async def search_legacy_deployments( + self, action: SearchLegacyDeploymentsAction + ) -> SearchLegacyDeploymentsActionResult: + """Legacy no-scope deployment search. + + Preserves the pre-refactor projection path — + ``DeploymentRepository.search_endpoints`` returns ``DeploymentInfo`` + items, which are then mapped to ``ModelDeploymentData`` via + ``_convert_deployment_info_to_data``. Backs the v1 REST handler's + ``GET /v3/services`` and the v2 adapter's scoped (``my_search`` / + ``project_search``) paths that piggyback their scope filter onto a + no-scope querier. New admin call sites should go through + :class:`DeploymentAdminService` instead. 
+ """ + result = await self._deployment_repository.search_endpoints(action.querier) + deployments = [_convert_deployment_info_to_data(info) for info in result.items] + return SearchLegacyDeploymentsActionResult( + data=deployments, + total_count=result.total_count, + has_next_page=result.has_next_page, + has_previous_page=result.has_previous_page, + ) + async def search_deployments_in_project( self, action: SearchDeploymentsInProjectAction ) -> SearchDeploymentsInProjectActionResult: From 0e5d3dff24a77a596eeb22759769751b704d0a49 Mon Sep 17 00:00:00 2001 From: jopemachine Date: Fri, 8 May 2026 16:24:43 +0900 Subject: [PATCH 4/8] refactor: move EndpointRow -> ModelDeploymentData projection out of the model layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit attached ``to_model_deployment_data`` and the ``_lifecycle_to_status`` helper to ``EndpointRow``, but the conversion is purely an adapter between the ORM row and the API-shaped data type — that is not a model-layer concern. Move both into ``repositories/deployment/db_source/db_source.py`` as private free functions (``_endpoint_row_to_model_deployment_data`` and ``_lifecycle_to_status``); ``get_model_deployment_data`` and ``search_model_deployments`` now call the helper directly. The companion unit test moves to ``tests/unit/manager/repositories/deployment/test_endpoint_projection.py`` to match the new home, and the component test reverts to importing ``_map_lifecycle_to_status`` from ``services/deployment/service.py`` (its origin/main location). 
--- src/ai/backend/manager/models/endpoint/row.py | 103 ------------------ .../deployment/db_source/db_source.py | 101 ++++++++++++++++- tests/component/deployment/test_deployment.py | 4 +- .../deployment/test_endpoint_projection.py} | 20 ++-- 4 files changed, 112 insertions(+), 116 deletions(-) rename tests/unit/manager/{models/test_endpoint_row.py => repositories/deployment/test_endpoint_projection.py} (88%) diff --git a/src/ai/backend/manager/models/endpoint/row.py b/src/ai/backend/manager/models/endpoint/row.py index 050648bdefd..a6cb7b70908 100644 --- a/src/ai/backend/manager/models/endpoint/row.py +++ b/src/ai/backend/manager/models/endpoint/row.py @@ -34,10 +34,6 @@ ) from ai.backend.common.config import model_definition_iv -from ai.backend.common.data.model_deployment.types import ( - DeploymentStrategy, - ModelDeploymentStatus, -) from ai.backend.common.identifier.deployment import DeploymentID from ai.backend.common.identifier.deployment_revision import DeploymentRevisionID from ai.backend.common.identifier.project import ProjectID @@ -76,12 +72,8 @@ DeploymentState, DeploymentSummaryData, ModelDeploymentAutoScalingRuleData, - ModelDeploymentData, - ModelDeploymentMetadataInfo, - ModelRevisionData, ModelRevisionSpec, ReplicaSpec, - ReplicaStateData, ) from ai.backend.manager.data.model_serving.types import ( EndpointAutoScalingRuleData, @@ -117,27 +109,6 @@ from ai.backend.manager.models.user import UserRow -def _lifecycle_to_status(lifecycle: EndpointLifecycle) -> ModelDeploymentStatus: - """Map the persisted ``EndpointLifecycle`` to the v2 ``ModelDeploymentStatus``. - - The lifecycle axis is monotonic (PENDING → DEPLOYING → READY → DESTROYING - → DESTROYED); v2 exposes replica reconciliation as the orthogonal - ``scaling_state`` field, so legacy ``SCALING`` folds into ``READY`` and - legacy ``CREATED`` (never-deployed) folds into ``PENDING``. 
- """ - match lifecycle: - case EndpointLifecycle.PENDING | EndpointLifecycle.CREATED: - return ModelDeploymentStatus.PENDING - case EndpointLifecycle.READY | EndpointLifecycle.SCALING: - return ModelDeploymentStatus.READY - case EndpointLifecycle.DEPLOYING: - return ModelDeploymentStatus.DEPLOYING - case EndpointLifecycle.DESTROYING: - return ModelDeploymentStatus.STOPPING - case EndpointLifecycle.DESTROYED: - return ModelDeploymentStatus.STOPPED - - __all__ = ( "EndpointAutoScalingRuleRow", "EndpointLifecycle", @@ -757,80 +728,6 @@ def from_deployment_creator( revision_history_limit=creator.metadata.revision_history_limit, ) - def to_model_deployment_data(self) -> ModelDeploymentData: - """Project this row directly to a ``ModelDeploymentData`` for API responses. - - This bypasses the ``DeploymentInfo`` intermediate so the revision-id - columns flow through the API path unchanged from the DB. Eager-load - requirements: ``EndpointRow.revisions`` (with - ``DeploymentRevisionRow.image_row``) and ``EndpointRow.deployment_policy``. - - ``current_revision_id`` and ``deploying_revision_id`` are read from - the row columns directly. The ``revision`` spec is resolved by - explicit match against ``current_revision``; if no matching row is - loaded (dangling reference, or caller forgot to eager-load - ``revisions``), ``revision`` is ``None`` while ``current_revision_id`` - still surfaces the column value — callers can act on the ID even when - the joined spec is unavailable. - """ - revision: ModelRevisionData | None = None - if self.current_revision is not None: - match = next( - (r for r in self.revisions if r.id == self.current_revision), - None, - ) - if match is None: - log.error( - "Deployment {} has current_revision_id {} but no matching " - "DeploymentRevisionRow was found among loaded revisions; " - "the revision spec will be reported as null. 
This usually " - "means EndpointRow.revisions was not eagerly loaded by the " - "caller.", - self.id, - self.current_revision, - ) - else: - revision = match.to_data() - - desired_count = ( - self.desired_replicas if self.desired_replicas is not None else self.replicas - ) - policy_data = ( - self.deployment_policy.to_data() if self.deployment_policy is not None else None - ) - - return ModelDeploymentData( - id=self.id, - metadata=ModelDeploymentMetadataInfo( - name=self.name, - status=_lifecycle_to_status(self.lifecycle_stage), - tags=[self.tag] if self.tag else [], - project_id=self.project, - domain_name=self.domain, - created_at=self.created_at, - updated_at=self.created_at, - ), - network_access=DeploymentNetworkSpec( - open_to_public=self.open_to_public if self.open_to_public is not None else False, - url=self.url, - ), - revision_history_ids=[self.current_revision] if self.current_revision else [], - revision=revision, - current_revision_id=self.current_revision, - deploying_revision_id=self.deploying_revision, - scaling_rule_ids=[], - replica_state=ReplicaStateData( - desired_replica_count=desired_count, - replica_ids=[], - ), - default_deployment_strategy=DeploymentStrategy.ROLLING, - created_user_id=self.created_user, - options=self.options, - scaling_state=self.scaling_state, - policy=policy_data, - sub_step=self.sub_step, - ) - def to_deployment_info(self) -> DeploymentInfo: """Convert EndpointRow to DeploymentInfo dataclass using revision data.""" policy_data = None diff --git a/src/ai/backend/manager/repositories/deployment/db_source/db_source.py b/src/ai/backend/manager/repositories/deployment/db_source/db_source.py index 1485188b4e7..1c13d455350 100644 --- a/src/ai/backend/manager/repositories/deployment/db_source/db_source.py +++ b/src/ai/backend/manager/repositories/deployment/db_source/db_source.py @@ -20,6 +20,10 @@ from ai.backend.common.config import ModelHealthCheck from ai.backend.common.data.endpoint.types import EndpointLifecycle +from 
ai.backend.common.data.model_deployment.types import ( + DeploymentStrategy, + ModelDeploymentStatus, +) from ai.backend.common.data.permission.types import RBACElementType from ai.backend.common.dto.manager.v2.runtime_variant_preset.types import ( PresetTarget, @@ -59,6 +63,7 @@ DeploymentInfoWithAutoScalingRules, DeploymentLastHistory, DeploymentLifecycleSubStep, + DeploymentNetworkSpec, DeploymentOptions, DeploymentPolicyData, DeploymentPolicySearchResult, @@ -71,8 +76,10 @@ ModelDeploymentAutoScalingRuleData, ModelDeploymentData, ModelDeploymentDataSearchResult, + ModelDeploymentMetadataInfo, ModelRevisionData, ModelRevisionSpec, + ReplicaStateData, RevisionSearchResult, RouteHealthStatus, RouteInfo, @@ -205,6 +212,96 @@ class EndpointWithRoutesRawData: log = BraceStyleAdapter(logging.getLogger(__name__)) +def _lifecycle_to_status(lifecycle: EndpointLifecycle) -> ModelDeploymentStatus: + """Map the persisted ``EndpointLifecycle`` to the v2 ``ModelDeploymentStatus``. + + Private to this module — used by ``_endpoint_row_to_model_deployment_data``. + The legacy projection (``DeploymentInfo`` -> ``ModelDeploymentData``) + keeps its own copy in ``services/deployment/service.py``. + """ + match lifecycle: + case EndpointLifecycle.PENDING | EndpointLifecycle.CREATED: + return ModelDeploymentStatus.PENDING + case EndpointLifecycle.READY | EndpointLifecycle.SCALING: + return ModelDeploymentStatus.READY + case EndpointLifecycle.DEPLOYING: + return ModelDeploymentStatus.DEPLOYING + case EndpointLifecycle.DESTROYING: + return ModelDeploymentStatus.STOPPING + case EndpointLifecycle.DESTROYED: + return ModelDeploymentStatus.STOPPED + + +def _endpoint_row_to_model_deployment_data(row: EndpointRow) -> ModelDeploymentData: + """Project an ``EndpointRow`` straight to the API-shaped ``ModelDeploymentData``. 
+ + Lives at the repository boundary because the conversion is purely an + adapter between the ORM row and the API data type — neither the model + layer (which must stay free of API-shaped projections) nor the service + layer (which must not touch ORM rows) is the right home for it. + + Eager-load requirements: ``EndpointRow.revisions`` (with + ``DeploymentRevisionRow.image_row``) and ``EndpointRow.deployment_policy``. + + ``current_revision_id`` and ``deploying_revision_id`` are read from the + row columns directly. The ``revision`` spec is resolved by explicit match + against ``current_revision``; if no matching row is loaded (dangling + reference, or caller forgot to eager-load ``revisions``), ``revision`` + is ``None`` while ``current_revision_id`` still surfaces the column + value — callers can act on the ID even when the joined spec is missing. + """ + revision: ModelRevisionData | None = None + if row.current_revision is not None: + match = next((r for r in row.revisions if r.id == row.current_revision), None) + if match is None: + log.error( + "Deployment {} has current_revision_id {} but no matching " + "DeploymentRevisionRow was found among loaded revisions; " + "the revision spec will be reported as null. 
This usually " + "means EndpointRow.revisions was not eagerly loaded by the " + "caller.", + row.id, + row.current_revision, + ) + else: + revision = match.to_data() + + desired_count = row.desired_replicas if row.desired_replicas is not None else row.replicas + policy_data = row.deployment_policy.to_data() if row.deployment_policy is not None else None + + return ModelDeploymentData( + id=row.id, + metadata=ModelDeploymentMetadataInfo( + name=row.name, + status=_lifecycle_to_status(row.lifecycle_stage), + tags=[row.tag] if row.tag else [], + project_id=row.project, + domain_name=row.domain, + created_at=row.created_at, + updated_at=row.created_at, + ), + network_access=DeploymentNetworkSpec( + open_to_public=row.open_to_public if row.open_to_public is not None else False, + url=row.url, + ), + revision_history_ids=[row.current_revision] if row.current_revision else [], + revision=revision, + current_revision_id=row.current_revision, + deploying_revision_id=row.deploying_revision, + scaling_rule_ids=[], + replica_state=ReplicaStateData( + desired_replica_count=desired_count, + replica_ids=[], + ), + default_deployment_strategy=DeploymentStrategy.ROLLING, + created_user_id=row.created_user, + options=row.options, + scaling_state=row.scaling_state, + policy=policy_data, + sub_step=row.sub_step, + ) + + def _project_preset_slots( preset_row: DeploymentRevisionPresetRow | None, slot_entries: list[tuple[str, Decimal]], @@ -437,7 +534,7 @@ async def get_model_deployment_data( if not row: raise EndpointNotFound(f"Endpoint {endpoint_id} not found") - return row.to_model_deployment_data() + return _endpoint_row_to_model_deployment_data(row) async def get_deployments_by_ids( self, @@ -1208,7 +1305,7 @@ async def search_model_deployments( querier, ) - items = [row.EndpointRow.to_model_deployment_data() for row in result.rows] + items = [_endpoint_row_to_model_deployment_data(row.EndpointRow) for row in result.rows] return ModelDeploymentDataSearchResult( items=items, diff 
--git a/tests/component/deployment/test_deployment.py b/tests/component/deployment/test_deployment.py index e80aee63e7f..f5b1b4673e9 100644 --- a/tests/component/deployment/test_deployment.py +++ b/tests/component/deployment/test_deployment.py @@ -52,8 +52,8 @@ from ai.backend.common.identifier.vfolder import VFolderUUID from ai.backend.common.types import ClusterMode from ai.backend.manager.api.adapters.deployment.adapter import DeploymentAdapter -from ai.backend.manager.models.endpoint.row import _lifecycle_to_status from ai.backend.manager.services.deployment.processors import DeploymentProcessors +from ai.backend.manager.services.deployment.service import _map_lifecycle_to_status from ai.backend.manager.services.processors import Processors if TYPE_CHECKING: @@ -644,7 +644,7 @@ def test_lifecycle_to_status_mapping(self) -> None: } for lifecycle, expected_status in mapping.items(): - actual_status = _lifecycle_to_status(lifecycle) + actual_status = _map_lifecycle_to_status(lifecycle) assert actual_status == expected_status, ( f"EndpointLifecycle.{lifecycle.name} should map to " f"ModelDeploymentStatus.{expected_status.name}, got {actual_status.name}" diff --git a/tests/unit/manager/models/test_endpoint_row.py b/tests/unit/manager/repositories/deployment/test_endpoint_projection.py similarity index 88% rename from tests/unit/manager/models/test_endpoint_row.py rename to tests/unit/manager/repositories/deployment/test_endpoint_projection.py index 39002f8a551..3e1ea2b803c 100644 --- a/tests/unit/manager/models/test_endpoint_row.py +++ b/tests/unit/manager/repositories/deployment/test_endpoint_projection.py @@ -1,4 +1,4 @@ -"""Tests for EndpointRow projection methods.""" +"""Tests for the API-facing ``EndpointRow`` projection at the repository boundary.""" from __future__ import annotations @@ -20,7 +20,9 @@ ModelRuntimeConfigData, ResourceConfigData, ) -from ai.backend.manager.models.endpoint.row import EndpointRow +from 
ai.backend.manager.repositories.deployment.db_source.db_source import ( + _endpoint_row_to_model_deployment_data, +) def _stub_endpoint( @@ -29,12 +31,12 @@ def _stub_endpoint( deploying_revision: DeploymentRevisionID | None, revisions: list[Any], ) -> Any: - """Build a non-DB stub that satisfies ``to_model_deployment_data`` reads. + """Build a non-DB stub that satisfies the projection helper's reads. - The method is a pure projection — instantiating a real ``EndpointRow`` + The helper is a pure projection — instantiating a real ``EndpointRow`` against the DB schema would force a session and cascade test setup that has nothing to do with the projection logic, so a ``MagicMock`` carrying - the columns the method touches is sufficient. + the columns the helper touches is sufficient. """ stub = MagicMock() stub.id = DeploymentID(uuid.uuid4()) @@ -93,7 +95,7 @@ class TestEndpointRowToModelDeploymentData: ``info.model_revisions[0]``, which non-deterministically swapped ``current_revision_id`` with ``deploying_revision_id`` when Postgres returned the relationship rows in a different physical order. The new - method must source IDs from ``EndpointRow`` columns directly so the + helper must source IDs from ``EndpointRow`` columns directly so the revision-row order is irrelevant. 
""" @@ -111,7 +113,7 @@ def test_current_revision_resolved_by_id_match_not_list_order(self) -> None: revisions=[deploying_rev, current_rev], ) - data = EndpointRow.to_model_deployment_data(endpoint) + data = _endpoint_row_to_model_deployment_data(endpoint) assert data.current_revision_id == current_id assert data.deploying_revision_id == deploying_id @@ -132,7 +134,7 @@ def test_current_revision_id_survives_dangling_reference(self) -> None: revisions=[], ) - data = EndpointRow.to_model_deployment_data(endpoint) + data = _endpoint_row_to_model_deployment_data(endpoint) assert data.current_revision_id == dangling_id assert data.deploying_revision_id is None @@ -147,6 +149,6 @@ def test_lifecycle_status_mapping(self) -> None: ) endpoint.lifecycle_stage = EndpointLifecycle.READY - data = EndpointRow.to_model_deployment_data(endpoint) + data = _endpoint_row_to_model_deployment_data(endpoint) assert data.metadata.status == ModelDeploymentStatus.READY From e27a8ed7f3566d0256f1ebce61feba30ebe358d3 Mon Sep 17 00:00:00 2001 From: jopemachine Date: Fri, 8 May 2026 16:53:57 +0900 Subject: [PATCH 5/8] refactor: split scoped deployment search into dedicated actions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Following the vfolder convention so v2 paths no longer borrow the ``search_legacy_deployments`` action. - Add ``UserDeploymentSearchScope`` (alongside the existing ``ProjectDeploymentSearchScope``) so the user-scope filter (``EndpointRow.created_user == user_id``) lives at the repository boundary instead of being injected at the adapter via ``base_conditions``. - Add ``SearchUserDeploymentsAction`` and ``SearchProjectDeploymentsAction`` (returning ``ModelDeploymentData``) plus matching service handlers, processor fields, and repository / DB-source methods. The internal scope-name is ``User``; the v2 adapter exposes it as ``my_search``. 
- Adapter: - ``my_search`` -> ``search_user_deployments`` - ``project_search`` -> ``search_project_deployments`` - ``batch_load_by_ids`` (DataLoader) -> ``admin_search_deployments`` — the ``by_ids`` filter is itself the bound on the result set, so the no-scope admin processor is the right home; this also keeps ``search_legacy_deployments`` reserved for the v1 REST handler. The existing ``SearchDeploymentsInProjectAction`` (returning ``DeploymentSummaryData``) is unchanged — its lighter shape is still what project admin list pages consume. --- .../api/adapters/deployment/adapter.py | 46 ++++++------- .../deployment/db_source/db_source.py | 67 +++++++++++++++++++ .../repositories/deployment/repository.py | 26 ++++++- .../repositories/deployment/types/__init__.py | 2 + .../repositories/deployment/types/endpoint.py | 23 +++++++ .../actions/search_project_deployments.py | 55 +++++++++++++++ .../actions/search_user_deployments.py | 58 ++++++++++++++++ .../manager/services/deployment/processors.py | 26 +++++++ .../manager/services/deployment/service.py | 36 ++++++++++ 9 files changed, 314 insertions(+), 25 deletions(-) create mode 100644 src/ai/backend/manager/services/deployment/actions/search_project_deployments.py create mode 100644 src/ai/backend/manager/services/deployment/actions/search_user_deployments.py diff --git a/src/ai/backend/manager/api/adapters/deployment/adapter.py b/src/ai/backend/manager/api/adapters/deployment/adapter.py index c31d8739568..0f724e502c0 100644 --- a/src/ai/backend/manager/api/adapters/deployment/adapter.py +++ b/src/ai/backend/manager/api/adapters/deployment/adapter.py @@ -9,8 +9,6 @@ from typing import TYPE_CHECKING from uuid import UUID -import sqlalchemy as sa - if TYPE_CHECKING: from ai.backend.manager.services.processors import Processors from ai.backend.manager.sokovan.deployment.coordinator import DeploymentCoordinator @@ -227,6 +225,10 @@ combine_conditions_or, negate_conditions, ) +from 
ai.backend.manager.repositories.deployment.types import ( + ProjectDeploymentSearchScope, + UserDeploymentSearchScope, +) from ai.backend.manager.repositories.deployment.updaters import ( DeploymentMetadataUpdaterSpec, DeploymentNetworkSpecUpdaterSpec, @@ -313,10 +315,13 @@ from ai.backend.manager.services.deployment.actions.route.update_route_traffic_status import ( UpdateRouteTrafficStatusAction, ) -from ai.backend.manager.services.deployment.actions.search_legacy_deployments import ( - SearchLegacyDeploymentsAction, +from ai.backend.manager.services.deployment.actions.search_project_deployments import ( + SearchProjectDeploymentsAction, ) from ai.backend.manager.services.deployment.actions.search_replicas import SearchReplicasAction +from ai.backend.manager.services.deployment.actions.search_user_deployments import ( + SearchUserDeploymentsAction, +) from ai.backend.manager.services.deployment.actions.sync_replicas import SyncReplicaAction from ai.backend.manager.services.deployment.actions.update_deployment import UpdateDeploymentAction from ai.backend.manager.types import OptionalState, TriState @@ -601,16 +606,13 @@ async def my_search( user = current_user() if user is None: raise RuntimeError("No authenticated user in context") + scope = UserDeploymentSearchScope(user_id=user.user_id) conditions: list[QueryCondition] = [] if input.filter: conditions.extend(self._convert_deployment_filter(input.filter)) orders: list[QueryOrder] = ( self._convert_deployment_orders(input.order) if input.order else [] ) - - def _by_created_user() -> sa.sql.expression.ColumnElement[bool]: - return EndpointRow.created_user == user.user_id - querier = self._build_querier( conditions=conditions, orders=orders, @@ -621,12 +623,9 @@ def _by_created_user() -> sa.sql.expression.ColumnElement[bool]: before=input.before, limit=input.limit, offset=input.offset, - base_conditions=[_by_created_user], ) - action_result = ( - await 
self._processors.deployment.search_legacy_deployments.wait_for_complete( - SearchLegacyDeploymentsAction(querier=querier) - ) + action_result = await self._processors.deployment.search_user_deployments.wait_for_complete( + SearchUserDeploymentsAction(scope=scope, querier=querier) ) return AdminSearchDeploymentsPayload( items=[self._deployment_data_to_dto(item) for item in action_result.data], @@ -641,16 +640,13 @@ async def project_search( input: AdminSearchDeploymentsInput, ) -> AdminSearchDeploymentsPayload: """Search deployments within a specific project.""" + scope = ProjectDeploymentSearchScope(project_id=project_id) conditions: list[QueryCondition] = [] if input.filter: conditions.extend(self._convert_deployment_filter(input.filter)) orders: list[QueryOrder] = ( self._convert_deployment_orders(input.order) if input.order else [] ) - - def _by_project_id() -> sa.sql.expression.ColumnElement[bool]: - return EndpointRow.project == project_id - querier = self._build_querier( conditions=conditions, orders=orders, @@ -661,11 +657,10 @@ def _by_project_id() -> sa.sql.expression.ColumnElement[bool]: before=input.before, limit=input.limit, offset=input.offset, - base_conditions=[_by_project_id], ) action_result = ( - await self._processors.deployment.search_legacy_deployments.wait_for_complete( - SearchLegacyDeploymentsAction(querier=querier) + await self._processors.deployment.search_project_deployments.wait_for_complete( + SearchProjectDeploymentsAction(scope=scope, querier=querier) ) ) return AdminSearchDeploymentsPayload( @@ -1293,8 +1288,11 @@ async def batch_load_by_ids( ) -> list[DeploymentNode | None]: """Batch load deployments by ID for DataLoader use. - Returns ``DeploymentNode`` DTOs in the same order as the input - ``deployment_ids`` list; missing IDs come back as ``None``. 
+ Routed through the admin (no-scope) search since the DataLoader + path runs under an already-authorised parent query and the + ``by_ids`` filter is itself the bound on what gets returned. + Output is aligned with the input ``deployment_ids`` order; missing + IDs come back as ``None``. """ if not deployment_ids: return [] @@ -1303,8 +1301,8 @@ async def batch_load_by_ids( conditions=[DeploymentConditions.by_ids(deployment_ids)], ) action_result = ( - await self._processors.deployment.search_legacy_deployments.wait_for_complete( - SearchLegacyDeploymentsAction(querier=querier) + await self._processors.deployment_admin.admin_search_deployments.wait_for_complete( + AdminSearchDeploymentsAction(querier=querier) ) ) deployment_map = { diff --git a/src/ai/backend/manager/repositories/deployment/db_source/db_source.py b/src/ai/backend/manager/repositories/deployment/db_source/db_source.py index 1c13d455350..dcc3918fc08 100644 --- a/src/ai/backend/manager/repositories/deployment/db_source/db_source.py +++ b/src/ai/backend/manager/repositories/deployment/db_source/db_source.py @@ -187,6 +187,7 @@ ProjectDeploymentSearchScope, RouteData, RouteServiceDiscoveryInfo, + UserDeploymentSearchScope, ) from ai.backend.manager.repositories.scheduler.types.session_creation import ( ContainerUserContext, @@ -1314,6 +1315,72 @@ async def search_model_deployments( has_previous_page=result.has_previous_page, ) + async def search_user_deployments( + self, + querier: BatchQuerier, + scope: UserDeploymentSearchScope, + ) -> ModelDeploymentDataSearchResult: + """Search deployments owned by a specific user, returning ``ModelDeploymentData``. + + Backs the v2 adapter's ``my_search`` path. Scope filter + (``EndpointRow.created_user == user_id``) is injected via + ``execute_batch_querier``'s ``scope`` argument. 
+ """ + async with self._begin_readonly_session_read_committed() as db_sess: + query = sa.select(EndpointRow).options( + selectinload(EndpointRow.revisions).selectinload(DeploymentRevisionRow.image_row), + selectinload(EndpointRow.deployment_policy), + ) + + result = await execute_batch_querier( + db_sess, + query, + querier, + scope=scope, + ) + + items = [_endpoint_row_to_model_deployment_data(row.EndpointRow) for row in result.rows] + + return ModelDeploymentDataSearchResult( + items=items, + total_count=result.total_count, + has_next_page=result.has_next_page, + has_previous_page=result.has_previous_page, + ) + + async def search_project_deployments( + self, + querier: BatchQuerier, + scope: ProjectDeploymentSearchScope, + ) -> ModelDeploymentDataSearchResult: + """Search deployments in a project, returning ``ModelDeploymentData``. + + Distinct from :meth:`search_deployments_in_project`, which returns + the lighter-weight ``DeploymentSummaryData`` for project admin + list pages. Backs the v2 adapter's ``project_search`` path. 
+ """ + async with self._begin_readonly_session_read_committed() as db_sess: + query = sa.select(EndpointRow).options( + selectinload(EndpointRow.revisions).selectinload(DeploymentRevisionRow.image_row), + selectinload(EndpointRow.deployment_policy), + ) + + result = await execute_batch_querier( + db_sess, + query, + querier, + scope=scope, + ) + + items = [_endpoint_row_to_model_deployment_data(row.EndpointRow) for row in result.rows] + + return ModelDeploymentDataSearchResult( + items=items, + total_count=result.total_count, + has_next_page=result.has_next_page, + has_previous_page=result.has_previous_page, + ) + async def search_deployments_in_project( self, querier: BatchQuerier, diff --git a/src/ai/backend/manager/repositories/deployment/repository.py b/src/ai/backend/manager/repositories/deployment/repository.py index e31f2419fbc..ac9428454b2 100644 --- a/src/ai/backend/manager/repositories/deployment/repository.py +++ b/src/ai/backend/manager/repositories/deployment/repository.py @@ -67,6 +67,7 @@ ModelDeploymentAccessTokenData, ModelDeploymentAutoScalingRuleData, ModelDeploymentData, + ModelDeploymentDataSearchResult, ModelRevisionData, ModelRevisionSpec, RevisionSearchResult, @@ -106,7 +107,12 @@ from .db_source import DeploymentDBSource from .storage_source import DeploymentStorageSource -from .types import ProjectDeploymentSearchScope, RouteData, RouteServiceDiscoveryInfo +from .types import ( + ProjectDeploymentSearchScope, + RouteData, + RouteServiceDiscoveryInfo, + UserDeploymentSearchScope, +) log = BraceStyleAdapter(logging.getLogger(__name__)) @@ -1525,6 +1531,24 @@ async def search_endpoints( """ return await self._db_source.search_endpoints(querier) + @deployment_repository_resilience.apply() + async def search_user_deployments( + self, + querier: BatchQuerier, + scope: UserDeploymentSearchScope, + ) -> ModelDeploymentDataSearchResult: + """Search a user's own deployments — backs the v2 ``my_search`` path.""" + return await 
self._db_source.search_user_deployments(querier, scope) + + @deployment_repository_resilience.apply() + async def search_project_deployments( + self, + querier: BatchQuerier, + scope: ProjectDeploymentSearchScope, + ) -> ModelDeploymentDataSearchResult: + """Search a project's deployments returning full ``ModelDeploymentData``.""" + return await self._db_source.search_project_deployments(querier, scope) + @deployment_repository_resilience.apply() async def search_deployments_in_project( self, diff --git a/src/ai/backend/manager/repositories/deployment/types/__init__.py b/src/ai/backend/manager/repositories/deployment/types/__init__.py index 741b2a06b16..ccea70bcaf8 100644 --- a/src/ai/backend/manager/repositories/deployment/types/__init__.py +++ b/src/ai/backend/manager/repositories/deployment/types/__init__.py @@ -7,6 +7,7 @@ ProjectDeploymentSearchScope, RouteData, RouteServiceDiscoveryInfo, + UserDeploymentSearchScope, ) __all__ = [ @@ -16,4 +17,5 @@ "ProjectDeploymentSearchScope", "RouteData", "RouteServiceDiscoveryInfo", + "UserDeploymentSearchScope", ] diff --git a/src/ai/backend/manager/repositories/deployment/types/endpoint.py b/src/ai/backend/manager/repositories/deployment/types/endpoint.py index 5b29435c725..62491f15f50 100644 --- a/src/ai/backend/manager/repositories/deployment/types/endpoint.py +++ b/src/ai/backend/manager/repositories/deployment/types/endpoint.py @@ -116,3 +116,26 @@ def existence_checks(self) -> Sequence[ExistenceCheck[UUID]]: error=ProjectNotFound(str(self.project_id)), ), ] + + +@dataclass(frozen=True) +class UserDeploymentSearchScope(SearchScope): + """Required scope for searching deployments created by a given user. + + Backs the v2 adapter's ``my_search`` path; the adapter resolves the + current user and constructs the scope before invoking the action. 
+ """ + + user_id: UUID + + def to_condition(self) -> QueryCondition: + user_id = self.user_id + + def inner() -> sa.sql.expression.ColumnElement[bool]: + return EndpointRow.created_user == user_id + + return inner + + @property + def existence_checks(self) -> Sequence[ExistenceCheck[UUID]]: + return [] diff --git a/src/ai/backend/manager/services/deployment/actions/search_project_deployments.py b/src/ai/backend/manager/services/deployment/actions/search_project_deployments.py new file mode 100644 index 00000000000..96ce334e085 --- /dev/null +++ b/src/ai/backend/manager/services/deployment/actions/search_project_deployments.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import override + +from ai.backend.common.data.permission.types import RBACElementType, ScopeType +from ai.backend.manager.actions.action import BaseActionResult +from ai.backend.manager.actions.types import ActionOperationType +from ai.backend.manager.data.deployment.types import ModelDeploymentData +from ai.backend.manager.data.permission.types import RBACElementRef +from ai.backend.manager.repositories.base import BatchQuerier +from ai.backend.manager.repositories.deployment.types import ProjectDeploymentSearchScope +from ai.backend.manager.services.deployment.actions.base import DeploymentScopeAction + + +@dataclass +class SearchProjectDeploymentsAction(DeploymentScopeAction): + """Search deployments within a project, returning ``ModelDeploymentData``. + + Distinct from :class:`SearchDeploymentsInProjectAction`, which returns + the lighter-weight ``DeploymentSummaryData`` for project admin list + pages. Backs the v2 adapter's ``project_search`` path. 
+ """ + + scope: ProjectDeploymentSearchScope + querier: BatchQuerier + + @override + @classmethod + def operation_type(cls) -> ActionOperationType: + return ActionOperationType.SEARCH + + @override + def scope_type(self) -> ScopeType: + return ScopeType.PROJECT + + @override + def scope_id(self) -> str: + return str(self.scope.project_id) + + @override + def target_element(self) -> RBACElementRef: + return RBACElementRef(RBACElementType.PROJECT, str(self.scope.project_id)) + + +@dataclass +class SearchProjectDeploymentsActionResult(BaseActionResult): + data: list[ModelDeploymentData] + total_count: int + has_next_page: bool + has_previous_page: bool + + @override + def entity_id(self) -> str | None: + return None diff --git a/src/ai/backend/manager/services/deployment/actions/search_user_deployments.py b/src/ai/backend/manager/services/deployment/actions/search_user_deployments.py new file mode 100644 index 00000000000..9c3c25ba2dd --- /dev/null +++ b/src/ai/backend/manager/services/deployment/actions/search_user_deployments.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import override + +from ai.backend.common.data.permission.types import RBACElementType, ScopeType +from ai.backend.manager.actions.action import BaseActionResult +from ai.backend.manager.actions.types import ActionOperationType +from ai.backend.manager.data.deployment.types import ModelDeploymentData +from ai.backend.manager.data.permission.types import RBACElementRef +from ai.backend.manager.repositories.base import BatchQuerier +from ai.backend.manager.repositories.deployment.types import UserDeploymentSearchScope +from ai.backend.manager.services.deployment.actions.base import DeploymentScopeAction + + +@dataclass +class SearchUserDeploymentsAction(DeploymentScopeAction): + """Search deployments created by a specific user. 
+ + Internal name uses the ``User`` scope semantics; the v2 adapter exposes + this as the user-facing ``my_search`` operation, resolving the current + user before constructing the scope. + """ + + scope: UserDeploymentSearchScope + querier: BatchQuerier + + @override + @classmethod + def operation_type(cls) -> ActionOperationType: + return ActionOperationType.SEARCH + + @override + def scope_type(self) -> ScopeType: + return ScopeType.USER + + @override + def scope_id(self) -> str: + return str(self.scope.user_id) + + @override + def target_element(self) -> RBACElementRef: + return RBACElementRef( + element_type=RBACElementType.USER, + element_id=str(self.scope.user_id), + ) + + +@dataclass +class SearchUserDeploymentsActionResult(BaseActionResult): + data: list[ModelDeploymentData] + total_count: int + has_next_page: bool + has_previous_page: bool + + @override + def entity_id(self) -> str | None: + return None diff --git a/src/ai/backend/manager/services/deployment/processors.py b/src/ai/backend/manager/services/deployment/processors.py index 805a2da547e..1f82483615f 100644 --- a/src/ai/backend/manager/services/deployment/processors.py +++ b/src/ai/backend/manager/services/deployment/processors.py @@ -128,10 +128,18 @@ SearchLegacyDeploymentsAction, SearchLegacyDeploymentsActionResult, ) +from ai.backend.manager.services.deployment.actions.search_project_deployments import ( + SearchProjectDeploymentsAction, + SearchProjectDeploymentsActionResult, +) from ai.backend.manager.services.deployment.actions.search_replicas import ( SearchReplicasAction, SearchReplicasActionResult, ) +from ai.backend.manager.services.deployment.actions.search_user_deployments import ( + SearchUserDeploymentsAction, + SearchUserDeploymentsActionResult, +) from ai.backend.manager.services.deployment.actions.sync_replicas import ( SyncReplicaAction, SyncReplicaActionResult, @@ -166,6 +174,12 @@ class DeploymentProcessors(AbstractProcessorPackage): search_legacy_deployments: 
ActionProcessor[ SearchLegacyDeploymentsAction, SearchLegacyDeploymentsActionResult ] + search_user_deployments: ActionProcessor[ + SearchUserDeploymentsAction, SearchUserDeploymentsActionResult + ] + search_project_deployments: ActionProcessor[ + SearchProjectDeploymentsAction, SearchProjectDeploymentsActionResult + ] search_deployments_in_project: ActionProcessor[ SearchDeploymentsInProjectAction, SearchDeploymentsInProjectActionResult ] @@ -258,6 +272,16 @@ def __init__( self.search_legacy_deployments = ActionProcessor( service.search_legacy_deployments, action_monitors ) + self.search_user_deployments = ActionProcessor( + service.search_user_deployments, + action_monitors, + validators=[cast(ActionValidator, validators.rbac.scope)], + ) + self.search_project_deployments = ActionProcessor( + service.search_project_deployments, + action_monitors, + validators=[cast(ActionValidator, validators.rbac.scope)], + ) self.search_deployments_in_project = ActionProcessor( service.search_deployments_in_project, action_monitors, @@ -334,6 +358,8 @@ def supported_actions(self) -> list[ActionSpec]: ReplaceDeploymentOptionsAction.spec(), DestroyDeploymentAction.spec(), SearchLegacyDeploymentsAction.spec(), + SearchUserDeploymentsAction.spec(), + SearchProjectDeploymentsAction.spec(), SearchDeploymentsInProjectAction.spec(), GetDeploymentByIdAction.spec(), GetDeploymentPolicyAction.spec(), diff --git a/src/ai/backend/manager/services/deployment/service.py b/src/ai/backend/manager/services/deployment/service.py index cc7fb308b4d..adabd07f91d 100644 --- a/src/ai/backend/manager/services/deployment/service.py +++ b/src/ai/backend/manager/services/deployment/service.py @@ -184,10 +184,18 @@ SearchLegacyDeploymentsAction, SearchLegacyDeploymentsActionResult, ) +from ai.backend.manager.services.deployment.actions.search_project_deployments import ( + SearchProjectDeploymentsAction, + SearchProjectDeploymentsActionResult, +) from 
ai.backend.manager.services.deployment.actions.search_replicas import ( SearchReplicasAction, SearchReplicasActionResult, ) +from ai.backend.manager.services.deployment.actions.search_user_deployments import ( + SearchUserDeploymentsAction, + SearchUserDeploymentsActionResult, +) from ai.backend.manager.services.deployment.actions.sync_replicas import ( SyncReplicaAction, SyncReplicaActionResult, @@ -549,6 +557,34 @@ async def search_legacy_deployments( has_previous_page=result.has_previous_page, ) + async def search_user_deployments( + self, action: SearchUserDeploymentsAction + ) -> SearchUserDeploymentsActionResult: + """Search the current user's deployments — backs ``my_search``.""" + result = await self._deployment_repository.search_user_deployments( + action.querier, action.scope + ) + return SearchUserDeploymentsActionResult( + data=result.items, + total_count=result.total_count, + has_next_page=result.has_next_page, + has_previous_page=result.has_previous_page, + ) + + async def search_project_deployments( + self, action: SearchProjectDeploymentsAction + ) -> SearchProjectDeploymentsActionResult: + """Search deployments inside a project, returning full ``ModelDeploymentData``.""" + result = await self._deployment_repository.search_project_deployments( + action.querier, action.scope + ) + return SearchProjectDeploymentsActionResult( + data=result.items, + total_count=result.total_count, + has_next_page=result.has_next_page, + has_previous_page=result.has_previous_page, + ) + async def search_deployments_in_project( self, action: SearchDeploymentsInProjectAction ) -> SearchDeploymentsInProjectActionResult: From c0292eda8c3b22229d6806daf2796b7dd7ee933d Mon Sep 17 00:00:00 2001 From: jopemachine Date: Fri, 8 May 2026 16:59:37 +0900 Subject: [PATCH 6/8] fix: route GraphQL DataLoader batch_load_by_ids back through search_legacy_deployments Going through ``DeploymentAdminProcessors.admin_search_deployments`` made the DataLoader path admin-only, which breaks any 
non-admin GraphQL query that resolves a deployment reference (e.g. ``ModelDeploymentNode`` from a sibling entity). The admin processor exists to mark callers that *must* be admin-authorised; a DataLoader inherits the parent resolver's authorisation and runs for every user, so it cannot live there. Route ``batch_load_by_ids`` through the regular ``DeploymentProcessors.search_legacy_deployments`` until a dedicated non-admin batch action lands (the action's ``legacy`` name is awkward for a v2 DataLoader path; renaming is left to a follow-up so the v1 REST contract stays untouched in this PR). --- .../manager/api/adapters/deployment/adapter.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/ai/backend/manager/api/adapters/deployment/adapter.py b/src/ai/backend/manager/api/adapters/deployment/adapter.py index 0f724e502c0..f827cacb5c3 100644 --- a/src/ai/backend/manager/api/adapters/deployment/adapter.py +++ b/src/ai/backend/manager/api/adapters/deployment/adapter.py @@ -315,6 +315,9 @@ from ai.backend.manager.services.deployment.actions.route.update_route_traffic_status import ( UpdateRouteTrafficStatusAction, ) +from ai.backend.manager.services.deployment.actions.search_legacy_deployments import ( + SearchLegacyDeploymentsAction, +) from ai.backend.manager.services.deployment.actions.search_project_deployments import ( SearchProjectDeploymentsAction, ) @@ -1288,9 +1291,13 @@ async def batch_load_by_ids( ) -> list[DeploymentNode | None]: """Batch load deployments by ID for DataLoader use. - Routed through the admin (no-scope) search since the DataLoader - path runs under an already-authorised parent query and the - ``by_ids`` filter is itself the bound on what gets returned. + Routed through the regular ``search_legacy_deployments`` processor + on purpose: the admin processor is reserved for admin-authorised + callers, but a DataLoader fires under whatever user triggered the + parent GraphQL query (admin or not). 
The ``by_ids`` filter is the + bound on the result set; the parent resolver has already authorised + access to whatever entity references these IDs. + Output is aligned with the input ``deployment_ids`` order; missing IDs come back as ``None``. """ @@ -1301,8 +1308,8 @@ async def batch_load_by_ids( conditions=[DeploymentConditions.by_ids(deployment_ids)], ) action_result = ( - await self._processors.deployment_admin.admin_search_deployments.wait_for_complete( - AdminSearchDeploymentsAction(querier=querier) + await self._processors.deployment.search_legacy_deployments.wait_for_complete( + SearchLegacyDeploymentsAction(querier=querier) ) ) deployment_map = { From f4ef8994cde0e37fa60d5606c94d3a9b0f24ff59 Mon Sep 17 00:00:00 2001 From: jopemachine Date: Fri, 8 May 2026 17:02:51 +0900 Subject: [PATCH 7/8] refactor: drop unused storage_manager from DeploymentDBSource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dependency was held on the DB source but never read by any of its methods — storage I/O lives on ``DeploymentStorageSource`` (still owned by ``DeploymentRepository``). Drop the field, the ctor parameter, and the forwarding through both repositories. 
--- .../manager/repositories/deployment/admin_repository.py | 4 +--- .../manager/repositories/deployment/db_source/db_source.py | 4 ---- .../backend/manager/repositories/deployment/repositories.py | 2 +- src/ai/backend/manager/repositories/deployment/repository.py | 2 +- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/ai/backend/manager/repositories/deployment/admin_repository.py b/src/ai/backend/manager/repositories/deployment/admin_repository.py index dc5c68ac840..5f3ee9590a2 100644 --- a/src/ai/backend/manager/repositories/deployment/admin_repository.py +++ b/src/ai/backend/manager/repositories/deployment/admin_repository.py @@ -20,7 +20,6 @@ from ai.backend.manager.repositories.deployment.db_source.db_source import DeploymentDBSource if TYPE_CHECKING: - from ai.backend.manager.models.storage import StorageSessionManager from ai.backend.manager.models.utils import ExtendedAsyncSAEngine __all__ = ("DeploymentAdminRepository",) @@ -59,9 +58,8 @@ class DeploymentAdminRepository: def __init__( self, db: ExtendedAsyncSAEngine, - storage_manager: StorageSessionManager, ) -> None: - self._db_source = DeploymentDBSource(db, storage_manager) + self._db_source = DeploymentDBSource(db) @deployment_admin_repository_resilience.apply() async def search_model_deployments( diff --git a/src/ai/backend/manager/repositories/deployment/db_source/db_source.py b/src/ai/backend/manager/repositories/deployment/db_source/db_source.py index dcc3918fc08..3fb8ea4c3be 100644 --- a/src/ai/backend/manager/repositories/deployment/db_source/db_source.py +++ b/src/ai/backend/manager/repositories/deployment/db_source/db_source.py @@ -147,7 +147,6 @@ RouteHistoryRow, ) from ai.backend.manager.models.session import SessionRow -from ai.backend.manager.models.storage import StorageSessionManager from ai.backend.manager.models.user import UserRow from ai.backend.manager.models.utils import ExtendedAsyncSAEngine from ai.backend.manager.models.vfolder import VFolderRow @@ -318,15 
+317,12 @@ class DeploymentDBSource: """Database source for deployment-related operations.""" _db: ExtendedAsyncSAEngine - _storage_manager: StorageSessionManager def __init__( self, db: ExtendedAsyncSAEngine, - storage_manager: StorageSessionManager, ) -> None: self._db = db - self._storage_manager = storage_manager @actxmgr async def _begin_readonly_read_committed(self) -> AsyncIterator[SAConnection]: diff --git a/src/ai/backend/manager/repositories/deployment/repositories.py b/src/ai/backend/manager/repositories/deployment/repositories.py index 6e75c795737..b4076730977 100644 --- a/src/ai/backend/manager/repositories/deployment/repositories.py +++ b/src/ai/backend/manager/repositories/deployment/repositories.py @@ -26,5 +26,5 @@ def create(cls, args: RepositoryArgs) -> Self: args.valkey_live_client, args.valkey_schedule_client, ) - admin_repository = DeploymentAdminRepository(args.db, args.storage_manager) + admin_repository = DeploymentAdminRepository(args.db) return cls(repository=repository, admin_repository=admin_repository) diff --git a/src/ai/backend/manager/repositories/deployment/repository.py b/src/ai/backend/manager/repositories/deployment/repository.py index ac9428454b2..3e8160042f0 100644 --- a/src/ai/backend/manager/repositories/deployment/repository.py +++ b/src/ai/backend/manager/repositories/deployment/repository.py @@ -167,7 +167,7 @@ def __init__( valkey_live: ValkeyLiveClient, valkey_schedule: ValkeyScheduleClient, ) -> None: - self._db_source = DeploymentDBSource(db, storage_manager) + self._db_source = DeploymentDBSource(db) self._storage_source = DeploymentStorageSource(storage_manager) self._valkey_stat = valkey_stat self._valkey_live = valkey_live From 7dbdfb12b3abf655561648ed5de14a048811f8ca Mon Sep 17 00:00:00 2001 From: jopemachine Date: Fri, 8 May 2026 17:07:41 +0900 Subject: [PATCH 8/8] refactor: rename ModelDeploymentData-returning paths to use `model_deployments` consistently The admin repo had `search_model_deployments`, but the 
new user/project counterparts and matching actions/handlers/processors used the shorter `deployments`. Align them: methods, actions, handlers, processor fields, and action file names that return `ModelDeploymentData` now all read `*_model_deployments` / `*ModelDeployments*`. The legacy `search_legacy_deployments` (returns `ModelDeploymentData` via the `DeploymentInfo` converter) keeps its name to preserve the v1 REST contract surface, and `*DeploymentSearchScope` keeps its name because the scope describes the entity being filtered, not the return type. Renames: - AdminSearchDeploymentsAction -> AdminSearchModelDeploymentsAction - SearchUserDeploymentsAction -> SearchUserModelDeploymentsAction - SearchProjectDeploymentsAction -> SearchProjectModelDeploymentsAction - service.{admin_search,search_user,search_project}_deployments -> *_model_deployments - processor fields and action file paths follow. --- .../api/adapters/deployment/adapter.py | 28 +++++------ .../deployment/db_source/db_source.py | 4 +- .../repositories/deployment/repository.py | 8 ++-- ...s.py => admin_search_model_deployments.py} | 4 +- ...py => search_project_model_deployments.py} | 4 +- ...ts.py => search_user_model_deployments.py} | 4 +- .../services/deployment/admin_service.py | 14 +++--- .../manager/services/deployment/processors.py | 48 +++++++++---------- .../manager/services/deployment/service.py | 32 ++++++------- 9 files changed, 73 insertions(+), 73 deletions(-) rename src/ai/backend/manager/services/deployment/actions/{admin_search_deployments.py => admin_search_model_deployments.py} (89%) rename src/ai/backend/manager/services/deployment/actions/{search_project_deployments.py => search_project_model_deployments.py} (92%) rename src/ai/backend/manager/services/deployment/actions/{search_user_deployments.py => search_user_model_deployments.py} (93%) diff --git a/src/ai/backend/manager/api/adapters/deployment/adapter.py b/src/ai/backend/manager/api/adapters/deployment/adapter.py index 
f827cacb5c3..443534b1dbd 100644 --- a/src/ai/backend/manager/api/adapters/deployment/adapter.py +++ b/src/ai/backend/manager/api/adapters/deployment/adapter.py @@ -250,8 +250,8 @@ from ai.backend.manager.services.deployment.actions.access_token.search_access_tokens import ( SearchAccessTokensAction, ) -from ai.backend.manager.services.deployment.actions.admin_search_deployments import ( - AdminSearchDeploymentsAction, +from ai.backend.manager.services.deployment.actions.admin_search_model_deployments import ( + AdminSearchModelDeploymentsAction, ) from ai.backend.manager.services.deployment.actions.auto_scaling_rule.bulk_delete_auto_scaling_rules import ( BulkDeleteAutoScalingRulesAction, @@ -318,12 +318,12 @@ from ai.backend.manager.services.deployment.actions.search_legacy_deployments import ( SearchLegacyDeploymentsAction, ) -from ai.backend.manager.services.deployment.actions.search_project_deployments import ( - SearchProjectDeploymentsAction, +from ai.backend.manager.services.deployment.actions.search_project_model_deployments import ( + SearchProjectModelDeploymentsAction, ) from ai.backend.manager.services.deployment.actions.search_replicas import SearchReplicasAction -from ai.backend.manager.services.deployment.actions.search_user_deployments import ( - SearchUserDeploymentsAction, +from ai.backend.manager.services.deployment.actions.search_user_model_deployments import ( + SearchUserModelDeploymentsAction, ) from ai.backend.manager.services.deployment.actions.sync_replicas import SyncReplicaAction from ai.backend.manager.services.deployment.actions.update_deployment import UpdateDeploymentAction @@ -589,10 +589,8 @@ async def admin_search( ) -> AdminSearchDeploymentsPayload: """Search deployments (admin, no scope).""" querier = self._build_deployment_querier(input) - action_result = ( - await self._processors.deployment_admin.admin_search_deployments.wait_for_complete( - AdminSearchDeploymentsAction(querier=querier) - ) + action_result = await 
self._processors.deployment_admin.admin_search_model_deployments.wait_for_complete( + AdminSearchModelDeploymentsAction(querier=querier) ) return AdminSearchDeploymentsPayload( items=[self._deployment_data_to_dto(item) for item in action_result.data], @@ -627,8 +625,10 @@ async def my_search( limit=input.limit, offset=input.offset, ) - action_result = await self._processors.deployment.search_user_deployments.wait_for_complete( - SearchUserDeploymentsAction(scope=scope, querier=querier) + action_result = ( + await self._processors.deployment.search_user_model_deployments.wait_for_complete( + SearchUserModelDeploymentsAction(scope=scope, querier=querier) + ) ) return AdminSearchDeploymentsPayload( items=[self._deployment_data_to_dto(item) for item in action_result.data], @@ -662,8 +662,8 @@ async def project_search( offset=input.offset, ) action_result = ( - await self._processors.deployment.search_project_deployments.wait_for_complete( - SearchProjectDeploymentsAction(scope=scope, querier=querier) + await self._processors.deployment.search_project_model_deployments.wait_for_complete( + SearchProjectModelDeploymentsAction(scope=scope, querier=querier) ) ) return AdminSearchDeploymentsPayload( diff --git a/src/ai/backend/manager/repositories/deployment/db_source/db_source.py b/src/ai/backend/manager/repositories/deployment/db_source/db_source.py index 3fb8ea4c3be..2ded3aa72f6 100644 --- a/src/ai/backend/manager/repositories/deployment/db_source/db_source.py +++ b/src/ai/backend/manager/repositories/deployment/db_source/db_source.py @@ -1311,7 +1311,7 @@ async def search_model_deployments( has_previous_page=result.has_previous_page, ) - async def search_user_deployments( + async def search_user_model_deployments( self, querier: BatchQuerier, scope: UserDeploymentSearchScope, @@ -1344,7 +1344,7 @@ async def search_user_deployments( has_previous_page=result.has_previous_page, ) - async def search_project_deployments( + async def search_project_model_deployments( self, 
querier: BatchQuerier, scope: ProjectDeploymentSearchScope, diff --git a/src/ai/backend/manager/repositories/deployment/repository.py b/src/ai/backend/manager/repositories/deployment/repository.py index 3e8160042f0..81b371ad1d4 100644 --- a/src/ai/backend/manager/repositories/deployment/repository.py +++ b/src/ai/backend/manager/repositories/deployment/repository.py @@ -1532,22 +1532,22 @@ async def search_endpoints( return await self._db_source.search_endpoints(querier) @deployment_repository_resilience.apply() - async def search_user_deployments( + async def search_user_model_deployments( self, querier: BatchQuerier, scope: UserDeploymentSearchScope, ) -> ModelDeploymentDataSearchResult: """Search a user's own deployments — backs the v2 ``my_search`` path.""" - return await self._db_source.search_user_deployments(querier, scope) + return await self._db_source.search_user_model_deployments(querier, scope) @deployment_repository_resilience.apply() - async def search_project_deployments( + async def search_project_model_deployments( self, querier: BatchQuerier, scope: ProjectDeploymentSearchScope, ) -> ModelDeploymentDataSearchResult: """Search a project's deployments returning full ``ModelDeploymentData``.""" - return await self._db_source.search_project_deployments(querier, scope) + return await self._db_source.search_project_model_deployments(querier, scope) @deployment_repository_resilience.apply() async def search_deployments_in_project( diff --git a/src/ai/backend/manager/services/deployment/actions/admin_search_deployments.py b/src/ai/backend/manager/services/deployment/actions/admin_search_model_deployments.py similarity index 89% rename from src/ai/backend/manager/services/deployment/actions/admin_search_deployments.py rename to src/ai/backend/manager/services/deployment/actions/admin_search_model_deployments.py index 3d2baba3aba..21b31879f83 100644 --- a/src/ai/backend/manager/services/deployment/actions/admin_search_deployments.py +++ 
b/src/ai/backend/manager/services/deployment/actions/admin_search_model_deployments.py @@ -9,7 +9,7 @@ @dataclass -class AdminSearchDeploymentsAction(DeploymentBaseAction): +class AdminSearchModelDeploymentsAction(DeploymentBaseAction): """Search every deployment with no scope (superadmin path). Routed through ``DeploymentAdminProcessors`` so callers make the @@ -30,7 +30,7 @@ def operation_type(cls) -> ActionOperationType: @dataclass -class AdminSearchDeploymentsActionResult(BaseActionResult): +class AdminSearchModelDeploymentsActionResult(BaseActionResult): data: list[ModelDeploymentData] total_count: int has_next_page: bool diff --git a/src/ai/backend/manager/services/deployment/actions/search_project_deployments.py b/src/ai/backend/manager/services/deployment/actions/search_project_model_deployments.py similarity index 92% rename from src/ai/backend/manager/services/deployment/actions/search_project_deployments.py rename to src/ai/backend/manager/services/deployment/actions/search_project_model_deployments.py index 96ce334e085..01c54c22134 100644 --- a/src/ai/backend/manager/services/deployment/actions/search_project_deployments.py +++ b/src/ai/backend/manager/services/deployment/actions/search_project_model_deployments.py @@ -14,7 +14,7 @@ @dataclass -class SearchProjectDeploymentsAction(DeploymentScopeAction): +class SearchProjectModelDeploymentsAction(DeploymentScopeAction): """Search deployments within a project, returning ``ModelDeploymentData``. 
Distinct from :class:`SearchDeploymentsInProjectAction`, which returns @@ -44,7 +44,7 @@ def target_element(self) -> RBACElementRef: @dataclass -class SearchProjectDeploymentsActionResult(BaseActionResult): +class SearchProjectModelDeploymentsActionResult(BaseActionResult): data: list[ModelDeploymentData] total_count: int has_next_page: bool diff --git a/src/ai/backend/manager/services/deployment/actions/search_user_deployments.py b/src/ai/backend/manager/services/deployment/actions/search_user_model_deployments.py similarity index 93% rename from src/ai/backend/manager/services/deployment/actions/search_user_deployments.py rename to src/ai/backend/manager/services/deployment/actions/search_user_model_deployments.py index 9c3c25ba2dd..d146e5da3b7 100644 --- a/src/ai/backend/manager/services/deployment/actions/search_user_deployments.py +++ b/src/ai/backend/manager/services/deployment/actions/search_user_model_deployments.py @@ -14,7 +14,7 @@ @dataclass -class SearchUserDeploymentsAction(DeploymentScopeAction): +class SearchUserModelDeploymentsAction(DeploymentScopeAction): """Search deployments created by a specific user. 
Internal name uses the ``User`` scope semantics; the v2 adapter exposes @@ -47,7 +47,7 @@ def target_element(self) -> RBACElementRef: @dataclass -class SearchUserDeploymentsActionResult(BaseActionResult): +class SearchUserModelDeploymentsActionResult(BaseActionResult): data: list[ModelDeploymentData] total_count: int has_next_page: bool diff --git a/src/ai/backend/manager/services/deployment/admin_service.py b/src/ai/backend/manager/services/deployment/admin_service.py index 31fc061eb35..9bda1d8e855 100644 --- a/src/ai/backend/manager/services/deployment/admin_service.py +++ b/src/ai/backend/manager/services/deployment/admin_service.py @@ -3,9 +3,9 @@ from ai.backend.manager.repositories.deployment.admin_repository import ( DeploymentAdminRepository, ) -from ai.backend.manager.services.deployment.actions.admin_search_deployments import ( - AdminSearchDeploymentsAction, - AdminSearchDeploymentsActionResult, +from ai.backend.manager.services.deployment.actions.admin_search_model_deployments import ( + AdminSearchModelDeploymentsAction, + AdminSearchModelDeploymentsActionResult, ) __all__ = ("DeploymentAdminService",) @@ -24,12 +24,12 @@ class DeploymentAdminService: def __init__(self, admin_repository: DeploymentAdminRepository) -> None: self._admin_repository = admin_repository - async def admin_search_deployments( - self, action: AdminSearchDeploymentsAction - ) -> AdminSearchDeploymentsActionResult: + async def admin_search_model_deployments( + self, action: AdminSearchModelDeploymentsAction + ) -> AdminSearchModelDeploymentsActionResult: """Search every deployment without a scope filter.""" result = await self._admin_repository.search_model_deployments(action.querier) - return AdminSearchDeploymentsActionResult( + return AdminSearchModelDeploymentsActionResult( data=result.items, total_count=result.total_count, has_next_page=result.has_next_page, diff --git a/src/ai/backend/manager/services/deployment/processors.py 
b/src/ai/backend/manager/services/deployment/processors.py index 1f82483615f..5f81c692e56 100644 --- a/src/ai/backend/manager/services/deployment/processors.py +++ b/src/ai/backend/manager/services/deployment/processors.py @@ -30,9 +30,9 @@ SearchAccessTokensAction, SearchAccessTokensActionResult, ) -from ai.backend.manager.services.deployment.actions.admin_search_deployments import ( - AdminSearchDeploymentsAction, - AdminSearchDeploymentsActionResult, +from ai.backend.manager.services.deployment.actions.admin_search_model_deployments import ( + AdminSearchModelDeploymentsAction, + AdminSearchModelDeploymentsActionResult, ) from ai.backend.manager.services.deployment.actions.auto_scaling_rule.bulk_delete_auto_scaling_rules import ( BulkDeleteAutoScalingRulesAction, @@ -128,17 +128,17 @@ SearchLegacyDeploymentsAction, SearchLegacyDeploymentsActionResult, ) -from ai.backend.manager.services.deployment.actions.search_project_deployments import ( - SearchProjectDeploymentsAction, - SearchProjectDeploymentsActionResult, +from ai.backend.manager.services.deployment.actions.search_project_model_deployments import ( + SearchProjectModelDeploymentsAction, + SearchProjectModelDeploymentsActionResult, ) from ai.backend.manager.services.deployment.actions.search_replicas import ( SearchReplicasAction, SearchReplicasActionResult, ) -from ai.backend.manager.services.deployment.actions.search_user_deployments import ( - SearchUserDeploymentsAction, - SearchUserDeploymentsActionResult, +from ai.backend.manager.services.deployment.actions.search_user_model_deployments import ( + SearchUserModelDeploymentsAction, + SearchUserModelDeploymentsActionResult, ) from ai.backend.manager.services.deployment.actions.sync_replicas import ( SyncReplicaAction, @@ -174,11 +174,11 @@ class DeploymentProcessors(AbstractProcessorPackage): search_legacy_deployments: ActionProcessor[ SearchLegacyDeploymentsAction, SearchLegacyDeploymentsActionResult ] - search_user_deployments: ActionProcessor[ - 
SearchUserDeploymentsAction, SearchUserDeploymentsActionResult + search_user_model_deployments: ActionProcessor[ + SearchUserModelDeploymentsAction, SearchUserModelDeploymentsActionResult ] - search_project_deployments: ActionProcessor[ - SearchProjectDeploymentsAction, SearchProjectDeploymentsActionResult + search_project_model_deployments: ActionProcessor[ + SearchProjectModelDeploymentsAction, SearchProjectModelDeploymentsActionResult ] search_deployments_in_project: ActionProcessor[ SearchDeploymentsInProjectAction, SearchDeploymentsInProjectActionResult @@ -272,13 +272,13 @@ def __init__( self.search_legacy_deployments = ActionProcessor( service.search_legacy_deployments, action_monitors ) - self.search_user_deployments = ActionProcessor( - service.search_user_deployments, + self.search_user_model_deployments = ActionProcessor( + service.search_user_model_deployments, action_monitors, validators=[cast(ActionValidator, validators.rbac.scope)], ) - self.search_project_deployments = ActionProcessor( - service.search_project_deployments, + self.search_project_model_deployments = ActionProcessor( + service.search_project_model_deployments, action_monitors, validators=[cast(ActionValidator, validators.rbac.scope)], ) @@ -358,8 +358,8 @@ def supported_actions(self) -> list[ActionSpec]: ReplaceDeploymentOptionsAction.spec(), DestroyDeploymentAction.spec(), SearchLegacyDeploymentsAction.spec(), - SearchUserDeploymentsAction.spec(), - SearchProjectDeploymentsAction.spec(), + SearchUserModelDeploymentsAction.spec(), + SearchProjectModelDeploymentsAction.spec(), SearchDeploymentsInProjectAction.spec(), GetDeploymentByIdAction.spec(), GetDeploymentPolicyAction.spec(), @@ -403,8 +403,8 @@ class DeploymentAdminProcessors(AbstractProcessorPackage): stay on :class:`DeploymentProcessors`. 
""" - admin_search_deployments: ActionProcessor[ - AdminSearchDeploymentsAction, AdminSearchDeploymentsActionResult + admin_search_model_deployments: ActionProcessor[ + AdminSearchModelDeploymentsAction, AdminSearchModelDeploymentsActionResult ] def __init__( @@ -412,12 +412,12 @@ def __init__( service: DeploymentAdminService, action_monitors: list[ActionMonitor], ) -> None: - self.admin_search_deployments = ActionProcessor( - service.admin_search_deployments, action_monitors + self.admin_search_model_deployments = ActionProcessor( + service.admin_search_model_deployments, action_monitors ) @override def supported_actions(self) -> list[ActionSpec]: return [ - AdminSearchDeploymentsAction.spec(), + AdminSearchModelDeploymentsAction.spec(), ] diff --git a/src/ai/backend/manager/services/deployment/service.py b/src/ai/backend/manager/services/deployment/service.py index adabd07f91d..7bbb4710ab0 100644 --- a/src/ai/backend/manager/services/deployment/service.py +++ b/src/ai/backend/manager/services/deployment/service.py @@ -184,17 +184,17 @@ SearchLegacyDeploymentsAction, SearchLegacyDeploymentsActionResult, ) -from ai.backend.manager.services.deployment.actions.search_project_deployments import ( - SearchProjectDeploymentsAction, - SearchProjectDeploymentsActionResult, +from ai.backend.manager.services.deployment.actions.search_project_model_deployments import ( + SearchProjectModelDeploymentsAction, + SearchProjectModelDeploymentsActionResult, ) from ai.backend.manager.services.deployment.actions.search_replicas import ( SearchReplicasAction, SearchReplicasActionResult, ) -from ai.backend.manager.services.deployment.actions.search_user_deployments import ( - SearchUserDeploymentsAction, - SearchUserDeploymentsActionResult, +from ai.backend.manager.services.deployment.actions.search_user_model_deployments import ( + SearchUserModelDeploymentsAction, + SearchUserModelDeploymentsActionResult, ) from ai.backend.manager.services.deployment.actions.sync_replicas import ( 
SyncReplicaAction, @@ -557,28 +557,28 @@ async def search_legacy_deployments( has_previous_page=result.has_previous_page, ) - async def search_user_deployments( - self, action: SearchUserDeploymentsAction - ) -> SearchUserDeploymentsActionResult: + async def search_user_model_deployments( + self, action: SearchUserModelDeploymentsAction + ) -> SearchUserModelDeploymentsActionResult: """Search the current user's deployments — backs ``my_search``.""" - result = await self._deployment_repository.search_user_deployments( + result = await self._deployment_repository.search_user_model_deployments( action.querier, action.scope ) - return SearchUserDeploymentsActionResult( + return SearchUserModelDeploymentsActionResult( data=result.items, total_count=result.total_count, has_next_page=result.has_next_page, has_previous_page=result.has_previous_page, ) - async def search_project_deployments( - self, action: SearchProjectDeploymentsAction - ) -> SearchProjectDeploymentsActionResult: + async def search_project_model_deployments( + self, action: SearchProjectModelDeploymentsAction + ) -> SearchProjectModelDeploymentsActionResult: """Search deployments inside a project, returning full ``ModelDeploymentData``.""" - result = await self._deployment_repository.search_project_deployments( + result = await self._deployment_repository.search_project_model_deployments( action.querier, action.scope ) - return SearchProjectDeploymentsActionResult( + return SearchProjectModelDeploymentsActionResult( data=result.items, total_count=result.total_count, has_next_page=result.has_next_page,