Skip to content

Commit ce7ae34

Browse files
jopemachine and claude committed
feat(BA-5978): make BackendAIModel.build_validation_error overridable
Add a public ``build_validation_error`` classmethod on ``BackendAIModel`` that returns the ``BackendAIError`` instance to raise when ``model_validate*`` fails. The default surfaces the generic ``BackendAIModelValidationFailed``; subclasses override the method to inject a domain-specific 400 directly, without any caller-side try/except re-wrap.

Apply the override on the two models that previously needed wrapping:

* ``ModelDefinition`` raises ``ModelDefinitionValidationError``. That exception class is moved from ``ai.backend.agent.errors.agent`` to ``ai.backend.common.exception`` (and the agent re-export is dropped) so the model — which lives in ``common.config`` — can construct it without an upward-layer import. The agent-specific error_type URL segment is dropped in the move.
* ``SessionSpec`` raises ``IncompleteSessionSpec`` with the existing ``extra_data["missing"]`` shape, using a module-local ``_format_loc`` helper.

The caller-side try/except wrappers around ``ModelDefinition.model_validate`` in ``agent/agent.py`` and ``manager/services/model_card/service.py``, and the wrapper in ``sokovan/scheduling_controller/preparers/session_spec_preparer.py``, are all removed — the models now raise the right domain error directly.

Tests stay unchanged: they still expect ``IncompleteSessionSpec`` / ``ModelDefinitionValidationError`` because the override raises the same types.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 202f333 commit ce7ae34

10 files changed

Lines changed: 149 additions & 85 deletions

File tree

src/ai/backend/agent/agent.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,6 @@
158158
AbstractEvent,
159159
)
160160
from ai.backend.common.exception import (
161-
BackendAIModelValidationFailed,
162161
ConfigurationError,
163162
VolumeMountFailed,
164163
)
@@ -236,7 +235,6 @@
236235
ImagePullTimeoutError,
237236
ModelDefinitionEmptyError,
238237
ModelDefinitionNotFoundError,
239-
ModelDefinitionValidationError,
240238
ModelFolderNotSpecifiedError,
241239
PortConflictError,
242240
ReservedPortError,
@@ -3295,13 +3293,7 @@ async def _load_model_definition(
32953293
f" vFolder {model_folder.name} (ID {model_folder.vfid})",
32963294
)
32973295

3298-
try:
3299-
parsed = ModelDefinition.model_validate(inlined)
3300-
except BackendAIModelValidationFailed as e:
3301-
raise ModelDefinitionValidationError(
3302-
"Failed to validate model definition for vFolder"
3303-
f" {model_folder.name} (ID {model_folder.vfid})",
3304-
) from e
3296+
parsed = ModelDefinition.model_validate(inlined)
33053297
if not parsed.models:
33063298
raise ModelDefinitionEmptyError
33073299
model_definition = parsed.model_dump(mode="json")

src/ai/backend/agent/errors/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
ModelDefinitionEmptyError,
2121
ModelDefinitionInvalidYAMLError,
2222
ModelDefinitionNotFoundError,
23-
ModelDefinitionValidationError,
2423
ModelFolderNotSpecifiedError,
2524
PortConflictError,
2625
ReservedPortError,
@@ -64,7 +63,6 @@
6463
"ModelDefinitionEmptyError",
6564
"ModelDefinitionInvalidYAMLError",
6665
"ModelDefinitionNotFoundError",
67-
"ModelDefinitionValidationError",
6866
"ModelFolderNotSpecifiedError",
6967
"PortConflictError",
7068
"ReservedPortError",

src/ai/backend/agent/errors/agent.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -169,20 +169,6 @@ def error_code(self) -> ErrorCode:
169169
)
170170

171171

172-
class ModelDefinitionValidationError(BackendAIError, web.HTTPBadRequest):
173-
"""Raised when model definition validation fails."""
174-
175-
error_type = "https://api.backend.ai/probs/agent/model-definition-validation-failed"
176-
error_title = "Model definition validation failed."
177-
178-
def error_code(self) -> ErrorCode:
179-
return ErrorCode(
180-
domain=ErrorDomain.MODEL_SERVICE,
181-
operation=ErrorOperation.ACCESS,
182-
error_detail=ErrorDetail.INVALID_PARAMETERS,
183-
)
184-
185-
186172
class ModelFolderNotSpecifiedError(BackendAIError, web.HTTPBadRequest):
187173
"""Raised when no model virtual folder is specified."""
188174

src/ai/backend/common/config.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import sys
66
from collections.abc import Mapping, MutableMapping
77
from pathlib import Path
8-
from typing import Any
8+
from typing import Any, override
99

1010
import humps
1111
import tomli
@@ -19,8 +19,8 @@
1919

2020
from . import validators as tx
2121
from .etcd import AsyncEtcd, ConfigScopes
22-
from .exception import ConfigurationError
23-
from .types import BackendAIModel, RedisHelperConfig
22+
from .exception import BackendAIError, ConfigurationError, ModelDefinitionValidationError
23+
from .types import BackendAIModel, ModelValidationFailureInfo, RedisHelperConfig
2424

2525
__all__ = (
2626
"ConfigurationError",
@@ -477,6 +477,14 @@ class ModelDefinition(BaseConfigModel):
477477
description="List of models in the model definition.",
478478
)
479479

480+
@override
481+
@classmethod
482+
def build_validation_error(cls, info: ModelValidationFailureInfo) -> BackendAIError:
483+
return ModelDefinitionValidationError(
484+
extra_msg=info.summary,
485+
extra_data={"errors": info.errors},
486+
)
487+
480488
def merge(self, override: ModelDefinition) -> ModelDefinition:
481489
"""Merge the given override into this definition, returning a new instance."""
482490
return _merge_definition(self, override)
@@ -664,6 +672,14 @@ class ModelDefinitionDraft(BaseConfigModel):
664672

665673
models: list[ModelConfigDraft] | None = None
666674

675+
@override
676+
@classmethod
677+
def build_validation_error(cls, info: ModelValidationFailureInfo) -> BackendAIError:
678+
return ModelDefinitionValidationError(
679+
extra_msg=info.summary,
680+
extra_data={"errors": info.errors},
681+
)
682+
667683
def merge(self, override: ModelDefinitionDraft) -> ModelDefinitionDraft:
668684
"""Merge ``override`` over ``self`` and return a new draft.
669685

src/ai/backend/common/exception.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,24 @@ def error_code(self) -> ErrorCode:
467467
)
468468

469469

470+
class ModelDefinitionValidationError(BackendAIError, web.HTTPBadRequest):
471+
"""Raised by :class:`ai.backend.common.config.ModelDefinition` when
472+
its ``model_validate`` call fails. Lives in ``common`` so the model
473+
itself (also in ``common``) can construct it via
474+
:meth:`BackendAIModel.build_validation_error`, with no caller-side
475+
re-wrap needed."""
476+
477+
error_type = "https://api.backend.ai/probs/model-definition-validation-failed"
478+
error_title = "Model definition validation failed."
479+
480+
def error_code(self) -> ErrorCode:
481+
return ErrorCode(
482+
domain=ErrorDomain.MODEL_SERVICE,
483+
operation=ErrorOperation.ACCESS,
484+
error_detail=ErrorDetail.INVALID_PARAMETERS,
485+
)
486+
487+
470488
class DeprecatedAPI(BackendAIError, web.HTTPBadRequest):
471489
error_type = "https://api.backend.ai/probs/deprecated"
472490
error_title = "This API is deprecated."

src/ai/backend/common/types.py

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,12 @@
5151
TypeAdapter,
5252
ValidationError,
5353
)
54+
from pydantic_core import ErrorDetails
5455
from redis.asyncio import Redis
5556

5657
from .defs import UNKNOWN_CONTAINER_ID, RedisRole
5758
from .exception import (
59+
BackendAIError,
5860
BackendAIModelValidationFailed,
5961
GenericNotImplementedError,
6062
InvalidIpAddressValue,
@@ -116,6 +118,7 @@
116118
"MetricValue",
117119
"ModelServiceProfile",
118120
"ModelServiceStatus",
121+
"ModelValidationFailureInfo",
119122
"MountExpression",
120123
"MountInfoEntry",
121124
"MountPermission",
@@ -181,15 +184,52 @@
181184
)
182185

183186

187+
@dataclass(frozen=True)
188+
class ModelValidationFailureInfo:
189+
"""Stable representation of a failed pydantic ``model_validate*``
190+
call. Passed to :meth:`BackendAIModel.build_validation_error` so
191+
subclasses can produce a domain-specific error without depending
192+
on pydantic's ``ValidationError`` internals.
193+
194+
``summary`` is the multi-line human-readable form (the same string
195+
``str(pydantic.ValidationError)`` produces). ``errors`` is the
196+
per-field list produced by ``exc.errors()``; each entry carries
197+
``type``/``loc``/``msg``/``input``/``ctx``/``url``.
198+
"""
199+
200+
summary: str
201+
errors: list[ErrorDetails]
202+
203+
184204
class BackendAIModel(BaseModel):
185205
"""Project-wide Pydantic base for Backend.AI models.
186206
187207
Overrides ``model_validate`` / ``model_validate_json`` /
188208
``model_validate_strings`` so a ``ValidationError`` is auto-mapped
189-
to :class:`BackendAIModelValidationFailed` (HTTP 400) carrying the structured
209+
to a :class:`BackendAIError` (HTTP 4xx) carrying the structured
190210
per-field error list. Call sites get a clean 4xx without repeating
191211
``try / except ValidationError`` at every site.
192212
213+
The exception instance is produced by :meth:`build_validation_error`,
214+
which defaults to :class:`BackendAIModelValidationFailed`. Subclasses
215+
override the classmethod to surface a domain-specific 400 directly
216+
(no caller-side re-wrap needed). The override receives a
217+
:class:`ModelValidationFailureInfo` (not a raw ``pydantic.ValidationError``)
218+
so subclasses do not depend on pydantic's exception API::
219+
220+
class MyConfig(BackendAIModel):
221+
...
222+
223+
@override
224+
@classmethod
225+
def build_validation_error(
226+
cls, info: ModelValidationFailureInfo
227+
) -> BackendAIError:
228+
return MyConfigParseError(
229+
extra_msg=info.summary,
230+
extra_data={"errors": info.errors},
231+
)
232+
193233
Notes:
194234
195235
* Pydantic v2 routes nested validation through
@@ -202,35 +242,41 @@ class BackendAIModel(BaseModel):
202242
stock Pydantic.
203243
"""
204244

245+
@classmethod
246+
def build_validation_error(cls, info: ModelValidationFailureInfo) -> BackendAIError:
247+
"""Produce the :class:`BackendAIError` to raise when a
248+
``model_validate*`` call fails. Default surfaces a generic
249+
:class:`BackendAIModelValidationFailed`; override on subclasses
250+
to inject a domain-specific 400."""
251+
return BackendAIModelValidationFailed(
252+
extra_msg=info.summary,
253+
extra_data={"errors": info.errors},
254+
)
255+
256+
@classmethod
257+
def _validation_failure_info(cls, exc: ValidationError) -> ModelValidationFailureInfo:
258+
return ModelValidationFailureInfo(summary=str(exc), errors=exc.errors())
259+
205260
@classmethod
206261
def model_validate(cls, *args: Any, **kwargs: Any) -> Self:
207262
try:
208263
return super().model_validate(*args, **kwargs)
209264
except ValidationError as e:
210-
raise BackendAIModelValidationFailed(
211-
extra_msg=str(e),
212-
extra_data={"errors": e.errors()},
213-
) from e
265+
raise cls.build_validation_error(cls._validation_failure_info(e)) from e
214266

215267
@classmethod
216268
def model_validate_json(cls, *args: Any, **kwargs: Any) -> Self:
217269
try:
218270
return super().model_validate_json(*args, **kwargs)
219271
except ValidationError as e:
220-
raise BackendAIModelValidationFailed(
221-
extra_msg=str(e),
222-
extra_data={"errors": e.errors()},
223-
) from e
272+
raise cls.build_validation_error(cls._validation_failure_info(e)) from e
224273

225274
@classmethod
226275
def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Self:
227276
try:
228277
return super().model_validate_strings(*args, **kwargs)
229278
except ValidationError as e:
230-
raise BackendAIModelValidationFailed(
231-
extra_msg=str(e),
232-
extra_data={"errors": e.errors()},
233-
) from e
279+
raise cls.build_validation_error(cls._validation_failure_info(e)) from e
234280

235281

236282
class aobject:

src/ai/backend/manager/data/session/spec.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,30 @@
1717
from __future__ import annotations
1818

1919
from collections.abc import Mapping
20-
from typing import Any
20+
from typing import Any, override
2121
from uuid import UUID
2222

2323
import yarl
2424
from pydantic import ConfigDict, Field
2525

26+
from ai.backend.common.exception import BackendAIError
2627
from ai.backend.common.identifier.domain import DomainName
2728
from ai.backend.common.identifier.project import ProjectID
2829
from ai.backend.common.identifier.resource_group import ResourceGroupName
2930
from ai.backend.common.identifier.session import SessionID
30-
from ai.backend.common.types import AccessKey, BackendAIModel, SessionTypes, VFolderMount
31+
from ai.backend.common.types import (
32+
AccessKey,
33+
BackendAIModel,
34+
ModelValidationFailureInfo,
35+
SessionTypes,
36+
VFolderMount,
37+
)
3138
from ai.backend.manager.data.session.options import (
3239
InternalDataExtras,
3340
KernelExecutionSpec,
3441
SessionOptions,
3542
)
43+
from ai.backend.manager.errors.kernel import IncompleteSessionSpec
3644
from ai.backend.manager.models.network import NetworkType
3745

3846

@@ -138,3 +146,22 @@ class SessionSpec(_SpecBaseModel):
138146
options: SessionOptions
139147
kernel_specs: tuple[KernelSpec, ...]
140148
internal_data_extras: InternalDataExtras = Field(default_factory=InternalDataExtras)
149+
150+
@override
151+
@classmethod
152+
def build_validation_error(cls, info: ModelValidationFailureInfo) -> BackendAIError:
153+
missing_paths = [cls._format_loc(tuple(err["loc"])) for err in info.errors]
154+
return IncompleteSessionSpec(
155+
extra_msg="SessionSpec fields not resolved: " + ", ".join(missing_paths),
156+
extra_data={"missing": missing_paths},
157+
)
158+
159+
@staticmethod
160+
def _format_loc(loc: tuple[object, ...]) -> str:
161+
parts: list[str] = []
162+
for item in loc:
163+
if isinstance(item, int):
164+
parts.append(f"[{item}]")
165+
else:
166+
parts.append(f".{item}" if parts else str(item))
167+
return "".join(parts)

src/ai/backend/manager/repositories/deployment/storage_source/storage_source.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,19 @@
11
"""Storage source implementation for deployment repository."""
22

33
from collections.abc import Mapping
4-
from typing import Any
4+
from typing import Any, override
55

66
import tomli
7-
from pydantic import BaseModel, ValidationError
87
from ruamel.yaml import YAML
98

109
from ai.backend.common.config import ModelDefinitionDraft
11-
from ai.backend.common.exception import InvalidAPIParameters
12-
from ai.backend.common.types import VFolderID
10+
from ai.backend.common.exception import BackendAIError, InvalidAPIParameters
11+
from ai.backend.common.types import BackendAIModel, ModelValidationFailureInfo, VFolderID
1312
from ai.backend.manager.data.vfolder.types import VFolderLocation
1413
from ai.backend.manager.models.storage import StorageSessionManager
1514

1615

17-
class DeploymentConfigInput(BaseModel):
16+
class DeploymentConfigInput(BackendAIModel):
1817
"""Validated ``deployment-config.yaml`` / ``service-definition.toml`` payload.
1918
2019
Shared shape across the new yaml name and the legacy toml name — storage
@@ -31,6 +30,14 @@ class DeploymentConfigInput(BaseModel):
3130
resource_opts: dict[str, Any] | None = None
3231
environ: dict[str, str] | None = None
3332

33+
@override
34+
@classmethod
35+
def build_validation_error(cls, info: ModelValidationFailureInfo) -> BackendAIError:
36+
return InvalidAPIParameters(
37+
f"Invalid deployment config: {info.summary}",
38+
extra_data={"errors": info.errors},
39+
)
40+
3441

3542
class DeploymentStorageSource:
3643
"""Storage source for deployment-related file operations."""
@@ -53,10 +60,7 @@ async def fetch_deployment_config(
5360
raw = await self._fetch_config_file_in_candidates(vfolder_location, candidates)
5461
if raw is None:
5562
return None
56-
try:
57-
return DeploymentConfigInput.model_validate(dict(raw))
58-
except ValidationError as e:
59-
raise InvalidAPIParameters(f"Invalid deployment config: {e}") from e
63+
return DeploymentConfigInput.model_validate(dict(raw))
6064

6165
async def fetch_model_definition(
6266
self,
@@ -73,10 +77,7 @@ async def fetch_model_definition(
7377
raw = await self._fetch_config_file_in_candidates(vfolder_location, candidates)
7478
if raw is None:
7579
return None
76-
try:
77-
return ModelDefinitionDraft.model_validate(dict(raw))
78-
except ValidationError as e:
79-
raise InvalidAPIParameters(f"Invalid model definition: {e}") from e
80+
return ModelDefinitionDraft.model_validate(dict(raw))
8081

8182
async def _fetch_config_file_in_candidates(
8283
self,

0 commit comments

Comments
 (0)