Skip to content

Commit 7e3616d

Browse files
jopemachine and claude committed
feat(BA-5978): make BackendAIModel.build_validation_error overridable
Add a public ``build_validation_error`` classmethod on ``BackendAIModel`` that returns the ``BackendAIError`` instance to raise when ``model_validate*`` fails. The default surfaces the generic ``BackendAIModelValidationFailed``; subclasses override the method to inject a domain-specific 400 directly, without any caller-side try/except re-wrap.

Apply the override on the two models that previously needed wrapping:

* ``ModelDefinition`` raises ``ModelDefinitionValidationError``. Moved that exception class from ``ai.backend.agent.errors.agent`` to ``ai.backend.common.exception`` (and dropped the agent re-export) so the model — which lives in ``common.config`` — can construct it without an upward-layer import. The agent-specific error_type URL segment is dropped in the move.
* ``SessionSpec`` raises ``IncompleteSessionSpec`` with the existing ``extra_data["missing"]`` shape, using a module-local ``_format_loc`` helper.

The caller-side try/except wrappers around ``ModelDefinition.model_validate`` in ``agent/agent.py`` and ``manager/services/model_card/service.py``, and the wrapper in ``sokovan/scheduling_controller/preparers/session_spec_preparer.py``, are all removed — the models now raise the right domain error directly.

Tests stay unchanged: they still expect ``IncompleteSessionSpec`` / ``ModelDefinitionValidationError`` because the override raises the same types.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 202f333 commit 7e3616d

9 files changed

Lines changed: 126 additions & 77 deletions

File tree

src/ai/backend/agent/agent.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,6 @@
158158
AbstractEvent,
159159
)
160160
from ai.backend.common.exception import (
161-
BackendAIModelValidationFailed,
162161
ConfigurationError,
163162
VolumeMountFailed,
164163
)
@@ -236,7 +235,6 @@
236235
ImagePullTimeoutError,
237236
ModelDefinitionEmptyError,
238237
ModelDefinitionNotFoundError,
239-
ModelDefinitionValidationError,
240238
ModelFolderNotSpecifiedError,
241239
PortConflictError,
242240
ReservedPortError,
@@ -3295,13 +3293,7 @@ async def _load_model_definition(
32953293
f" vFolder {model_folder.name} (ID {model_folder.vfid})",
32963294
)
32973295

3298-
try:
3299-
parsed = ModelDefinition.model_validate(inlined)
3300-
except BackendAIModelValidationFailed as e:
3301-
raise ModelDefinitionValidationError(
3302-
"Failed to validate model definition for vFolder"
3303-
f" {model_folder.name} (ID {model_folder.vfid})",
3304-
) from e
3296+
parsed = ModelDefinition.model_validate(inlined)
33053297
if not parsed.models:
33063298
raise ModelDefinitionEmptyError
33073299
model_definition = parsed.model_dump(mode="json")

src/ai/backend/agent/errors/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
ModelDefinitionEmptyError,
2121
ModelDefinitionInvalidYAMLError,
2222
ModelDefinitionNotFoundError,
23-
ModelDefinitionValidationError,
2423
ModelFolderNotSpecifiedError,
2524
PortConflictError,
2625
ReservedPortError,
@@ -64,7 +63,6 @@
6463
"ModelDefinitionEmptyError",
6564
"ModelDefinitionInvalidYAMLError",
6665
"ModelDefinitionNotFoundError",
67-
"ModelDefinitionValidationError",
6866
"ModelFolderNotSpecifiedError",
6967
"PortConflictError",
7068
"ReservedPortError",

src/ai/backend/agent/errors/agent.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -169,20 +169,6 @@ def error_code(self) -> ErrorCode:
169169
)
170170

171171

172-
class ModelDefinitionValidationError(BackendAIError, web.HTTPBadRequest):
173-
"""Raised when model definition validation fails."""
174-
175-
error_type = "https://api.backend.ai/probs/agent/model-definition-validation-failed"
176-
error_title = "Model definition validation failed."
177-
178-
def error_code(self) -> ErrorCode:
179-
return ErrorCode(
180-
domain=ErrorDomain.MODEL_SERVICE,
181-
operation=ErrorOperation.ACCESS,
182-
error_detail=ErrorDetail.INVALID_PARAMETERS,
183-
)
184-
185-
186172
class ModelFolderNotSpecifiedError(BackendAIError, web.HTTPBadRequest):
187173
"""Raised when no model virtual folder is specified."""
188174

src/ai/backend/common/config.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import sys
66
from collections.abc import Mapping, MutableMapping
77
from pathlib import Path
8-
from typing import Any
8+
from typing import Any, override
99

1010
import humps
1111
import tomli
@@ -19,8 +19,8 @@
1919

2020
from . import validators as tx
2121
from .etcd import AsyncEtcd, ConfigScopes
22-
from .exception import ConfigurationError
23-
from .types import BackendAIModel, RedisHelperConfig
22+
from .exception import BackendAIError, ConfigurationError, ModelDefinitionValidationError
23+
from .types import BackendAIModel, ModelValidationFailureInfo, RedisHelperConfig
2424

2525
__all__ = (
2626
"ConfigurationError",
@@ -477,6 +477,14 @@ class ModelDefinition(BaseConfigModel):
477477
description="List of models in the model definition.",
478478
)
479479

480+
@override
481+
@classmethod
482+
def build_validation_error(cls, info: ModelValidationFailureInfo) -> BackendAIError:
483+
return ModelDefinitionValidationError(
484+
extra_msg=info.summary,
485+
extra_data={"errors": info.errors},
486+
)
487+
480488
def merge(self, override: ModelDefinition) -> ModelDefinition:
481489
"""Merge the given override into this definition, returning a new instance."""
482490
return _merge_definition(self, override)

src/ai/backend/common/exception.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,24 @@ def error_code(self) -> ErrorCode:
467467
)
468468

469469

470+
class ModelDefinitionValidationError(BackendAIError, web.HTTPBadRequest):
471+
"""Raised by :class:`ai.backend.common.config.ModelDefinition` when
472+
its ``model_validate`` call fails. Lives in ``common`` so the model
473+
itself (also in ``common``) can construct it via
474+
:meth:`BackendAIModel.build_validation_error`, with no caller-side
475+
re-wrap needed."""
476+
477+
error_type = "https://api.backend.ai/probs/model-definition-validation-failed"
478+
error_title = "Model definition validation failed."
479+
480+
def error_code(self) -> ErrorCode:
481+
return ErrorCode(
482+
domain=ErrorDomain.MODEL_SERVICE,
483+
operation=ErrorOperation.ACCESS,
484+
error_detail=ErrorDetail.INVALID_PARAMETERS,
485+
)
486+
487+
470488
class DeprecatedAPI(BackendAIError, web.HTTPBadRequest):
471489
error_type = "https://api.backend.ai/probs/deprecated"
472490
error_title = "This API is deprecated."

src/ai/backend/common/types.py

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,12 @@
5151
TypeAdapter,
5252
ValidationError,
5353
)
54+
from pydantic_core import ErrorDetails
5455
from redis.asyncio import Redis
5556

5657
from .defs import UNKNOWN_CONTAINER_ID, RedisRole
5758
from .exception import (
59+
BackendAIError,
5860
BackendAIModelValidationFailed,
5961
GenericNotImplementedError,
6062
InvalidIpAddressValue,
@@ -116,6 +118,7 @@
116118
"MetricValue",
117119
"ModelServiceProfile",
118120
"ModelServiceStatus",
121+
"ModelValidationFailureInfo",
119122
"MountExpression",
120123
"MountInfoEntry",
121124
"MountPermission",
@@ -181,15 +184,52 @@
181184
)
182185

183186

187+
@dataclass(frozen=True)
188+
class ModelValidationFailureInfo:
189+
"""Stable representation of a failed pydantic ``model_validate*``
190+
call. Passed to :meth:`BackendAIModel.build_validation_error` so
191+
subclasses can produce a domain-specific error without depending
192+
on pydantic's ``ValidationError`` internals.
193+
194+
``summary`` is the multi-line human-readable form (the same string
195+
``str(pydantic.ValidationError)`` produces). ``errors`` is the
196+
per-field list produced by ``exc.errors()``; each entry carries
197+
``type``/``loc``/``msg``/``input`` and, when present, ``ctx``/``url``.
198+
"""
199+
200+
summary: str
201+
errors: list[ErrorDetails]
202+
203+
184204
class BackendAIModel(BaseModel):
185205
"""Project-wide Pydantic base for Backend.AI models.
186206
187207
Overrides ``model_validate`` / ``model_validate_json`` /
188208
``model_validate_strings`` so a ``ValidationError`` is auto-mapped
189-
to :class:`BackendAIModelValidationFailed` (HTTP 400) carrying the structured
209+
to a :class:`BackendAIError` (HTTP 4xx) carrying the structured
190210
per-field error list. Call sites get a clean 4xx without repeating
191211
``try / except ValidationError`` at every site.
192212
213+
The exception instance is produced by :meth:`build_validation_error`,
214+
which defaults to :class:`BackendAIModelValidationFailed`. Subclasses
215+
override the classmethod to surface a domain-specific 400 directly
216+
(no caller-side re-wrap needed). The override receives a
217+
:class:`ModelValidationFailureInfo` (not a raw ``pydantic.ValidationError``)
218+
so subclasses do not depend on pydantic's exception API::
219+
220+
class MyConfig(BackendAIModel):
221+
...
222+
223+
@override
224+
@classmethod
225+
def build_validation_error(
226+
cls, info: ModelValidationFailureInfo
227+
) -> BackendAIError:
228+
return MyConfigParseError(
229+
extra_msg=info.summary,
230+
extra_data={"errors": info.errors},
231+
)
232+
193233
Notes:
194234
195235
* Pydantic v2 routes nested validation through
@@ -202,35 +242,41 @@ class BackendAIModel(BaseModel):
202242
stock Pydantic.
203243
"""
204244

245+
@classmethod
246+
def build_validation_error(cls, info: ModelValidationFailureInfo) -> BackendAIError:
247+
"""Produce the :class:`BackendAIError` to raise when a
248+
``model_validate*`` call fails. Default surfaces a generic
249+
:class:`BackendAIModelValidationFailed`; override on subclasses
250+
to inject a domain-specific 400."""
251+
return BackendAIModelValidationFailed(
252+
extra_msg=info.summary,
253+
extra_data={"errors": info.errors},
254+
)
255+
256+
@classmethod
257+
def _validation_failure_info(cls, exc: ValidationError) -> ModelValidationFailureInfo:
258+
return ModelValidationFailureInfo(summary=str(exc), errors=exc.errors())
259+
205260
@classmethod
206261
def model_validate(cls, *args: Any, **kwargs: Any) -> Self:
207262
try:
208263
return super().model_validate(*args, **kwargs)
209264
except ValidationError as e:
210-
raise BackendAIModelValidationFailed(
211-
extra_msg=str(e),
212-
extra_data={"errors": e.errors()},
213-
) from e
265+
raise cls.build_validation_error(cls._validation_failure_info(e)) from e
214266

215267
@classmethod
216268
def model_validate_json(cls, *args: Any, **kwargs: Any) -> Self:
217269
try:
218270
return super().model_validate_json(*args, **kwargs)
219271
except ValidationError as e:
220-
raise BackendAIModelValidationFailed(
221-
extra_msg=str(e),
222-
extra_data={"errors": e.errors()},
223-
) from e
272+
raise cls.build_validation_error(cls._validation_failure_info(e)) from e
224273

225274
@classmethod
226275
def model_validate_strings(cls, *args: Any, **kwargs: Any) -> Self:
227276
try:
228277
return super().model_validate_strings(*args, **kwargs)
229278
except ValidationError as e:
230-
raise BackendAIModelValidationFailed(
231-
extra_msg=str(e),
232-
extra_data={"errors": e.errors()},
233-
) from e
279+
raise cls.build_validation_error(cls._validation_failure_info(e)) from e
234280

235281

236282
class aobject:

src/ai/backend/manager/data/session/spec.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,30 @@
1717
from __future__ import annotations
1818

1919
from collections.abc import Mapping
20-
from typing import Any
20+
from typing import Any, override
2121
from uuid import UUID
2222

2323
import yarl
2424
from pydantic import ConfigDict, Field
2525

26+
from ai.backend.common.exception import BackendAIError
2627
from ai.backend.common.identifier.domain import DomainName
2728
from ai.backend.common.identifier.project import ProjectID
2829
from ai.backend.common.identifier.resource_group import ResourceGroupName
2930
from ai.backend.common.identifier.session import SessionID
30-
from ai.backend.common.types import AccessKey, BackendAIModel, SessionTypes, VFolderMount
31+
from ai.backend.common.types import (
32+
AccessKey,
33+
BackendAIModel,
34+
ModelValidationFailureInfo,
35+
SessionTypes,
36+
VFolderMount,
37+
)
3138
from ai.backend.manager.data.session.options import (
3239
InternalDataExtras,
3340
KernelExecutionSpec,
3441
SessionOptions,
3542
)
43+
from ai.backend.manager.errors.kernel import IncompleteSessionSpec
3644
from ai.backend.manager.models.network import NetworkType
3745

3846

@@ -138,3 +146,22 @@ class SessionSpec(_SpecBaseModel):
138146
options: SessionOptions
139147
kernel_specs: tuple[KernelSpec, ...]
140148
internal_data_extras: InternalDataExtras = Field(default_factory=InternalDataExtras)
149+
150+
@override
151+
@classmethod
152+
def build_validation_error(cls, info: ModelValidationFailureInfo) -> BackendAIError:
153+
missing_paths = [cls._format_loc(tuple(err["loc"])) for err in info.errors]
154+
return IncompleteSessionSpec(
155+
extra_msg="SessionSpec fields not resolved: " + ", ".join(missing_paths),
156+
extra_data={"missing": missing_paths},
157+
)
158+
159+
@staticmethod
160+
def _format_loc(loc: tuple[object, ...]) -> str:
161+
parts: list[str] = []
162+
for item in loc:
163+
if isinstance(item, int):
164+
parts.append(f"[{item}]")
165+
else:
166+
parts.append(f".{item}" if parts else str(item))
167+
return "".join(parts)

src/ai/backend/manager/services/model_card/service.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
from ruamel.yaml import YAML
55

66
from ai.backend.common.config import ModelDefinition
7-
from ai.backend.common.exception import BackendAIModelValidationFailed
87
from ai.backend.common.types import VFolderID
98
from ai.backend.logging.utils import BraceStyleAdapter
109
from ai.backend.manager.clients.storage_proxy.session_manager import StorageSessionManager
@@ -182,12 +181,7 @@ async def _scan_vfolder(
182181
except Exception as e:
183182
raise ModelCardParseError(extra_msg=f"invalid YAML in {model_def_filename}: {e}") from e
184183

185-
try:
186-
model_def = ModelDefinition.model_validate(parsed)
187-
except BackendAIModelValidationFailed as e:
188-
raise ModelCardParseError(
189-
extra_msg=f"invalid model definition in {model_def_filename}: {e}"
190-
) from e
184+
model_def = ModelDefinition.model_validate(parsed)
191185
if not model_def.models:
192186
raise ModelCardParseError(extra_msg=f"no models defined in {model_def_filename}")
193187

src/ai/backend/manager/sokovan/scheduling_controller/preparers/session_spec_preparer.py

Lines changed: 7 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
feeds :meth:`SessionSpec.model_validate`, so the spec schema is the
77
single source of truth for both "what must be set" and error-path
88
reporting. Any field that should have been resolved but still sits at
9-
``None`` surfaces as a ``ValidationError`` entry whose ``loc`` is the
10-
exact attribute path, re-wrapped into :class:`IncompleteSessionSpec`
11-
without any hand-maintained path strings.
9+
``None`` makes :meth:`SessionSpec.model_validate` raise
10+
:class:`IncompleteSessionSpec`, whose ``extra_data["missing"]``
11+
lists the exact attribute paths — no hand-maintained path strings.
1212
1313
The prior dict-based ``SessionPreparer`` (producing ``SessionEnqueueData``)
1414
has been retired — this runner is the only path from caller input
@@ -19,10 +19,8 @@
1919

2020
from collections.abc import Iterable
2121

22-
from ai.backend.common.exception import BackendAIModelValidationFailed
2322
from ai.backend.manager.data.session.draft import SessionSpecDraft
2423
from ai.backend.manager.data.session.spec import SessionSpec
25-
from ai.backend.manager.errors.kernel import IncompleteSessionSpec
2624
from ai.backend.manager.sokovan.scheduling_controller.preparers.draft_rule import (
2725
SessionSpecDraftRule,
2826
SessionSpecPreparationContext,
@@ -59,26 +57,8 @@ def _finalize(self, draft: SessionSpecDraft) -> SessionSpec:
5957
"""Project a fully-prepared draft into a frozen ``SessionSpec``.
6058
6159
Draft fields left at ``None`` (never populated by a rule) drop
62-
out of the dump and surface as ``ValidationError`` entries
63-
pointing at the exact attribute path on the spec. Those are
64-
collected and re-raised as :class:`IncompleteSessionSpec`.
60+
out of the dump and make ``SessionSpec.model_validate`` raise
61+
:class:`IncompleteSessionSpec`, whose ``extra_data["missing"]``
62+
lists the exact attribute paths on the spec.
6563
"""
66-
try:
67-
return SessionSpec.model_validate(draft.model_dump(exclude_none=True))
68-
except BackendAIModelValidationFailed as exc:
69-
errors = (exc.extra_data or {}).get("errors", [])
70-
missing_paths = [self._format_loc(err["loc"]) for err in errors]
71-
raise IncompleteSessionSpec(
72-
extra_msg="SessionSpec fields not resolved: " + ", ".join(missing_paths),
73-
extra_data={"missing": missing_paths},
74-
) from exc
75-
76-
@staticmethod
77-
def _format_loc(loc: tuple[object, ...]) -> str:
78-
parts: list[str] = []
79-
for item in loc:
80-
if isinstance(item, int):
81-
parts.append(f"[{item}]")
82-
else:
83-
parts.append(f".{item}" if parts else str(item))
84-
return "".join(parts)
64+
return SessionSpec.model_validate(draft.model_dump(exclude_none=True))

0 commit comments

Comments
 (0)