Skip to content

Commit 51d6b26

Browse files
committed
fix: tighten pydantic validation contracts
1 parent 2ab7c94 commit 51d6b26

6 files changed

Lines changed: 183 additions & 46 deletions

File tree

src/excelalchemy/adapters/pydantic.py

Lines changed: 110 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,65 @@ def _normalize_validation_message(
132132
return NormalizedValidationMessage(normalized)
133133

134134

135+
def _normalize_pydantic_error(
    error: Mapping[str, object],
    field_def: FieldMetaInfo | None = None,
    *,
    excel_codec: type[ExcelFieldCodec] | None = None,
) -> NormalizedValidationMessage:
    """Convert one validation error entry into a normalized message.

    The structured constraint normalizer is tried first. When it yields
    ``None`` the error's ``msg`` text is normalized instead.

    Args:
        error: A single error mapping; its ``msg`` entry is read here.
        field_def: Metadata of the field the error belongs to, or ``None``
            when the error is not tied to a known field.
        excel_codec: Codec class forwarded unchanged to both normalizers.

    Returns:
        The normalized validation message for ``error``.
    """
    constraint_message = _normalize_constraint_error(error, field_def, excel_codec=excel_codec)
    if constraint_message is not None:
        return constraint_message

    # Values of ``error`` are typed as ``object``; coerce the message to text
    # so the message normalizer always receives a string, matching how the
    # constraint normalizer reads the same key.
    raw_message = str(error.get('msg', ''))
    return _normalize_validation_message(raw_message, field_def, excel_codec=excel_codec)
145+
146+
147+
def _normalize_constraint_error(
    error: Mapping[str, object],
    field_def: FieldMetaInfo | None,
    *,
    excel_codec: type[ExcelFieldCodec] | None = None,
) -> NormalizedValidationMessage | None:
    """Build a constraint message from an error's ``type`` and ``ctx``.

    For list-size and string-length errors the limit is taken from the
    error context when it holds an integer, otherwise from the field's
    declared constraints. Any other error with a mapping ``ctx`` is handed
    to the text-based constraint normalizer.

    Args:
        error: A single error mapping; ``type``, ``ctx`` and ``msg`` are read.
        field_def: Metadata of the failing field. ``None`` disables
            constraint normalization.
        excel_codec: Codec class forwarded to the text-based normalizer.

    Returns:
        A normalized message, or ``None`` when ``field_def`` is ``None`` or
        the error carries no mapping ``ctx``. The text-based fallback's own
        result is returned as-is.
    """
    if field_def is None:
        return None

    error_type = error.get('type')
    ctx = error.get('ctx')
    if not isinstance(ctx, Mapping):
        return None

    ctx = cast(Mapping[object, object], ctx)
    field_type = ctx.get('field_type')
    constraints = field_def.constraints

    # NOTE: the context value is preferred whenever it is present, including
    # 0. A plain ``a or b`` would discard a legitimate limit of 0 and fall
    # back to the declared constraint instead.
    if error_type == 'too_short' and field_type == 'List':
        min_items = _int_from_context(ctx, 'min_length')
        if min_items is None:
            min_items = constraints.min_items
        if min_items is not None:
            return NormalizedValidationMessage.from_key(MessageKey.MIN_ITEMS_REQUIRED, min_items=min_items)

    if error_type == 'too_long' and field_type == 'List':
        max_items = _int_from_context(ctx, 'max_length')
        if max_items is None:
            max_items = constraints.max_items
        if max_items is not None:
            return NormalizedValidationMessage.from_key(MessageKey.MAX_ITEMS_ALLOWED, max_items=max_items)

    if error_type == 'string_too_short':
        min_length = _int_from_context(ctx, 'min_length')
        if min_length is None:
            min_length = constraints.min_length
        if min_length is not None:
            return NormalizedValidationMessage.from_key(MessageKey.MIN_LENGTH_CHARACTERS, min_length=min_length)

    if error_type == 'string_too_long':
        max_length = _int_from_context(ctx, 'max_length')
        if max_length is None:
            max_length = constraints.max_length
        if max_length is not None:
            return NormalizedValidationMessage.from_key(MessageKey.MAX_LENGTH_CHARACTERS, max_length=max_length)

    # No structured match (or no usable limit): fall back to parsing the text.
    return _normalize_constraint_message(str(error.get('msg', '')), field_def, excel_codec=excel_codec)
185+
186+
187+
def _int_from_context(ctx: Mapping[object, object], key: str) -> int | None:
188+
value = ctx.get(key)
189+
if isinstance(value, int):
190+
return value
191+
return None
192+
193+
135194
def _normalize_constraint_message(
136195
message: str,
137196
field_def: FieldMetaInfo | None,
@@ -293,10 +352,50 @@ def fields(self) -> Iterable[PydanticFieldAdapter]:
293352
def field(self, name: str) -> PydanticFieldAdapter:
    """Wrap the model field called ``name`` in a ``PydanticFieldAdapter``.

    The raw field is read by subscripting ``self.model.model_fields``; any
    lookup error for an unknown name propagates to the caller.
    """
    raw_field = self.model.model_fields[name]
    return PydanticFieldAdapter(name=name, raw_field=raw_field)
295354

355+
def field_for_validation_location(self, location: str) -> PydanticFieldAdapter | None:
    """Return the adapter for the field that a validation location refers to.

    ``location`` is first resolved to a model field name. ``None`` is
    returned when no field matches; otherwise the adapter comes from
    ``self.field``.
    """
    resolved_name = self._field_name_for_validation_location(location)
    return None if resolved_name is None else self.field(resolved_name)
360+
361+
def _field_name_for_validation_location(self, location: str) -> str | None:
362+
if location in self.model.model_fields:
363+
return location
364+
365+
for name, field_info in self.model.model_fields.items():
366+
if location in _validation_locations_for_field(field_info):
367+
return name
368+
369+
return None
370+
296371
def field_names(self) -> list[str]:
    """List the model's field names in the order ``model_fields`` yields them."""
    return [*self.model.model_fields]
298373

299374

375+
def _validation_locations_for_field(field_info: FieldInfo) -> set[str]:
    """Collect the location names derived from a field's alias settings.

    The result holds ``field_info.alias`` when it is a string, plus every
    name extracted from ``field_info.validation_alias``.
    """
    alias = field_info.alias
    found: set[str] = {alias} if isinstance(alias, str) else set()
    _collect_validation_alias_locations(field_info.validation_alias, found)
    return found
381+
382+
383+
def _collect_validation_alias_locations(alias: object, locations: set[str]) -> None:
    """Add every location name described by ``alias`` to ``locations``.

    Three shapes are recognized, checked in this order:

    * a plain string, which is added as-is;
    * an object with a list/tuple ``choices`` attribute, whose members are
      each processed recursively;
    * an object with a non-empty list/tuple ``path`` attribute, of which
      only the first element is added, and only if it is a string.

    Anything else leaves ``locations`` untouched.
    """
    if isinstance(alias, str):
        locations.add(alias)
        return

    nested = getattr(alias, 'choices', None)
    if isinstance(nested, (list, tuple)):
        for member in cast(Iterable[object], nested):
            _collect_validation_alias_locations(member, locations)
        return

    segments = getattr(alias, 'path', None)
    if not isinstance(segments, (list, tuple)) or not segments:
        return

    head = segments[0]
    if isinstance(head, str):
        locations.add(head)
397+
398+
300399
def extract_pydantic_model(
301400
model: type[BaseModel] | None,
302401
) -> list[FieldMetaInfo]:
@@ -383,7 +482,7 @@ def _model_validate[ModelT: BaseModel](
383482
failed_fields: set[str],
384483
) -> ModelT | list[ExcelCellError | ExcelRowError]:
385484
try:
386-
return model.model_validate(data)
485+
return model.model_validate(data, by_alias=False, by_name=True)
387486
except ValidationError as exc:
388487
return _map_validation_error(exc, model_adapter, failed_fields)
389488

@@ -397,21 +496,26 @@ def _map_validation_error(
397496
for error in exc.errors():
398497
loc = error.get('loc', ())
399498
if not loc:
400-
normalized = _normalize_validation_message(str(error['msg']))
499+
normalized = _normalize_pydantic_error(error)
401500
mapped.append(_build_row_error(normalized))
402501
continue
403502

404503
field_name = loc[0]
405504
if not isinstance(field_name, str):
406-
normalized = _normalize_validation_message(str(error['msg']))
505+
normalized = _normalize_pydantic_error(error)
407506
mapped.append(_build_row_error(normalized))
408507
continue
409508
if field_name in failed_fields:
410509
continue
411510

412-
field_adapter = model_adapter.field(field_name)
413-
normalized = _normalize_validation_message(
414-
str(error['msg']),
511+
field_adapter = model_adapter.field_for_validation_location(field_name)
512+
if field_adapter is None:
513+
normalized = _normalize_pydantic_error(error)
514+
mapped.append(_build_row_error(normalized))
515+
continue
516+
517+
normalized = _normalize_pydantic_error(
518+
error,
415519
field_adapter.declared_metadata,
416520
excel_codec=field_adapter.excel_codec,
417521
)

src/excelalchemy/results/import_result.py

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,29 +22,21 @@ class ValidateResult(StrEnum):
2222
class ImportResult(BaseModel):
2323
"""Structured result returned from an import run."""
2424

25-
model_config = ConfigDict(extra='allow')
25+
# fmt: off
26+
model_config = ConfigDict(extra='forbid')
2627

2728
result: ValidateResult = Field(description='Overall import result.')
2829

2930
is_required_missing: bool = Field(default=False, description='Whether required headers are missing.')
30-
missing_required: list[Label] = Field(
31-
default_factory=empty_labels, description='Required headers missing from the workbook.'
32-
)
33-
missing_primary: list[Label] = Field(
34-
default_factory=empty_labels, description='Primary-key headers missing from the workbook.'
35-
)
36-
unrecognized: list[Label] = Field(
37-
default_factory=empty_labels, description='Headers present in the workbook but unknown to the schema.'
38-
)
39-
duplicated: list[Label] = Field(
40-
default_factory=empty_labels, description='Headers that appear more than once in the workbook.'
41-
)
42-
43-
url: str | None = Field(
44-
default=None, description='Download URL for the import result workbook when one is produced.'
45-
)
31+
missing_required: list[Label] = Field(default_factory=empty_labels, description='Required headers missing from the workbook.')
32+
missing_primary: list[Label] = Field(default_factory=empty_labels, description='Primary-key headers missing from the workbook.')
33+
unrecognized: list[Label] = Field(default_factory=empty_labels, description='Headers present in the workbook but unknown to the schema.')
34+
duplicated: list[Label] = Field(default_factory=empty_labels, description='Headers that appear more than once in the workbook.')
35+
36+
url: str | None = Field(default=None, description='Download URL for the import result workbook when one is produced.')
4637
success_count: int = Field(default=0, description='Number of rows imported successfully.')
4738
fail_count: int = Field(default=0, description='Number of rows that failed to import.')
39+
# fmt: on
4840

4941
@property
5042
def is_success(self) -> bool:

src/excelalchemy/results/preflight.py

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@ def empty_labels() -> list[Label]:
1414
class ValidateHeaderResult(BaseModel):
1515
"""Header validation result."""
1616

17+
# fmt: off
1718
missing_required: list[Label] = Field(description='Required headers missing from the workbook.')
1819
missing_primary: list[Label] = Field(description='Primary-key headers missing from the workbook.')
1920
unrecognized: list[Label] = Field(description='Headers present in the workbook but unknown to the schema.')
2021
duplicated: list[Label] = Field(description='Headers that appear more than once in the workbook.')
2122
is_valid: bool = Field(default=True, description='Whether header validation succeeded.')
23+
# fmt: on
2224

2325
@property
2426
def is_required_missing(self) -> bool:
@@ -38,32 +40,20 @@ class ImportPreflightStatus(StrEnum):
3840
class ImportPreflightResult(BaseModel):
3941
"""Structured result returned from lightweight import preflight."""
4042

43+
# fmt: off
4144
status: ImportPreflightStatus = Field(description='Overall preflight result.')
4245
sheet_name: str = Field(description='Configured worksheet name used for preflight.')
4346
sheet_exists: bool = Field(description='Whether the configured worksheet was found.')
44-
has_merged_header: bool | None = Field(
45-
default=None,
46-
description='Whether the workbook uses a merged two-row header when the header block was readable.',
47-
)
47+
has_merged_header: bool | None = Field(default=None, description='Whether the workbook uses a merged two-row header when the header block was readable.')
4848
estimated_row_count: int = Field(default=0, description='Estimated number of data rows for a later import run.')
49-
structural_issue_codes: list[str] = Field(
50-
default_factory=list,
51-
description='Stable structural issue codes emitted for non-header preflight failures.',
52-
)
49+
structural_issue_codes: list[str] = Field(default_factory=list, description='Stable structural issue codes emitted for non-header preflight failures.')
5350

5451
is_required_missing: bool = Field(default=False, description='Whether required headers are missing.')
55-
missing_required: list[Label] = Field(
56-
default_factory=empty_labels, description='Required headers missing from the workbook.'
57-
)
58-
missing_primary: list[Label] = Field(
59-
default_factory=empty_labels, description='Primary-key headers missing from the workbook.'
60-
)
61-
unrecognized: list[Label] = Field(
62-
default_factory=empty_labels, description='Headers present in the workbook but unknown to the schema.'
63-
)
64-
duplicated: list[Label] = Field(
65-
default_factory=empty_labels, description='Headers that appear more than once in the workbook.'
66-
)
52+
missing_required: list[Label] = Field(default_factory=empty_labels, description='Required headers missing from the workbook.')
53+
missing_primary: list[Label] = Field(default_factory=empty_labels, description='Primary-key headers missing from the workbook.')
54+
unrecognized: list[Label] = Field(default_factory=empty_labels, description='Headers present in the workbook but unknown to the schema.')
55+
duplicated: list[Label] = Field(default_factory=empty_labels, description='Headers that appear more than once in the workbook.')
56+
# fmt: on
6757

6858
@property
6959
def is_valid(self) -> bool:

src/excelalchemy/worksheet/header.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@
1010
class ExcelHeader(BaseModel):
1111
"""Normalized worksheet header extracted from user input."""
1212

13+
# fmt: off
1314
label: Label = Field(description='Worksheet header label.')
14-
parent_label: Label = Field(
15-
description='Parent worksheet header label. Falls back to the label itself for flat headers.'
16-
)
15+
parent_label: Label = Field(description='Parent worksheet header label. Falls back to the label itself for flat headers.')
1716
offset: int = Field(default=0, description='Child-column offset under a merged parent header.')
17+
# fmt: on
1818

1919
@property
2020
def unique_label(self) -> UniqueLabel:

tests/contracts/test_pydantic_contract.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
ExcelFieldCodec,
1313
ExcelRowError,
1414
Label,
15+
MultiChoiceCodec,
16+
Option,
17+
OptionId,
1518
ProgrammaticError,
1619
)
1720
from excelalchemy.adapters.pydantic import extract_pydantic_model, instantiate_pydantic_model
@@ -132,6 +135,49 @@ def must_use_company_domain(cls, value: str) -> str:
132135
assert isinstance(wrong_domain, list)
133136
assert wrong_domain == [ExcelCellError(label=Label('邮箱'), message='Must use the company domain')]
134137

138+
def test_instantiate_pydantic_model_validates_by_model_field_name_when_field_has_alias(self):
    """Input keyed by the Python field name populates a field that declares an alias."""

    class AliasedModel(BaseModel):
        full_name: Annotated[str, Field(alias='fullName'), ExcelColumn(label='姓名', order=1)]

    row = {'full_name': 'Alice'}

    instance = instantiate_pydantic_model(row, AliasedModel)

    assert isinstance(instance, AliasedModel)
    assert instance.full_name == 'Alice'
146+
147+
def test_instantiate_pydantic_model_maps_aliased_field_errors_to_excel_labels(self):
    """A length violation on an aliased field is reported as a cell error under its Excel label."""

    class AliasedModel(BaseModel):
        full_name: Annotated[str, Field(alias='fullName', min_length=5), ExcelColumn(label='姓名', order=1)]

    expected_errors = [
        ExcelCellError(label=Label('姓名'), message='The minimum length is 5 characters', min_length=5),
    ]

    errors = instantiate_pydantic_model({'full_name': 'Al'}, AliasedModel)

    assert isinstance(errors, list)
    assert errors == expected_errors
157+
158+
def test_instantiate_pydantic_model_normalizes_list_constraint_errors_from_pydantic_context(self):
    """Too-few and too-many selections on a list field yield item-count cell errors."""
    options = [Option(id=OptionId(key), name=key.upper()) for key in ('a', 'b', 'c')]

    class MultiChoiceModel(BaseModel):
        choices: Annotated[
            list[str],
            Field(min_length=2, max_length=2),
            ExcelColumn(codec=MultiChoiceCodec(), label='选项', order=1, options=options),
        ]

    # One selection is below the minimum of two; three is above the maximum.
    too_short = instantiate_pydantic_model({'choices': ['A']}, MultiChoiceModel)
    too_long = instantiate_pydantic_model({'choices': ['A', 'B', 'C']}, MultiChoiceModel)

    expected_short = [ExcelCellError(label=Label('选项'), message='Select at least 2 items', min_items=2)]
    expected_long = [ExcelCellError(label=Label('选项'), message='Select no more than 2 items', max_items=2)]

    assert isinstance(too_short, list)
    assert too_short == expected_short

    assert isinstance(too_long, list)
    assert too_long == expected_long
180+
135181
def test_instantiate_pydantic_model_maps_model_validators_to_row_errors(self):
136182
class ModelValidatedContract(BaseModel):
137183
email: Annotated[str, ExcelColumn(codec=EmailCodec(), label='邮箱', order=1)]

tests/contracts/test_result_contract.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import pytest
2+
from pydantic import ValidationError
23

34
from excelalchemy import ImportPreflightStatus, Label, ProgrammaticError, ValidateResult
45
from excelalchemy.results import (
@@ -57,6 +58,10 @@ def test_import_result_returns_success_defaults_for_success_case(self):
5758
assert result.is_header_invalid is False
5859
assert result.is_data_invalid is False
5960

61+
def test_import_result_rejects_unknown_fields(self):
    """Constructing an ``ImportResult`` with an undeclared keyword raises ``ValidationError``."""
    undeclared = {'legacy_field': 'kept'}

    with pytest.raises(ValidationError):
        ImportResult(result=ValidateResult.SUCCESS, **undeclared)
64+
6065
def test_import_result_to_api_payload_for_success_case(self):
6166
result = ImportResult(result=ValidateResult.SUCCESS, success_count=1, fail_count=0, url='memory://result.xlsx')
6267

0 commit comments

Comments
 (0)