Skip to content

Commit 576de6a

Browse files
mvansegbroeck and claude committed
fix(validator): tolerate null proposed_label in RawValidationDecisionSchema
Mirrors the _coerce_proposed_label validator already on ValidationDecisionSchema. The chunked-validation path (added in #126, merged into this PR via ca08fb2) routes LLM output through RawValidationDecisionSchema instead of the wire-loose ValidationDecisionSchema, so the existing coercer didn't apply on that path.

Failure mode (gemma4-e4b on the post-merge bench): model emits ``proposed_label: null`` for ``decision: keep`` decisions; pydantic rejects the entire chunk's records with ``string_type`` errors; DataDesigner drops the row. On a 5-note × 2-mode bench across 5 candidate models, this caused gemma4-e4b's success rate to crater to 1/5 redact + 2/5 rewrite even though the model otherwise delivers best-in-class quality (util 0.96 / leak 0.03 on the rows that survived).

Tests cover null and int-typed proposed_label drift. 501 tests pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent dc0bc84 commit 576de6a

2 files changed

Lines changed: 48 additions & 0 deletions

File tree

src/anonymizer/engine/schemas/detection.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,23 @@ class RawValidationDecisionSchema(BaseModel):
6060
proposed_label: str = Field(default="")
6161
reason: str | None = None
6262

63+
@field_validator("proposed_label", mode="before")
@classmethod
def _coerce_proposed_label(cls, v: object) -> object:
    """Loosely coerce a raw ``proposed_label`` before str validation runs.

    Counterpart of ValidationDecisionSchema._coerce_proposed_label for the
    validator chunk schema. Small models (gemma4-e4b on the
    chunked-validation path) emit ``proposed_label: null`` when the
    decision is "keep"; without this hook pydantic rejects None for the
    str-typed field and the whole record is dropped.

    Args:
        v: The raw, not-yet-validated value supplied for ``proposed_label``.

    Returns:
        ``""`` when ``v`` is None, ``str(v)`` when ``v`` is an int, float
        or bool, and ``v`` itself for every other input.
    """
    if v is None:
        # A null label becomes the field's empty-string default.
        coerced: object = ""
    elif isinstance(v, (int, float, bool)):
        # Scalar type drift: keep the value, but as text.
        coerced = str(v)
    else:
        # Everything else is passed through for the field's own validation.
        coerced = v
    return coerced
79+
6380

6481
class RawValidationDecisionsSchema(BaseModel):
6582
decisions: list[RawValidationDecisionSchema] = Field(default_factory=list)

tests/engine/test_schemas.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,37 @@ def test_raw_validation_decisions_payload_from_raw_list() -> None:
126126
assert payload.decisions[0].decision.value == "keep"
127127

128128

129+
def test_raw_validation_decisions_coerces_null_proposed_label() -> None:
    """A null ``proposed_label`` is normalized to '' and the record is kept.

    gemma4-e4b on the chunked-validation path emits ``proposed_label: null``
    for ``decision: keep``. The schema's _coerce_proposed_label turns None
    into '' so the record survives instead of dropping (which lost the whole
    row's worth of validator work, ~13–50s wall, in the pre-fix bench).
    """
    raw_decisions = [
        {"id": "city_3_10", "decision": "keep", "proposed_label": None, "reason": None},
        {"id": "name_5_10", "decision": "keep", "proposed_label": None, "reason": "kept"},
    ]
    parsed = RawValidationDecisionsSchema.from_raw({"decisions": raw_decisions})

    # Both records must survive, with or without a reason attached.
    assert len(parsed.decisions) == 2
    labels = [decision.proposed_label for decision in parsed.decisions]
    assert labels == ["", ""]
146+
147+
148+
def test_raw_validation_decisions_coerces_int_proposed_label() -> None:
    """An int-typed ``proposed_label`` is stringified, matching ValidationDecisionSchema."""
    drifted_decision = {"id": "x", "decision": "keep", "proposed_label": 42, "reason": "nope"}
    parsed = RawValidationDecisionsSchema.from_raw({"decisions": [drifted_decision]})

    first_decision = parsed.decisions[0]
    assert first_decision.proposed_label == "42"
158+
159+
129160
def test_raw_validation_decisions_payload_from_malformed_list_returns_empty() -> None:
    """A ``decisions`` list holding a bare string parses to an empty decision list."""
    malformed_payload = {"decisions": ["bad-item"]}
    parsed = RawValidationDecisionsSchema.from_raw(malformed_payload)
    assert parsed.decisions == []

0 commit comments

Comments
 (0)