spacedock-dev · kentwelcome · Jun 21, 2026 · Jun 21, 2026 · Jun 21, 2026
diff --git a/packages/razorback-plugin-dab/src/razorback_plugin_dab/verify/verify_batch.py b/packages/razorback-plugin-dab/src/razorback_plugin_dab/verify/verify_batch.py
@@ -24,7 +24,15 @@ def emit_reward(
         answer = answers.get(key, "") if isinstance(answers, dict) else ""
         validate_fn = _load_validate(validators[query_id])
         if answer:
-            is_valid, reason = validate_fn(answer)
+            # Isolate per-query validator failures: one validator raising (e.g. a
+            # validator calling .lower() on a non-string answer) must NOT abort
+            # grading for the whole dataset. Aborting would leave reward.json unwritten,
+            # and the entire dataset would drop out of the run (RewardFileNotFoundError).
+            # Score the offending query 0 with the error as the reason and continue.
+            try:
+                is_valid, reason = validate_fn(answer)
+            except Exception as exc:  # noqa: BLE001 — robustness boundary, any validator error
+                is_valid, reason = False, f"validator error: {type(exc).__name__}: {exc}"
         else:
             is_valid, reason = False, "empty answer"
         reward = 1.0 if is_valid else 0.0

diff --git a/packages/razorback-plugin-dab/tests/unit/test_verify_batch_reward_shape.py b/packages/razorback-plugin-dab/tests/unit/test_verify_batch_reward_shape.py
@@ -114,3 +114,39 @@ def test_batch_verify_does_not_mask_validator_import_errors(tmp_path: Path) -> N
     assert "missing_verifier_dependency" in result.stderr
     assert not reward_out.exists()
     assert not per_query_out.exists()
+
+
+def test_batch_verify_isolates_per_query_runtime_validator_error(tmp_path: Path) -> None:
+    """One query's validator blowing up at call time costs only that query.
+
+    The failing query must score 0 with the error kept as its reason, while the
+    other query is still graded and both reward files are written.
+    """
+    sandbox = tmp_path / "tests"
+    sandbox.mkdir()
+    shutil.copy2(Path(verify_batch_module.__file__), sandbox / "verify_batch.py")
+    # q1's validator raises on a list answer (.lower()); q2's is well-behaved.
+    validator_sources = {
+        "validate_q1.py": "def validate(answer):\n" "    return (answer.lower() == 'x', 'checked')\n",
+        "validate_q2.py": "def validate(answer):\n" "    return (answer == 'ok', 'checked')\n",
+    }
+    for filename, source in validator_sources.items():
+        (sandbox / filename).write_text(source)
+    answers_file = tmp_path / "answers.json"
+    answers_file.write_text(json.dumps({"q1": ["a", "b"], "q2": "ok"}))  # q1 is a list
+    reward_file = tmp_path / "reward.json"
+    per_query_file = tmp_path / "reward_per_query.json"
+
+    completed = _run_generated_verify_batch(
+        tests_dir=sandbox,
+        answers_path=answers_file,
+        reward_out=reward_file,
+        per_query_out=per_query_file,
+    )
+
+    assert completed.returncode == 0, completed.stderr
+    graded = json.loads(per_query_file.read_text())
+    assert graded["q1"]["reward"] == 0.0
+    assert "validator error" in graded["q1"]["reason"]
+    assert graded["q2"]["reward"] == 1.0  # the well-behaved query is still graded
+    assert json.loads(reward_file.read_text()) == {"reward": 0.5}