Skip to content

Commit e46eb49

Browse files
MaxGhenisclaude
andcommitted
Test zero_rate_per_column breakdown is populated on every stage result
Adds coverage for the per-column zero-rate field added earlier. Verifies: - every target column is present - real / synth / abs_diff entries are shaped and bounded correctly - abs_diff is consistent with the real/synth difference - scalar zero_rate_mae is in the same ballpark as per-column diffs All 8 bakeoff tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 31bae2a commit e46eb49

1 file changed

Lines changed: 27 additions & 0 deletions

File tree

tests/bakeoff/test_scale_up.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,3 +161,30 @@ def test_incremental_jsonl_persists_each_method(
161161
for line in lines:
162162
d = _json.loads(line)
163163
assert {"method", "stage", "coverage", "fit_wall_seconds"} <= set(d)
164+
165+
166+
def test_zero_rate_per_column_populated(small_config: ScaleUpStageConfig) -> None:
167+
"""Per-column zero-rate breakdown is recorded for every target column."""
168+
runner = ScaleUpRunner(small_config)
169+
results = runner.run()
170+
assert len(results) == 1
171+
r = results[0]
172+
assert r.zero_rate_per_column, "Expected non-empty zero_rate_per_column"
173+
for col, entry in r.zero_rate_per_column.items():
174+
assert set(entry) == {"real", "synth", "abs_diff"}
175+
assert 0.0 <= entry["real"] <= 1.0
176+
assert 0.0 <= entry["synth"] <= 1.0
177+
assert entry["abs_diff"] >= 0.0
178+
# abs_diff should be consistent with real/synth values.
179+
assert abs(entry["abs_diff"] - abs(entry["real"] - entry["synth"])) < 1e-9
180+
# Confirm all target columns are covered.
181+
covered = set(r.zero_rate_per_column)
182+
assert set(small_config.target_cols) <= covered
183+
# And that the scalar MAE is close to the mean of abs_diff over target cols.
184+
target_diffs = [
185+
r.zero_rate_per_column[c]["abs_diff"] for c in small_config.target_cols
186+
]
187+
# MAE is averaged over all shared columns (conditioning + target), so this
188+
# is only a rough consistency check: the per-target mean should be
189+
# within the scalar MAE's ballpark.
190+
assert min(target_diffs) <= r.zero_rate_mae + 1e-9

0 commit comments

Comments
 (0)