|
"""fill_prof_email.py regression tests.

Covers the three outcome shapes (PASS / FAIL / partial) across
the two YAML kinds (hydration / binding), plus the failure
modes that matter most: missing fields warn (exit 2), a missing
report.md is a hard error (exit 1), and bench name auto-detect
picks the right template.

NOTE: "NaN pred doesn't crash" is an intended failure mode to
cover, but no test in this file exercises it yet.
"""
| 8 | +from __future__ import annotations |
| 9 | + |
| 10 | +import csv |
| 11 | +import io |
| 12 | +import subprocess |
| 13 | +import sys |
| 14 | +import tempfile |
| 15 | +from pathlib import Path |
| 16 | + |
# Grandparent of the directory containing this file (parents[2] of the
# resolved path). Used as the working directory for the subprocess calls
# in this module, whose script/fixture paths are written relative to it.
REPO_ROOT = Path(__file__).resolve().parents[2]
| 18 | + |
| 19 | + |
| 20 | +def _write_report_with_overrides(dir_path: Path, kind: str, |
| 21 | + verdict: str, |
| 22 | + rows_table: str, |
| 23 | + mae: str = "0.42", |
| 24 | + pearson: str = "+0.993", |
| 25 | + ghmc: str = "overall mean 77%, " |
| 26 | + "worst 71%, 3 " |
| 27 | + "compounds report"): |
| 28 | + """Synthesise a report.md with controllable fields so we can |
| 29 | + exercise filler edge cases without running the sampler.""" |
| 30 | + title = {"hydration": "Hydration FEP report", |
| 31 | + "binding": "Binding FEP report"}[kind] |
| 32 | + report = f"""\ |
| 33 | +# {title} — {verdict} |
| 34 | +
|
| 35 | +- source: `/tmp/synth` |
| 36 | +- compounds: 12 ok / 12 total |
| 37 | +
|
| 38 | +## Gate verdict |
| 39 | +
|
| 40 | +- **MAE gate** (≤ 1.5 kcal/mol): PASS (MAE = {mae} kcal/mol) |
| 41 | +- **GHMC acceptance gate** (≥ 70% per compound): PASS ({ghmc}) |
| 42 | +
|
| 43 | +## Aggregate accuracy |
| 44 | +
|
| 45 | +- MAE = {mae} kcal/mol |
| 46 | +- RMSE = 0.447 kcal/mol |
| 47 | +- Pearson r = {pearson} |
| 48 | +- Spearman ρ = +0.965 |
| 49 | +- Kendall τ = +0.879 |
| 50 | +
|
| 51 | +## Per-compound results |
| 52 | +
|
| 53 | +| name | smiles | expt | pred | ± | residual | |resid| | within σ | wall (s) | flags | |
| 54 | +|------|--------|------|------|---|----------|-----:|---------:|---------:|-------| |
| 55 | +{rows_table} |
| 56 | +""" |
| 57 | + (dir_path / "report.md").write_text(report, encoding="utf-8") |
| 58 | + |
| 59 | + |
def _run_fill(dir_path: Path, *extra_args: str) -> tuple[int, str]:
    """Run ``scripts/fill_prof_email.py`` on *dir_path* and capture it.

    The script is launched with the interpreter that is running the
    tests (``sys.executable``) rather than whatever ``python`` happens
    to resolve to on PATH, so it works inside virtualenvs and on systems
    that only ship ``python3``.

    Args:
        dir_path: Directory passed to the script as its first argument.
        *extra_args: Additional command-line arguments, appended verbatim.

    Returns:
        ``(returncode, output)`` where *output* is stdout followed by
        stderr, both decoded as text.
    """
    cmd = [sys.executable, "scripts/fill_prof_email.py", str(dir_path),
           *extra_args]
    # The script path is relative, so run from the repository root.
    proc = subprocess.run(
        cmd,
        cwd=REPO_ROOT,
        capture_output=True, text=True,
    )
    return proc.returncode, proc.stdout + proc.stderr
| 68 | + |
| 69 | + |
def test_hydration_pass_all_fields_fill():
    """A hydration PASS report with methane and acetamide rows fills
    every field: exit 0, Milestone A subject, both sign checks PASS,
    and no ``<missing>`` marker."""
    rows = "\n".join([
        "| methane | `C` | +2.00 | +1.72 | 0.28 | -0.28 | 0.28 | ✓ | 420 | |",
        "| acetamide | `CC(=O)N` | -9.71 | -8.90 | 0.55 | +0.81 | 0.81 "
        "| | 560 | |",
    ])
    with tempfile.TemporaryDirectory(prefix="cellsim_fill_") as scratch:
        report_dir = Path(scratch)
        _write_report_with_overrides(
            report_dir, kind="hydration", verdict="PASS", rows_table=rows)
        rc, out = _run_fill(report_dir)

    assert rc == 0, f"hydration pass should exit 0; got {rc}\n{out}"
    for expected in (
            "Subject: Milestone A — FreeSolv FEP results",
            "Sign-correct on methane: PASS",
            "Sign-correct on acetamide: PASS"):
        assert expected in out
    assert "<missing>" not in out
| 87 | + |
| 88 | + |
def test_hydration_fail_methane_sign():
    """Methane predicted negative: the email must report FAIL for the
    methane sign check while the fill itself still exits 0."""
    rows = "\n".join([
        "| methane | `C` | +2.00 | -1.85 | 0.45 | -3.85 | 3.85 | | 180 "
        "| SIGN WRONG |",
        "| acetamide | `CC(=O)N` | -9.71 | -11.20 | 0.85 | -1.49 | 1.49 "
        "| | 265 | |",
    ])
    with tempfile.TemporaryDirectory(prefix="cellsim_fill_") as scratch:
        report_dir = Path(scratch)
        _write_report_with_overrides(
            report_dir, kind="hydration", verdict="FAIL", rows_table=rows)
        rc, out = _run_fill(report_dir)

    # Exit 0 means "all fields filled". A FAIL verdict is email content
    # conveyed to the prof, not a failure of the fill step.
    assert rc == 0, out
    assert "Overall verdict: FAIL" in out
    assert "Sign-correct on methane: FAIL" in out
    assert "Sign-correct on acetamide: PASS" in out
| 109 | + |
| 110 | + |
def test_binding_streptavidin_template_selected():
    """A binding report with biotin/desthiobiotin rows selects the
    binding template, and the subject says 'streptavidin binding'."""
    import re as _re

    rows = "\n".join([
        "| biotin | `O=C1N...` | -18.30 | -17.90 | 0.50 | +0.40 | 0.40 "
        "| ✓ | 1200 | |",
        "| desthiobiotin | `C[C@H]1...` | -13.20 | -12.50 | 0.60 | +0.70 "
        "| 0.70 | | 1300 | |",
    ])
    with tempfile.TemporaryDirectory(prefix="cellsim_fill_") as scratch:
        report_dir = Path(scratch)
        _write_report_with_overrides(
            report_dir, kind="binding", verdict="PASS",
            mae="0.95", rows_table=rows)
        rc, out = _run_fill(report_dir)

    # Exit 2 (a GHMC/other field missing from the synthetic report) is
    # tolerated: this test checks template, subject and values, not the
    # missing-field warning.
    assert rc in (0, 2), out
    assert "Subject: Milestone B — streptavidin binding FEP" in out, (
        f"expected 'streptavidin binding' subject; got:\n{out[:500]}")
    # The MAE line is matched with loose whitespace.
    mae_line = _re.search(
        r"MAE vs published ΔG_bind:\s+0\.95 kcal/mol", out)
    assert mae_line, (
        f"MAE line missing / mismatched; got:\n{out[:500]}")
    assert "(gate <= 2.0)" in out
| 138 | + |
| 139 | + |
def test_binding_egfr_template_selected():
    """A binding report with erlotinib/gefitinib rows selects the EGFR
    bench name, and the email carries the Cheng-Prusoff caveat."""
    with tempfile.TemporaryDirectory(
            prefix="cellsim_fill_") as tmp:
        tmp = Path(tmp)
        _write_report_with_overrides(
            tmp, kind="binding", verdict="PASS",
            rows_table=(
                "| erlotinib | `COCCOc1...` | -11.86 | -11.42 | 0.44 | "
                "+0.44 | 0.44 | ✓ | 3456 | |\n"
                "| gefitinib | `COc1cc...` | -10.20 | -10.01 | 0.38 | "
                "+0.19 | 0.19 | ✓ | 2890 | |"))
        rc, out = _run_fill(tmp)
        # Attach the captured output so a non-zero exit is diagnosable,
        # including under the __main__ runner, which prints only the
        # assertion message.
        assert rc == 0, f"EGFR binding fill should exit 0; got {rc}\n{out}"
        assert "EGFR kinase binding FEP" in out, (
            f"expected EGFR bench name; got:\n{out[:400]}")
        assert "Cheng-Prusoff" in out, (
            "binding template should mention the IC50-offset caveat")
| 157 | + |
| 158 | + |
def test_binding_non_binder_flagged_in_email():
    """Binding rule: any compound with ΔG_pred >= 0 fails.
    Email must surface the non-binder explicitly."""
    with tempfile.TemporaryDirectory(
            prefix="cellsim_fill_") as tmp:
        tmp = Path(tmp)
        _write_report_with_overrides(
            tmp, kind="binding", verdict="FAIL",
            rows_table=(
                "| biotin | `O=C1N...` | -18.30 | -17.90 | 0.50 | "
                "+0.40 | 0.40 | ✓ | 1200 | |\n"
                # desthiobiotin has a positive prediction (+0.50), which
                # makes it the non-binder.
                "| desthiobiotin | `C[C@H]1...` | -13.20 | +0.50 | "
                "0.60 | +13.70 | 13.70 | | 1300 | SIGN WRONG |"))
        rc, out = _run_fill(tmp)
        # Attach the captured output so a non-zero exit is diagnosable,
        # including under the __main__ runner, which prints only the
        # assertion message.
        assert rc == 0, f"non-binder fill should exit 0; got {rc}\n{out}"
        assert "desthiobiotin predicted non-binder" in out, (
            f"email must call out the non-binder; got:\n{out[-600:]}")
| 176 | + |
| 177 | + |
def test_missing_numbers_returns_exit_2():
    """A malformed report (here, a corrupted MAE line) leaves
    ``<missing>`` markers and must exit 2, so the biologist doesn't
    silently send a placeholder email."""
    # The MAE line deliberately has no '= X' part, so it cannot be parsed.
    broken_report = "\n".join([
        "# Hydration FEP report — PASS",
        "",
        "- compounds: 12 ok / 12 total",
        "",
        "## Aggregate accuracy",
        "",
        "- MAE: (corrupted)",
        "- Pearson r = +0.993",
    ]) + "\n"
    with tempfile.TemporaryDirectory(prefix="cellsim_fill_") as scratch:
        report_dir = Path(scratch)
        (report_dir / "report.md").write_text(
            broken_report, encoding="utf-8")
        rc, out = _run_fill(report_dir)

    assert rc == 2, (
        f"missing MAE should exit 2 (warn); got rc={rc}\n{out}")
    assert "<missing>" in out
    assert "WARNING" in out
| 198 | + |
| 199 | + |
def test_no_report_md_returns_exit_1():
    """Pointing the filler at an empty directory is a hard error:
    exit 1, with output mentioning 'no report.md'."""
    with tempfile.TemporaryDirectory(prefix="cellsim_fill_") as scratch:
        empty_dir = Path(scratch)
        rc, out = _run_fill(empty_dir)

    assert rc == 1, (
        f"no report.md should exit 1 (hard error); got {rc}\n{out}")
    assert "no report.md" in out
| 207 | + |
| 208 | + |
def test_prof_name_override():
    """``--prof-name`` replaces the ``[Prof]`` placeholder in the
    greeting line."""
    with tempfile.TemporaryDirectory(
            prefix="cellsim_fill_") as tmp:
        tmp = Path(tmp)
        _write_report_with_overrides(
            tmp, kind="hydration", verdict="PASS",
            rows_table=(
                "| methane | `C` | +2.00 | +1.72 | 0.28 | -0.28 | "
                "0.28 | ✓ | 420 | |\n"
                "| acetamide | `CC(=O)N` | -9.71 | -8.90 | 0.55 | "
                "+0.81 | 0.81 | | 560 | |"))
        rc, out = _run_fill(tmp, "--prof-name", "Dr. Chen")
        # Attach the captured output so a non-zero exit is diagnosable,
        # including under the __main__ runner, which prints only the
        # assertion message.
        assert rc == 0, f"--prof-name fill should exit 0; got {rc}\n{out}"
        assert "Hi Dr. Chen," in out
        assert "Hi [Prof]," not in out
| 224 | + |
| 225 | + |
def test_fail_case_fixture_hydration():
    """End-to-end on the committed fail_case fixture — methane
    sign flipped, MAE 1.67. Email reflects both failures."""
    with tempfile.TemporaryDirectory(
            prefix="cellsim_fill_") as tmp:
        tmp = Path(tmp)
        # Generate the report via fep-report on the bundled fixture.
        # Use the interpreter running the tests rather than whatever
        # ``python`` resolves to on PATH (which may be absent or a
        # different environment).
        r = subprocess.run(
            [sys.executable, "-m", "src.fep.report",
             "tests/fep/fixtures/fail_case",
             "--yaml", "benchmarks/fep/freesolv_12.yaml",
             "--out-dir", str(tmp),
             "--quiet"],
            cwd=REPO_ROOT, capture_output=True, text=True,
        )
        # fep-report exits 1 on fail_case (by design), but the
        # files are written either way, so only their presence is checked.
        assert (tmp / "report.md").exists(), (
            f"fep-report didn't write report.md; stderr:\n{r.stderr}")
        rc, out = _run_fill(tmp)
        # fail_case fixture has no run.log with GHMC lines, so
        # ghmc_mean/worst come back <missing> → exit 2 (warn). Both
        # rc 0 and 2 are acceptable; what matters is the email
        # content has the right FAIL verdict.
        assert rc in (0, 2), (
            f"fill should succeed on fail_case; got {rc}\n{out}")
        assert "Sign-correct on methane: FAIL" in out
| 253 | + |
| 254 | + |
if __name__ == "__main__":
    # Stand-alone runner: execute every test in order, print one status
    # line per test plus a summary, and exit non-zero if any test failed
    # or errored.
    all_tests = (
        test_hydration_pass_all_fields_fill,
        test_hydration_fail_methane_sign,
        test_binding_streptavidin_template_selected,
        test_binding_egfr_template_selected,
        test_binding_non_binder_flagged_in_email,
        test_missing_numbers_returns_exit_2,
        test_no_report_md_returns_exit_1,
        test_prof_name_override,
        test_fail_case_fixture_hydration,
    )
    failed_names = []
    for test in all_tests:
        label = test.__name__
        try:
            test()
            print(f"[PASS] {label}")
        except AssertionError as err:
            # A failed expectation: report the assertion message only.
            print(f"[FAIL] {label}: {err}")
            failed_names.append(label)
        except Exception as err:
            # Anything else is an unexpected error; show the full
            # traceback, then keep going with the remaining tests.
            import traceback
            traceback.print_exc()
            print(f"[ERROR] {label}: {err}")
            failed_names.append(label)
    total = len(all_tests)
    print(f"{total - len(failed_names)}/{total} PASS")
    sys.exit(1 if failed_names else 0)
0 commit comments