Skip to content

Commit d684b93

Browse files
committed
tests: fill_prof_email 9/9 smoke + CI wiring
Regression coverage across the matrix:

Hydration kind:
- PASS + all fields fill → rc 0
- FAIL + methane sign flip → email shows FAIL on methane
- fail_case bundled fixture → end-to-end works (rc 0 or 2)

Binding kind:
- biotin/desthiobiotin rows → 'streptavidin binding' subject
- erlotinib/gefitinib rows → 'EGFR kinase binding' subject
- non-binder prediction → 'desthiobiotin predicted non-binder' flagged in email

Failure modes:
- malformed report (unparseable MAE) → rc 2 + WARNING banner
- no report.md at all → rc 1 (hard error)

Ergonomics:
- --prof-name 'Dr. Chen' → 'Hi Dr. Chen,' instead of '[Prof]'

Wired into smoke.yml so every PR guards the Milestone-A → email pipeline the moment the tarball lands.
1 parent a833e4e commit d684b93

2 files changed

Lines changed: 284 additions & 0 deletions

File tree

.github/workflows/smoke.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,9 @@ jobs:
184184
- name: bridge — Campaign-1 → Campaign-2 rate-law emitter
185185
run: python -u tests/bridge/test_binding_to_hill_smoke.py
186186

187+
- name: fill_prof_email auto-fill regression (9/9)
188+
run: python -u tests/fep/test_fill_prof_email_smoke.py
189+
187190
- name: fep sampled binding smoke (opt-in, ~10 min, manual)
188191
if: >
189192
github.event_name == 'workflow_dispatch' &&
Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
"""fill_prof_email.py regression tests.
2+
3+
Covers the three outcome shapes (PASS / FAIL / partial) across
4+
the two YAML kinds (hydration / binding), plus the failure
5+
modes that matter most: missing fields warn, a missing report.md
6+
is a hard error, bench name auto-detect picks the right template.
7+
"""
8+
from __future__ import annotations
9+
10+
import csv
11+
import io
12+
import subprocess
13+
import sys
14+
import tempfile
15+
from pathlib import Path
16+
17+
REPO_ROOT = Path(__file__).resolve().parents[2]
18+
19+
20+
def _write_report_with_overrides(dir_path: Path, kind: str,
                                 verdict: str,
                                 rows_table: str,
                                 mae: str = "0.42",
                                 pearson: str = "+0.993",
                                 ghmc: str = "overall mean 77%, "
                                             "worst 71%, 3 "
                                             "compounds report") -> None:
    """Synthesise a report.md with controllable fields so we can
    exercise filler edge cases without running the sampler.

    Args:
        dir_path: Existing directory that receives ``report.md``.
        kind: ``"hydration"`` or ``"binding"``; selects the report title.
            Any other value raises ``KeyError``.
        verdict: Overall verdict text placed in the heading (e.g. ``"PASS"``).
        rows_table: Pre-formatted markdown rows for the per-compound table.
        mae: MAE value, used in both the gate line and the aggregate line.
        pearson: Pearson r value for the aggregate section.
        ghmc: Free text placed inside the GHMC gate parentheses.
    """
    title = {"hydration": "Hydration FEP report",
             "binding": "Binding FEP report"}[kind]
    # Built line by line instead of a column-0 triple-quoted literal so the
    # function's indentation can never leak into the generated markdown.
    lines = [
        # " — " separates title and verdict, matching the heading used by the
        # hand-written fixture in test_missing_numbers_returns_exit_2.
        f"# {title} — {verdict}",
        "",
        "- source: `/tmp/synth`",
        "- compounds: 12 ok / 12 total",
        "",
        "## Gate verdict",
        "",
        # NOTE: both gate lines always say PASS, independent of `verdict`.
        f"- **MAE gate** (≤ 1.5 kcal/mol): PASS (MAE = {mae} kcal/mol)",
        f"- **GHMC acceptance gate** (≥ 70% per compound): PASS ({ghmc})",
        "",
        "## Aggregate accuracy",
        "",
        f"- MAE = {mae} kcal/mol",
        "- RMSE = 0.447 kcal/mol",
        f"- Pearson r = {pearson}",
        "- Spearman ρ = +0.965",
        "- Kendall τ = +0.879",
        "",
        "## Per-compound results",
        "",
        "| name | smiles | expt | pred | ± | residual | |resid| | within σ | wall (s) | flags |",
        "|------|--------|------|------|---|----------|-----:|---------:|---------:|-------|",
        rows_table,
    ]
    report = "\n".join(lines) + "\n"
    (dir_path / "report.md").write_text(report, encoding="utf-8")
58+
59+
60+
def _run_fill(dir_path: Path, *extra_args: str) -> tuple[int, str]:
    """Run ``scripts/fill_prof_email.py`` on *dir_path* in a subprocess.

    Args:
        dir_path: Directory passed to the script as its positional argument.
        *extra_args: Additional CLI arguments appended verbatim.

    Returns:
        ``(returncode, output)`` where *output* is the child's stdout
        followed by its stderr.
    """
    # sys.executable rather than a bare "python": the child must run under
    # the same interpreter as this test, and "python" may be missing from
    # PATH or resolve to a different installation.
    cmd = [sys.executable, "scripts/fill_prof_email.py", str(dir_path),
           *extra_args]
    proc = subprocess.run(
        cmd,
        cwd=REPO_ROOT,  # the script path above is relative to the repo root
        capture_output=True, text=True,
    )
    return proc.returncode, proc.stdout + proc.stderr
68+
69+
70+
def test_hydration_pass_all_fields_fill():
    """Hydration PASS report: filler exits 0, the subject and both
    sign-correct lines are present, and no <missing> marker remains."""
    methane_row = ("| methane | `C` | +2.00 | +1.72 | 0.28 | -0.28 | "
                   "0.28 | ✓ | 420 | |")
    acetamide_row = ("| acetamide | `CC(=O)N` | -9.71 | -8.90 | 0.55 | "
                     "+0.81 | 0.81 | | 560 | |")
    with tempfile.TemporaryDirectory(prefix="cellsim_fill_") as tmp_name:
        workdir = Path(tmp_name)
        _write_report_with_overrides(
            workdir, kind="hydration", verdict="PASS",
            rows_table=methane_row + "\n" + acetamide_row)
        rc, out = _run_fill(workdir)
        assert rc == 0, f"hydration pass should exit 0; got {rc}\n{out}"
        expected_fragments = (
            "Subject: Milestone A — FreeSolv FEP results",
            "Sign-correct on methane: PASS",
            "Sign-correct on acetamide: PASS",
        )
        for fragment in expected_fragments:
            assert fragment in out
        assert "<missing>" not in out
87+
88+
89+
def test_hydration_fail_methane_sign():
    """Methane predicted negative — the email must report FAIL for the
    overall verdict and for methane, while acetamide stays PASS."""
    table_rows = "\n".join([
        "| methane | `C` | +2.00 | -1.85 | 0.45 | -3.85 | "
        "3.85 | | 180 | SIGN WRONG |",
        "| acetamide | `CC(=O)N` | -9.71 | -11.20 | 0.85 | "
        "-1.49 | 1.49 | | 265 | |",
    ])
    with tempfile.TemporaryDirectory(prefix="cellsim_fill_") as tmp_name:
        workdir = Path(tmp_name)
        _write_report_with_overrides(
            workdir, kind="hydration", verdict="FAIL",
            rows_table=table_rows)
        rc, out = _run_fill(workdir)
        # A FAIL report is still a successful fill: exit 0 means every
        # field was filled. The FAIL verdict lives in the email body,
        # not in the exit code.
        assert rc == 0, out
        assert "Overall verdict: FAIL" in out
        assert "Sign-correct on methane: FAIL" in out
        assert "Sign-correct on acetamide: PASS" in out
109+
110+
111+
def test_binding_streptavidin_template_selected():
    """Binding report with biotin/desthiobiotin rows → binding
    template, subject says 'streptavidin binding'."""
    import re

    biotin_row = ("| biotin | `O=C1N...` | -18.30 | -17.90 | 0.50 | "
                  "+0.40 | 0.40 | ✓ | 1200 | |")
    desthiobiotin_row = ("| desthiobiotin | `C[C@H]1...` | -13.20 | -12.50 | "
                         "0.60 | +0.70 | 0.70 | | 1300 | |")
    with tempfile.TemporaryDirectory(prefix="cellsim_fill_") as tmp_name:
        workdir = Path(tmp_name)
        _write_report_with_overrides(
            workdir, kind="binding", verdict="PASS",
            mae="0.95",
            rows_table=f"{biotin_row}\n{desthiobiotin_row}")
        rc, out = _run_fill(workdir)
        # Exit 2 only happens when GHMC or another field is missing from
        # the synthetic report. That is acceptable here: this test checks
        # the template, subject and values, not the warning path.
        assert rc in (0, 2), out
        subject = "Subject: Milestone B — streptavidin binding FEP"
        assert subject in out, (
            f"expected 'streptavidin binding' subject; got:\n{out[:500]}")
        # Match the MAE line loosely on whitespace.
        mae_line = re.search(
            r"MAE vs published ΔG_bind:\s+0\.95 kcal/mol", out)
        assert mae_line, (
            f"MAE line missing / mismatched; got:\n{out[:500]}")
        assert "(gate <= 2.0)" in out
138+
139+
140+
def test_binding_egfr_template_selected():
    """Binding report with erlotinib/gefitinib rows → the email names the
    EGFR kinase bench and carries the Cheng-Prusoff caveat."""
    with tempfile.TemporaryDirectory(
            prefix="cellsim_fill_") as tmp:
        tmp = Path(tmp)
        _write_report_with_overrides(
            tmp, kind="binding", verdict="PASS",
            rows_table=(
                "| erlotinib | `COCCOc1...` | -11.86 | -11.42 | 0.44 | "
                "+0.44 | 0.44 | ✓ | 3456 | |\n"
                "| gefitinib | `COc1cc...` | -10.20 | -10.01 | 0.38 | "
                "+0.19 | 0.19 | ✓ | 2890 | |"))
        rc, out = _run_fill(tmp)
        # Attach the captured output so a non-zero exit is diagnosable
        # from the CI log, as the other tests in this file do.
        assert rc == 0, out
        assert "EGFR kinase binding FEP" in out, (
            f"expected EGFR bench name; got:\n{out[:400]}")
        assert "Cheng-Prusoff" in out, (
            "binding template should mention the IC50-offset caveat")
157+
158+
159+
def test_binding_non_binder_flagged_in_email():
    """Binding rule: any compound with ΔG_pred >= 0 fails.
    Email must surface the non-binder explicitly."""
    with tempfile.TemporaryDirectory(
            prefix="cellsim_fill_") as tmp:
        tmp = Path(tmp)
        _write_report_with_overrides(
            tmp, kind="binding", verdict="FAIL",
            rows_table=(
                "| biotin | `O=C1N...` | -18.30 | -17.90 | 0.50 | "
                "+0.40 | 0.40 | ✓ | 1200 | |\n"
                # desthiobiotin gets a positive prediction (+0.50): the
                # non-binder case this test is about.
                "| desthiobiotin | `C[C@H]1...` | -13.20 | +0.50 | "
                "0.60 | +13.70 | 13.70 | | 1300 | SIGN WRONG |"))
        rc, out = _run_fill(tmp)
        # Attach the captured output so a non-zero exit is diagnosable
        # from the CI log, as the other tests in this file do.
        assert rc == 0, out
        assert "desthiobiotin predicted non-binder" in out, (
            f"email must call out the non-binder; got:\n{out[-600:]}")
176+
177+
178+
def test_missing_numbers_returns_exit_2():
    """A report whose MAE line cannot be parsed must leave <missing>
    markers, print a WARNING and exit 2, so a placeholder email is
    never sent silently."""
    # The MAE line has no '= X' part, which makes it unparseable.
    broken_report = "".join([
        "# Hydration FEP report — PASS\n\n",
        "- compounds: 12 ok / 12 total\n\n",
        "## Aggregate accuracy\n\n",
        "- MAE: (corrupted)\n",
        "- Pearson r = +0.993\n",
    ])
    with tempfile.TemporaryDirectory(prefix="cellsim_fill_") as tmp_name:
        workdir = Path(tmp_name)
        report_path = workdir / "report.md"
        report_path.write_text(broken_report, encoding="utf-8")
        rc, out = _run_fill(workdir)
        assert rc == 2, (
            f"missing MAE should exit 2 (warn); got rc={rc}\n{out}")
        for marker in ("<missing>", "WARNING"):
            assert marker in out
198+
199+
200+
def test_no_report_md_returns_exit_1():
    """An empty directory (no report.md) is a hard error: exit 1 and a
    'no report.md' message in the output."""
    with tempfile.TemporaryDirectory(prefix="cellsim_fill_") as tmp_name:
        empty_dir = Path(tmp_name)
        rc, out = _run_fill(empty_dir)
        failure_note = (
            f"no report.md should exit 1 (hard error); got {rc}\n{out}")
        assert rc == 1, failure_note
        assert "no report.md" in out
207+
208+
209+
def test_prof_name_override():
    """``--prof-name 'Dr. Chen'`` replaces the '[Prof]' greeting
    placeholder with 'Hi Dr. Chen,'."""
    with tempfile.TemporaryDirectory(
            prefix="cellsim_fill_") as tmp:
        tmp = Path(tmp)
        _write_report_with_overrides(
            tmp, kind="hydration", verdict="PASS",
            rows_table=(
                "| methane | `C` | +2.00 | +1.72 | 0.28 | -0.28 | "
                "0.28 | ✓ | 420 | |\n"
                "| acetamide | `CC(=O)N` | -9.71 | -8.90 | 0.55 | "
                "+0.81 | 0.81 | | 560 | |"))
        rc, out = _run_fill(tmp, "--prof-name", "Dr. Chen")
        # Attach the captured output so a non-zero exit is diagnosable
        # from the CI log, as the other tests in this file do.
        assert rc == 0, out
        assert "Hi Dr. Chen," in out
        assert "Hi [Prof]," not in out
224+
225+
226+
def test_fail_case_fixture_hydration():
    """End-to-end on the committed fail_case fixture — methane
    sign flipped, MAE 1.67. Email reflects both failures."""
    with tempfile.TemporaryDirectory(
            prefix="cellsim_fill_") as tmp:
        tmp = Path(tmp)
        # Generate report via fep-report on the bundled fixture.
        # sys.executable rather than a bare "python": the report module
        # must run under the same interpreter as this test, not
        # whatever "python" resolves to on PATH.
        r = subprocess.run(
            [sys.executable, "-m", "src.fep.report",
             "tests/fep/fixtures/fail_case",
             "--yaml", "benchmarks/fep/freesolv_12.yaml",
             "--out-dir", str(tmp),
             "--quiet"],
            cwd=REPO_ROOT, capture_output=True, text=True,
        )
        # fep-report exits 1 on fail_case (by design), but the
        # files are written either way.
        assert (tmp / "report.md").exists(), (
            f"fep-report didn't write report.md; stderr:\n{r.stderr}")
        rc, out = _run_fill(tmp)
        # fail_case fixture has no run.log with GHMC lines, so
        # ghmc_mean/worst come back <missing> → exit 2 (warn). Both
        # rc 0 and 2 are acceptable; what matters is the email
        # content has the right FAIL verdict.
        assert rc in (0, 2), (
            f"fill should succeed on fail_case; got {rc}\n{out}")
        assert "Sign-correct on methane: FAIL" in out
253+
254+
255+
if __name__ == "__main__":
    # Standalone runner for `python -u <this file>`: run every test,
    # print one status line per test plus a summary, and exit non-zero
    # if any test failed or errored.
    import traceback

    suite = (
        test_hydration_pass_all_fields_fill,
        test_hydration_fail_methane_sign,
        test_binding_streptavidin_template_selected,
        test_binding_egfr_template_selected,
        test_binding_non_binder_flagged_in_email,
        test_missing_numbers_returns_exit_2,
        test_no_report_md_returns_exit_1,
        test_prof_name_override,
        test_fail_case_fixture_hydration,
    )
    failed = []
    for case in suite:
        label = case.__name__
        try:
            case()
            print(f"[PASS] {label}")
        except AssertionError as exc:
            # An assertion failure is an ordinary test failure.
            print(f"[FAIL] {label}: {exc}")
            failed.append(label)
        except Exception as exc:
            # Any other exception is a test error; show the full
            # traceback before the one-line status.
            traceback.print_exc()
            print(f"[ERROR] {label}: {exc}")
            failed.append(label)
    passed = len(suite) - len(failed)
    print(f"{passed}/{len(suite)} PASS")
    sys.exit(1 if failed else 0)

0 commit comments

Comments
 (0)