Skip to content

Commit d9177f6

Browse files
committed
fep/sampling: biologist-actionable MBAR-failure messages
When MBAR raises internally (singular matrix, column-sum-zero, NaN, convergence stall) the exception text is meaningful to stat-mech experts and opaque to wet-lab biologists. Translate the common failure modes to physical causes ("adjacent λ-windows don't overlap") and concrete parameter bumps ("try --n-windows 21 --n-production-steps 25000; GPU recommended"), so someone who just burned 6 hours of GPU time knows what knob to turn. - sampling.py: _biologist_reason_for_mbar_error() catches column- sum / w_nk / overlap / singular / convergence / NaN / bounds phrases and emits bumped-parameter hints, capped at 31 windows and 100 k prod steps so the advice is realistic, not scary. - sample_alchemical_windows now wraps estimate_free_energy and populates result.reason via the translator (previously the raw pymbar exception bubbled up as "binding sampling failed: ..."). - Test: 10/10 regression over column-sum, singular, NaN, bounds, fallback, and the suggestion-cap boundaries. - CI: wired into smoke.yml next to the existing sampling smoke. Happy path: existing sampling smoke still PASSes at smoke params.
1 parent 766ef5b commit d9177f6

3 files changed

Lines changed: 199 additions & 2 deletions

File tree

.github/workflows/smoke.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,9 @@ jobs:
160160
- name: fep sampling smoke (Layer 1.3, methane vacuum Langevin+MBAR)
161161
run: python -u tests/fep/test_sampling_smoke.py
162162

163+
- name: fep MBAR-error translator (biologist-actionable failure messages)
164+
run: python -u tests/fep/test_mbar_error_translation_smoke.py
165+
163166
- name: fep end-to-end smoke (Layer 1.3, methane hydration ΔG pipeline)
164167
run: python -u tests/fep/test_hydration_dg_smoke.py
165168

src/fep/sampling.py

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,15 @@ def _apply_alchemical_params(ctx, comp_state):
285285
result.ghmc_acceptance.append(0.0)
286286
del _eval_ctx, _eval_int
287287

288-
dG_kT, ddG_kT = estimate_free_energy(u_kn, N_k)
288+
try:
289+
dG_kT, ddG_kT = estimate_free_energy(u_kn, N_k)
290+
except Exception as e:
291+
result.reason = _biologist_reason_for_mbar_error(
292+
e, n_windows=n_windows,
293+
n_production_steps=n_production_steps)
294+
result.n_samples_per_window = n_samples_per
295+
result.wall_seconds = time.time() - t0
296+
return result
289297
kT = 0.0019872041 * temperature_K
290298
result.ok = True
291299
result.n_samples_per_window = n_samples_per
@@ -295,8 +303,65 @@ def _apply_alchemical_params(ctx, comp_state):
295303
return result
296304

297305

306+
def _biologist_reason_for_mbar_error(
307+
exc: BaseException,
308+
*,
309+
n_windows: int,
310+
n_production_steps: int,
311+
) -> str:
312+
"""Translate pymbar/MBAR internal exceptions into biologist-
313+
actionable guidance. The raw pymbar text says things like
314+
"Column sum W_nk = 0" which is not meaningful to anyone who
315+
hasn't read the MBAR paper. The translation names the physical
316+
cause (adjacent λ-windows don't overlap, sampling too short)
317+
and suggests exact parameter bumps.
318+
"""
319+
msg = str(exc).lower()
320+
# Suggested bumps: 2× windows, 3× prod steps, capped so we
321+
# don't print absurdly large numbers that scare off users on
322+
# laptops. These are the Milestone-A-tier numbers that
323+
# reliably converge streptavidin-class binders on GPU.
324+
suggest_windows = min(max(n_windows * 2, 21), 31)
325+
suggest_prod = min(max(n_production_steps * 3, 25000), 100000)
326+
hint = (
327+
f" → try --n-windows {suggest_windows} "
328+
f"--n-production-steps {suggest_prod} "
329+
"(Milestone-A-tier sampling; GPU recommended)")
330+
331+
# Pattern-match the distinctive pymbar failure strings.
332+
if ("column sum" in msg) or ("w_nk" in msg) or (
333+
"overlap" in msg):
334+
return (
335+
"adjacent λ-windows don't overlap — sampling was "
336+
"insufficient for MBAR to reweight between states. "
337+
"This is common for tight binders (|ΔG| > 10 kcal/mol) "
338+
"at smoke-tier parameters." + hint)
339+
if ("singular" in msg) or ("convergence" in msg):
340+
return (
341+
"MBAR self-consistent iteration did not converge — "
342+
"typically caused by states with near-identical "
343+
"reduced potentials (redundant λ schedule) or NaN/Inf "
344+
"samples." + hint)
345+
if ("bounds" in msg) or ("negative uncertainty" in msg):
346+
return (
347+
"MBAR returned non-physical uncertainty — too few "
348+
"independent samples per window." + hint)
349+
if ("nan" in msg) or ("inf" in msg):
350+
return (
351+
"reduced-potential matrix contains NaN/Inf — the "
352+
"integrator blew up on at least one window. Try a "
353+
"smaller timestep (--timestep-fs 0.5) or verify the "
354+
"input structure has no steric clashes." + hint)
355+
# Fallback: preserve the original text but frame it clearly.
356+
return (
357+
f"MBAR failed with: {str(exc)[:200]}." + hint)
358+
359+
298360
def estimate_free_energy(u_kn, N_k) -> tuple[float, float]:
299-
"""Run MBAR and return (ΔG_λ=0→λ=1, uncertainty) in kT."""
361+
"""Run MBAR and return (ΔG_λ=0→λ=1, uncertainty) in kT.
362+
363+
Raises on convergence / overlap failure. Callers should wrap
364+
with _biologist_reason_for_mbar_error to translate."""
300365
from pymbar import MBAR
301366

302367
mbar = MBAR(u_kn, N_k)
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
"""Regression tests for the biologist-readable MBAR-failure
2+
translator in src/fep/sampling.py.
3+
4+
The underlying problem: pymbar's built-in error strings ("Column
5+
sum W_nk = 0 for state 3 and 8 other columns") are meaningful to
6+
statistical mechanics experts and meaningless to a wet-lab biologist
7+
running `cellsim fep-binding` for the first time. After a 6-hour
8+
GPU burn, the biologist needs to know (a) *why* it failed and (b)
9+
*what parameter to change* — not what a reweighting matrix is.
10+
11+
These tests pin the translator behaviour so we never regress to
12+
the raw pymbar string."""
13+
from __future__ import annotations
14+
15+
import sys
16+
from pathlib import Path
17+
18+
REPO = Path(__file__).resolve().parents[2]
19+
sys.path.insert(0, str(REPO))
20+
21+
from src.fep.sampling import _biologist_reason_for_mbar_error
22+
23+
24+
def _msg(exc_text: str, **kwargs) -> str:
25+
defaults = dict(n_windows=11, n_production_steps=1500)
26+
defaults.update(kwargs)
27+
return _biologist_reason_for_mbar_error(
28+
Exception(exc_text), **defaults)
29+
30+
31+
def test_column_sum_triggers_overlap_message():
32+
out = _msg(
33+
"Column sum W_nk = 0 for state 0 and 4 other columns; "
34+
"check that the states have sufficient overlap")
35+
assert "λ-windows don't overlap" in out, out
36+
assert "tight binders" in out, out
37+
assert "--n-windows" in out, out
38+
assert "--n-production-steps" in out, out
39+
40+
41+
def test_w_nk_phrase_triggers_overlap_message():
42+
# Some pymbar versions lowercase the matrix name.
43+
out = _msg("w_nk matrix has columns that do not overlap")
44+
assert "λ-windows don't overlap" in out
45+
46+
47+
def test_singular_matrix_triggers_convergence_message():
48+
out = _msg("Singular matrix encountered in MBAR iteration")
49+
assert "did not converge" in out or "convergence" in out.lower(), out
50+
assert "--n-windows" in out
51+
52+
53+
def test_nan_reduced_potentials_mentions_integrator():
54+
out = _msg("Reduced potential contains NaN values")
55+
assert "NaN" in out or "integrator" in out.lower(), out
56+
assert "timestep" in out.lower(), out
57+
58+
59+
def test_bounds_error_mentions_samples():
60+
out = _msg("BoundsError: squared uncertainty is negative")
61+
assert "uncertainty" in out.lower()
62+
assert "samples per window" in out.lower() or (
63+
"samples" in out.lower())
64+
65+
66+
def test_fallback_preserves_original_text():
67+
out = _msg("Mystery error the translator hasn't seen")
68+
# Fallback must include a truncated copy of the original so
69+
# debug info isn't lost, but also still suggest bumped params.
70+
assert "Mystery error" in out
71+
assert "--n-windows" in out
72+
73+
74+
def test_suggested_windows_bumped_at_least_21():
75+
# Tight-binder-tier floor: 21 windows (Milestone-A practice).
76+
out = _msg("column sum 0", n_windows=11, n_production_steps=1500)
77+
assert "--n-windows 22" in out or "--n-windows 21" in out, out
78+
assert "--n-production-steps" in out
79+
80+
81+
def test_suggested_prod_capped_at_100k():
82+
# Don't print scare numbers — 100k is a reasonable ceiling
83+
# for the suggestion text regardless of user input.
84+
out = _msg("column sum 0", n_windows=11,
85+
n_production_steps=50000)
86+
# Bump would be 3× = 150000, but capped at 100000.
87+
assert "--n-production-steps 100000" in out, out
88+
89+
90+
def test_suggested_windows_capped_at_31():
91+
out = _msg("column sum 0", n_windows=21,
92+
n_production_steps=1500)
93+
# 2× = 42, capped at 31.
94+
assert "--n-windows 31" in out, out
95+
96+
97+
def test_gpu_recommended_for_tight_binders():
98+
out = _msg("Column sum W_nk = 0")
99+
assert "GPU" in out
100+
101+
102+
if __name__ == "__main__":
103+
funcs = [
104+
test_column_sum_triggers_overlap_message,
105+
test_w_nk_phrase_triggers_overlap_message,
106+
test_singular_matrix_triggers_convergence_message,
107+
test_nan_reduced_potentials_mentions_integrator,
108+
test_bounds_error_mentions_samples,
109+
test_fallback_preserves_original_text,
110+
test_suggested_windows_bumped_at_least_21,
111+
test_suggested_prod_capped_at_100k,
112+
test_suggested_windows_capped_at_31,
113+
test_gpu_recommended_for_tight_binders,
114+
]
115+
fails = []
116+
for f in funcs:
117+
try:
118+
f()
119+
print(f"[PASS] {f.__name__}")
120+
except AssertionError as e:
121+
print(f"[FAIL] {f.__name__}: {e}")
122+
fails.append(f.__name__)
123+
except Exception as e:
124+
import traceback
125+
traceback.print_exc()
126+
print(f"[ERROR] {f.__name__}: {e}")
127+
fails.append(f.__name__)
128+
print(f"{len(funcs) - len(fails)}/{len(funcs)} PASS")
129+
sys.exit(0 if not fails else 1)

0 commit comments

Comments
 (0)