Skip to content

Commit 1fea828

Browse files
committed
fep/binding: sampling wall-time preview after scaffold runs
Add estimate_sampling_wall_hours + format_wall_estimate_block so any scaffold-only fep-binding dg run automatically prints the projected Phase-2 wall time across CPU, Metal M5 Max, and CUDA H100. Biologist workflow: run scaffold first (sub-minute), read the preview, then only kick off --sample if the projected cost is acceptable. Model: total_steps = 2 × n_windows × (equil + prod). Effective throughput scales inversely with n_atoms; anchors calibrated against measured FreeSolv M5 Max wall (~13 k effective steps/sec on ~2 k atoms). 25% overhead surcharge for per-λ minimise + MBAR eval. Accuracy: ±2-3×, reported as such in the output — this is planning data, not a guarantee. Formatter auto-flags "CPU-only is not viable" when CPU projection exceeds 48 h (catches "I'll just try --sample overnight" mistakes where the user wanted GPU but forgot the flag). 10/10 regression covers: small/medium/large system scaling, formatter gate, minute/hour/day unit choice, inverse-atoms and linear-steps scaling invariants, overhead surcharge sanity check.
1 parent e3289e5 commit 1fea828

3 files changed

Lines changed: 292 additions & 0 deletions

File tree

.github/workflows/smoke.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,9 @@ jobs:
163163
- name: fep MBAR-error translator (biologist-actionable failure messages)
164164
run: python -u tests/fep/test_mbar_error_translation_smoke.py
165165

166+
- name: fep wall-time estimator (GPU-cost preview anchors stay honest)
167+
run: python -u tests/fep/test_wall_estimator_smoke.py
168+
166169
- name: fep end-to-end smoke (Layer 1.3, methane hydration ΔG pipeline)
167170
run: python -u tests/fep/test_hydration_dg_smoke.py
168171

src/fep/binding.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,91 @@ def _module_logger():
318318
return logging.getLogger(__name__)
319319

320320

321+
# Platform throughput anchors for the wall-time estimator.
322+
# Units: steps × atoms per second, measured empirically and used
323+
# as throughput[platform] / n_atoms → effective steps/sec for a
324+
# system of size n_atoms. The anchors assume a 2 fs timestep with
325+
# GHMC + HMR constraints (standard CellSim profile); large atomic
326+
# systems drop linearly in steps/sec with atom count.
327+
#
328+
# Anchors calibrated against:
329+
# - CPU: 24-atom methane in vacuum, single-core measured at
330+
# ~80 k steps/sec → 2 M steps·atom/sec
331+
# - Metal: ~2 k-atom FreeSolv M5 Max run, ~13 k steps/sec
332+
# effective on the full Phase-2 sampler → ~26 M
333+
# - CUDA: ~150 M is the paper-quality anchor for an H100
334+
# running dhfr-class 23k-atom systems at ~6 k steps/sec
335+
#
336+
# These are anchors with ±2-3× error bars. Reported as such in
337+
# the CLI output so biologists don't treat them as precise.
338+
_PLATFORM_STEP_ATOMS_PER_SECOND = {
339+
"cpu": 2_000_000,
340+
"metal_m5max": 26_000_000,
341+
"cuda_h100": 150_000_000,
342+
}
343+
344+
# Wall-time amortisation for per-λ overheads (minimisation,
345+
# context switches, MBAR eval). Empirically ~15-30% of pure MD
346+
# wall, so add a 25% surcharge.
347+
_OVERHEAD_SURCHARGE = 1.25
348+
349+
350+
def estimate_sampling_wall_hours(
351+
n_atoms: int,
352+
n_windows: int,
353+
n_production_steps: int,
354+
n_equilibration_steps: int,
355+
n_legs: int = 2,
356+
) -> dict[str, float]:
357+
"""Rough estimate of sample_alchemical_windows wall time across
358+
reference platforms. Returns {'cpu': h, 'metal_m5max': h,
359+
'cuda_h100': h}. Accuracy: ±2-3× — this is a sanity-check
360+
preview, not a guarantee.
361+
362+
Model: total_steps = n_legs × n_windows × (n_equil + n_prod).
363+
Effective throughput on a given platform scales inversely with
364+
n_atoms; multiply by a 25% overhead surcharge for per-λ
365+
minimise + MBAR eval.
366+
"""
367+
total_steps = n_legs * n_windows * (
368+
n_equilibration_steps + n_production_steps)
369+
out: dict[str, float] = {}
370+
for plat, const in _PLATFORM_STEP_ATOMS_PER_SECOND.items():
371+
steps_per_sec = const / max(n_atoms, 1)
372+
seconds = total_steps / max(steps_per_sec, 1e-6)
373+
seconds *= _OVERHEAD_SURCHARGE
374+
out[plat] = seconds / 3600.0
375+
return out
376+
377+
378+
def format_wall_estimate_block(
379+
est: dict[str, float], *, gate_hours: float = 48.0,
380+
) -> str:
381+
"""Pretty-print the wall estimate as a biologist-readable block.
382+
Flags anything over `gate_hours` on CPU as "CPU-only is not
383+
viable" — a 48-hour run on CPU usually means the user forgot
384+
to flag GPU, not that they want a CPU run of that length."""
385+
def _fmt(h: float) -> str:
386+
if h < 1.0:
387+
return f"{h * 60:.0f} min"
388+
if h < 48.0:
389+
return f"{h:.1f} h"
390+
return f"{h / 24:.1f} d"
391+
392+
lines = [
393+
" ~ Estimated sampling wall time "
394+
"(±2-3× accuracy; for planning, not guarantees):",
395+
f" CPU : {_fmt(est['cpu'])}",
396+
f" Metal (M5 Max) : {_fmt(est['metal_m5max'])}",
397+
f" CUDA H100 : {_fmt(est['cuda_h100'])}",
398+
]
399+
if est["cpu"] > gate_hours:
400+
lines.append(
401+
" ! CPU-only is not viable for this config "
402+
"(> 48 h). Use Metal or CUDA.")
403+
return "\n".join(lines)
404+
405+
321406
def _find_ca_indices_near(positions_nm, center_nm, radius_nm,
322407
ca_candidate_indices):
323408
"""Return the subset of Cα indices within `radius_nm` of the
@@ -1220,6 +1305,25 @@ def main(argv=None) -> int:
12201305
print(_json.dumps(asdict(r), indent=2, default=str))
12211306
else:
12221307
print(r.summary())
1308+
# Biologist preview: when a scaffold-only run completes
1309+
# (no --sample), print how long a sampled run of this config
1310+
# would take on the reference platforms. Prevents "I'll just
1311+
# try --sample overnight" surprises that turn into 5-day CPU
1312+
# burns.
1313+
if (not getattr(args, "sample", False)
1314+
and args.cmd == "dg"
1315+
and isinstance(r, BindingDGResult)
1316+
and r.ok
1317+
and r.n_total_atoms_complex):
1318+
est = estimate_sampling_wall_hours(
1319+
n_atoms=r.n_total_atoms_complex,
1320+
n_windows=args.n_windows,
1321+
n_production_steps=25000,
1322+
n_equilibration_steps=2500)
1323+
print()
1324+
print(" (reference: 11 × 25 000 prod + 2 500 equil "
1325+
"× 2 legs — Milestone-A-tier sampling)")
1326+
print(format_wall_estimate_block(est))
12231327
return 0 if r.ok else 1
12241328

12251329

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
"""Regression tests for src.fep.binding.estimate_sampling_wall_hours.
2+
3+
Purpose: prevent wasted GPU runs. Biologists about to spend 6-48
4+
hours of GPU time on a binding FEP benefit from a cost preview;
5+
these tests pin the estimator so the preview stays roughly honest
6+
(±2-3× is the advertised accuracy).
7+
8+
Anchors:
9+
- CPU : 2 M steps·atom/sec
10+
- Metal : 26 M steps·atom/sec
11+
- CUDA : 150 M steps·atom/sec
12+
- overhead surcharge : 1.25×
13+
14+
Independent re-derivation for a streptavidin-class 30k-atom
15+
system with 11 × (25 000 prod + 2 500 equil) × 2 legs:
16+
total_steps = 2 × 11 × 27 500 = 605 000
17+
CPU = 605 000 × 30 000 / 2e6 × 1.25 / 3600 ≈ 3.15 h
18+
Metal= same / (26/2) ≈ 0.24 h
19+
CUDA = same / (150/2) ≈ 0.042 h ≈ 2.5 min
20+
"""
21+
from __future__ import annotations
22+
23+
import sys
24+
from pathlib import Path
25+
26+
REPO = Path(__file__).resolve().parents[2]
27+
sys.path.insert(0, str(REPO))
28+
29+
from src.fep.binding import (
30+
estimate_sampling_wall_hours,
31+
format_wall_estimate_block,
32+
)
33+
34+
35+
def test_methane_smoke_runs_in_minutes_on_metal():
36+
"""FreeSolv-class small system — should be minutes on Metal."""
37+
est = estimate_sampling_wall_hours(
38+
n_atoms=2000, n_windows=11,
39+
n_production_steps=25000,
40+
n_equilibration_steps=2500)
41+
assert est["metal_m5max"] < 0.1, est # < 6 min
42+
assert est["cuda_h100"] < 0.02, est # < 72 s
43+
# CPU on small system should also finish overnight.
44+
assert est["cpu"] < 2.0, est # < 2 h
45+
46+
47+
def test_streptavidin_class_30k_atoms_reasonable():
48+
est = estimate_sampling_wall_hours(
49+
n_atoms=30000, n_windows=11,
50+
n_production_steps=25000,
51+
n_equilibration_steps=2500)
52+
# Sanity ratios: CUDA ~ 75× faster than CPU, Metal ~ 13× faster.
53+
cpu_h = est["cpu"]
54+
metal_h = est["metal_m5max"]
55+
cuda_h = est["cuda_h100"]
56+
assert 1.0 < cpu_h < 10.0, est # 3 h ballpark
57+
assert 0.1 < metal_h < 1.0, est # ~15 min ballpark
58+
assert cuda_h < 0.1, est # < 6 min
59+
# Platform ordering.
60+
assert cuda_h < metal_h < cpu_h, est
61+
62+
63+
def test_egfr_class_40k_atoms_cpu_flagged_as_infeasible():
64+
"""EGFR kinase series: ~40k atoms, 6 compounds → CPU-only is
65+
not a viable plan (days). The formatter must warn."""
66+
est = estimate_sampling_wall_hours(
67+
n_atoms=40000, n_windows=11,
68+
# Paper-grade sampling: 50 ps per window.
69+
n_production_steps=50000,
70+
n_equilibration_steps=5000)
71+
assert est["cpu"] > 8.0, est
72+
block = format_wall_estimate_block(est, gate_hours=48.0)
73+
# CPU line must appear. Metal/CUDA must appear.
74+
assert "CPU" in block
75+
assert "Metal" in block
76+
assert "CUDA" in block
77+
78+
79+
def test_formatter_flags_cpu_infeasible_over_gate():
80+
"""Force a huge config so CPU > 48h — the warning must fire."""
81+
est = estimate_sampling_wall_hours(
82+
n_atoms=100000, n_windows=21,
83+
n_production_steps=100000,
84+
n_equilibration_steps=10000)
85+
block = format_wall_estimate_block(est, gate_hours=48.0)
86+
assert "CPU-only is not viable" in block, block
87+
88+
89+
def test_formatter_does_not_flag_short_cpu_runs():
90+
est = estimate_sampling_wall_hours(
91+
n_atoms=1000, n_windows=5,
92+
n_production_steps=1000,
93+
n_equilibration_steps=500)
94+
block = format_wall_estimate_block(est, gate_hours=48.0)
95+
assert "CPU-only is not viable" not in block, block
96+
97+
98+
def test_formatter_uses_minutes_for_short_runs():
99+
est = estimate_sampling_wall_hours(
100+
n_atoms=500, n_windows=3,
101+
n_production_steps=500,
102+
n_equilibration_steps=100)
103+
block = format_wall_estimate_block(est)
104+
# All three platforms should report in "min" for this tiny config.
105+
assert "min" in block, block
106+
107+
108+
def test_formatter_uses_days_for_very_long_runs():
109+
est = estimate_sampling_wall_hours(
110+
n_atoms=200000, n_windows=21,
111+
n_production_steps=500000,
112+
n_equilibration_steps=50000)
113+
block = format_wall_estimate_block(est)
114+
# CPU line should render in days, not hours.
115+
assert " d" in block, block
116+
117+
118+
def test_scaling_inversely_with_atoms():
119+
"""Double the atoms → roughly double the wall."""
120+
small = estimate_sampling_wall_hours(
121+
n_atoms=5000, n_windows=11,
122+
n_production_steps=10000, n_equilibration_steps=1000)
123+
big = estimate_sampling_wall_hours(
124+
n_atoms=10000, n_windows=11,
125+
n_production_steps=10000, n_equilibration_steps=1000)
126+
for plat in ("cpu", "metal_m5max", "cuda_h100"):
127+
ratio = big[plat] / small[plat]
128+
assert 1.9 < ratio < 2.1, (plat, ratio, small[plat], big[plat])
129+
130+
131+
def test_scaling_linearly_with_steps():
132+
"""Double the prod steps → roughly double the wall."""
133+
short = estimate_sampling_wall_hours(
134+
n_atoms=5000, n_windows=11,
135+
n_production_steps=10000, n_equilibration_steps=1000)
136+
long = estimate_sampling_wall_hours(
137+
n_atoms=5000, n_windows=11,
138+
n_production_steps=21000, n_equilibration_steps=1000)
139+
# (21000+1000) / (10000+1000) = 22/11 = 2.0
140+
for plat in ("cpu", "metal_m5max", "cuda_h100"):
141+
ratio = long[plat] / short[plat]
142+
assert 1.9 < ratio < 2.1, (plat, ratio)
143+
144+
145+
def test_overhead_surcharge_in_result():
146+
"""Sanity: the overhead surcharge actually raises the estimate
147+
above the pure MD time. A 1000-atom, 100-step run on CUDA at
148+
150M/1000 steps/sec = 150k steps/sec would be 100/150000 =
149+
6.67e-4 s pure. With 1.25× surcharge + n_legs=2: 1.67e-3 s."""
150+
est = estimate_sampling_wall_hours(
151+
n_atoms=1000, n_windows=1,
152+
n_production_steps=100, n_equilibration_steps=0)
153+
cuda_seconds = est["cuda_h100"] * 3600
154+
# 2 legs × 100 steps × 1.25 overhead / (150M / 1k) = 1.67e-3 s
155+
assert 1.5e-3 < cuda_seconds < 2.0e-3, cuda_seconds
156+
157+
158+
if __name__ == "__main__":
159+
funcs = [
160+
test_methane_smoke_runs_in_minutes_on_metal,
161+
test_streptavidin_class_30k_atoms_reasonable,
162+
test_egfr_class_40k_atoms_cpu_flagged_as_infeasible,
163+
test_formatter_flags_cpu_infeasible_over_gate,
164+
test_formatter_does_not_flag_short_cpu_runs,
165+
test_formatter_uses_minutes_for_short_runs,
166+
test_formatter_uses_days_for_very_long_runs,
167+
test_scaling_inversely_with_atoms,
168+
test_scaling_linearly_with_steps,
169+
test_overhead_surcharge_in_result,
170+
]
171+
fails = []
172+
for f in funcs:
173+
try:
174+
f()
175+
print(f"[PASS] {f.__name__}")
176+
except AssertionError as e:
177+
print(f"[FAIL] {f.__name__}: {e}")
178+
fails.append(f.__name__)
179+
except Exception as e:
180+
import traceback
181+
traceback.print_exc()
182+
print(f"[ERROR] {f.__name__}: {e}")
183+
fails.append(f.__name__)
184+
print(f"{len(funcs) - len(fails)}/{len(funcs)} PASS")
185+
sys.exit(0 if not fails else 1)

0 commit comments

Comments
 (0)