"""Regression tests for src.fep.binding.estimate_sampling_wall_hours.

Purpose: prevent wasted GPU runs. Biologists about to spend 6-48
hours of GPU time on a binding FEP benefit from a cost preview;
these tests pin the estimator so the preview stays roughly honest
(the advertised accuracy is within a factor of 2-3).

Anchors:
  - CPU                : 2 M steps·atom/sec
  - Metal              : 26 M steps·atom/sec
  - CUDA               : 150 M steps·atom/sec
  - overhead surcharge : 1.25×

Independent re-derivation for a streptavidin-class 30k-atom
system with 11 windows × (25 000 prod + 2 500 equil) steps × 2 legs:
  total_steps = 2 × 11 × 27 500 = 605 000
  CPU   = 605 000 × 30 000 / 2e6 × 1.25 / 3600 ≈ 3.15 h
  Metal = CPU / (26/2)  ≈ 0.24 h
  CUDA  = CPU / (150/2) ≈ 0.042 h ≈ 2.5 min
"""
| 21 | +from __future__ import annotations |
| 22 | + |
| 23 | +import sys |
| 24 | +from pathlib import Path |
| 25 | + |
| 26 | +REPO = Path(__file__).resolve().parents[2] |
| 27 | +sys.path.insert(0, str(REPO)) |
| 28 | + |
| 29 | +from src.fep.binding import ( |
| 30 | + estimate_sampling_wall_hours, |
| 31 | + format_wall_estimate_block, |
| 32 | +) |
| 33 | + |
| 34 | + |
def test_methane_smoke_runs_in_minutes_on_metal():
    """A FreeSolv-class 2 000-atom system must stay cheap on every platform.

    Each platform's estimate (in hours) is compared against a fixed
    ceiling; the whole estimate is attached to a failing assertion so
    the offending value is visible in the report.
    """
    hours = estimate_sampling_wall_hours(
        n_atoms=2000,
        n_windows=11,
        n_production_steps=25000,
        n_equilibration_steps=2500,
    )
    # (result key, ceiling in hours), checked in this order.
    ceilings = (
        ("metal_m5max", 0.1),   # < 6 min
        ("cuda_h100", 0.02),    # < 72 s
        ("cpu", 2.0),           # < 2 h, even the CPU run stays short
    )
    for platform, limit in ceilings:
        assert hours[platform] < limit, hours
| 45 | + |
| 46 | + |
def test_streptavidin_class_30k_atoms_reasonable():
    """Pin the 30k-atom (streptavidin-class) estimate on all platforms.

    Checks three things:
      1. each platform lands in its expected ballpark (hours),
      2. the platforms are ordered CUDA < Metal < CPU,
      3. the CPU/Metal and CPU/CUDA speed-ups sit near 13× and 75×
         (the 26/2 and 150/2 anchor ratios).

    The ratio checks are what catch a single anchor drifting: the
    ballpark windows in (1) are an order of magnitude wide and would
    let a 2× change through unnoticed.
    """
    est = estimate_sampling_wall_hours(
        n_atoms=30000, n_windows=11,
        n_production_steps=25000,
        n_equilibration_steps=2500)
    cpu_h = est["cpu"]
    metal_h = est["metal_m5max"]
    cuda_h = est["cuda_h100"]

    # Absolute ballparks.
    assert 1.0 < cpu_h < 10.0, est      # 3 h ballpark
    assert 0.1 < metal_h < 1.0, est     # ~15 min ballpark
    assert cuda_h < 0.1, est            # < 6 min

    # Platform ordering.
    assert cuda_h < metal_h < cpu_h, est

    # Sanity ratios: Metal ~ 13× faster than CPU, CUDA ~ 75× faster.
    # Written as products rather than quotients so a zero estimate
    # fails the assertion instead of raising ZeroDivisionError.
    assert 10.0 * metal_h < cpu_h < 16.0 * metal_h, est
    assert 60.0 * cuda_h < cpu_h < 90.0 * cuda_h, est
| 61 | + |
| 62 | + |
def test_egfr_class_40k_atoms_cpu_flagged_as_infeasible():
    """EGFR kinase series: ~40k atoms, 6 compounds → CPU-only is
    not a viable plan (days). The formatter must warn.

    One compound at this sampling level costs more than 8 h on CPU
    (asserted below), so six of them exceed two days. The estimate
    passed to the formatter is for a single compound and sits under
    the 48 h gate, so the warning is checked against an 8 h gate,
    which the CPU estimate is already known to exceed.
    """
    est = estimate_sampling_wall_hours(
        n_atoms=40000, n_windows=11,
        # Paper-grade sampling: 50 ps per window.
        n_production_steps=50000,
        n_equilibration_steps=5000)
    assert est["cpu"] > 8.0, est

    block = format_wall_estimate_block(est, gate_hours=48.0)
    # CPU line must appear. Metal/CUDA must appear.
    assert "CPU" in block
    assert "Metal" in block
    assert "CUDA" in block

    # The CPU estimate is above 8 h, so an 8 h gate must trigger the
    # infeasibility warning this test is named after.
    gated = format_wall_estimate_block(est, gate_hours=8.0)
    assert "CPU-only is not viable" in gated, gated
| 77 | + |
| 78 | + |
def test_formatter_flags_cpu_infeasible_over_gate():
    """A deliberately huge configuration must trigger the CPU warning.

    The config is sized so the CPU estimate exceeds the 48 h gate
    handed to the formatter.
    """
    huge_config = {
        "n_atoms": 100000,
        "n_windows": 21,
        "n_production_steps": 100000,
        "n_equilibration_steps": 10000,
    }
    rendered = format_wall_estimate_block(
        estimate_sampling_wall_hours(**huge_config),
        gate_hours=48.0,
    )
    assert "CPU-only is not viable" in rendered, rendered
| 87 | + |
| 88 | + |
def test_formatter_does_not_flag_short_cpu_runs():
    """A tiny configuration must not produce the CPU infeasibility warning."""
    tiny_config = {
        "n_atoms": 1000,
        "n_windows": 5,
        "n_production_steps": 1000,
        "n_equilibration_steps": 500,
    }
    rendered = format_wall_estimate_block(
        estimate_sampling_wall_hours(**tiny_config),
        gate_hours=48.0,
    )
    warning_present = "CPU-only is not viable" in rendered
    assert not warning_present, rendered
| 96 | + |
| 97 | + |
def test_formatter_uses_minutes_for_short_runs():
    """Very short runs should be rendered with a minutes unit.

    The intent is that all three platforms report in "min" for this
    tiny config; the assertion only verifies that "min" occurs
    somewhere in the formatted block.
    """
    hours = estimate_sampling_wall_hours(
        n_atoms=500,
        n_windows=3,
        n_production_steps=500,
        n_equilibration_steps=100,
    )
    # Formatter is called with its default gate here.
    rendered = format_wall_estimate_block(hours)
    assert "min" in rendered, rendered
| 106 | + |
| 107 | + |
def test_formatter_uses_days_for_very_long_runs():
    """Very long runs should be rendered with a days unit.

    The CPU line is expected to be shown in days rather than hours;
    the assertion looks for the " d" marker anywhere in the block.
    """
    hours = estimate_sampling_wall_hours(
        n_atoms=200000,
        n_windows=21,
        n_production_steps=500000,
        n_equilibration_steps=50000,
    )
    # Formatter is called with its default gate here.
    rendered = format_wall_estimate_block(hours)
    assert " d" in rendered, rendered
| 116 | + |
| 117 | + |
def test_scaling_inversely_with_atoms():
    """Doubling the atom count should roughly double the wall time.

    Despite "inversely" in the name, the check is for proportional
    scaling: the 10 000-atom estimate divided by the 5 000-atom one
    must fall within (1.9, 2.1) on every platform.
    """
    # Everything except the atom count is held fixed.
    shared = {
        "n_windows": 11,
        "n_production_steps": 10000,
        "n_equilibration_steps": 1000,
    }
    small = estimate_sampling_wall_hours(n_atoms=5000, **shared)
    big = estimate_sampling_wall_hours(n_atoms=10000, **shared)
    for plat in ("cpu", "metal_m5max", "cuda_h100"):
        hours_small, hours_big = small[plat], big[plat]
        ratio = hours_big / hours_small
        assert 1.9 < ratio < 2.1, (plat, ratio, hours_small, hours_big)
| 129 | + |
| 130 | + |
def test_scaling_linearly_with_steps():
    """Doubling the per-window step total should roughly double the wall time.

    Production steps go from 10 000 to 21 000 with 1 000 equilibration
    steps in both runs, so the per-window totals are 11 000 and 22 000,
    a factor of exactly 2.0.
    """
    # Everything except the production step count is held fixed.
    shared = {
        "n_atoms": 5000,
        "n_windows": 11,
        "n_equilibration_steps": 1000,
    }
    baseline = estimate_sampling_wall_hours(n_production_steps=10000, **shared)
    doubled = estimate_sampling_wall_hours(n_production_steps=21000, **shared)
    for plat in ("cpu", "metal_m5max", "cuda_h100"):
        ratio = doubled[plat] / baseline[plat]
        assert 1.9 < ratio < 2.1, (plat, ratio)
| 143 | + |
| 144 | + |
def test_overhead_surcharge_in_result():
    """The overhead surcharge must lift the estimate above pure MD time.

    Worked example for 1 000 atoms, one window, 100 production steps
    and no equilibration on CUDA:
      throughput   = 150M / 1000 atoms = 150k steps/sec
      pure MD time = 100 / 150 000     = 6.67e-4 s per leg
      two legs     = 1.33e-3 s
      × 1.25       = 1.67e-3 s
    The accepted window (1.5e-3, 2.0e-3) s excludes the 1.33e-3 s
    figure, so the test only passes when the surcharge is applied.
    """
    est = estimate_sampling_wall_hours(
        n_atoms=1000,
        n_windows=1,
        n_production_steps=100,
        n_equilibration_steps=0,
    )
    # The estimator reports hours; convert to seconds for the check.
    seconds_on_cuda = 3600 * est["cuda_h100"]
    lower, upper = 1.5e-3, 2.0e-3
    assert lower < seconds_on_cuda < upper, seconds_on_cuda
| 156 | + |
| 157 | + |
if __name__ == "__main__":
    # Minimal standalone runner: executes every test in order, prints
    # one status line per test, and exits non-zero if any test failed.
    import traceback

    suite = (
        test_methane_smoke_runs_in_minutes_on_metal,
        test_streptavidin_class_30k_atoms_reasonable,
        test_egfr_class_40k_atoms_cpu_flagged_as_infeasible,
        test_formatter_flags_cpu_infeasible_over_gate,
        test_formatter_does_not_flag_short_cpu_runs,
        test_formatter_uses_minutes_for_short_runs,
        test_formatter_uses_days_for_very_long_runs,
        test_scaling_inversely_with_atoms,
        test_scaling_linearly_with_steps,
        test_overhead_surcharge_in_result,
    )
    failed = []
    for case in suite:
        label = case.__name__
        try:
            case()
        except AssertionError as exc:
            # An assertion tripped: report it as an ordinary failure.
            print(f"[FAIL] {label}: {exc}")
            failed.append(label)
        except Exception as exc:
            # Anything else is an error in the test or the code under
            # test; show the full traceback before the summary line.
            traceback.print_exc()
            print(f"[ERROR] {label}: {exc}")
            failed.append(label)
        else:
            print(f"[PASS] {label}")
    print(f"{len(suite) - len(failed)}/{len(suite)} PASS")
    sys.exit(1 if failed else 0)
0 commit comments