Skip to content

Commit 75a2348

Browse files
committed
Tighten fused MoE test tolerances from 5% to 2%
test_eager_correctness, test_single_expert, and test_batched_correctness used a 5% relative tolerance for the INT4 kernel-vs-dequant comparison. Tighten it to 2% to match the e2e runner bar (fe71bd4).
1 parent: 8226f50 · commit: 75a2348

1 file changed

Lines changed: 3 additions & 3 deletions

File tree

backends/cuda/tests/test_fused_moe.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -302,7 +302,7 @@ def test_eager_correctness(self):
302302
rel = diff / (ref.float().abs().max().item() + 1e-10)
303303
self.assertLess(
304304
rel,
305-
0.05,
305+
0.02,
306306
f"{desc}: relative diff {rel:.4f} (abs {diff:.6f})",
307307
)
308308

@@ -332,7 +332,7 @@ def test_single_expert(self):
332332
ref = w2_dq[1] @ activated
333333
diff = (out[t].float() - ref.float()).abs().max().item()
334334
rel = diff / (ref.float().abs().max().item() + 1e-10)
335-
self.assertLess(rel, 0.05, f"token {t}: relative diff {rel:.4f}")
335+
self.assertLess(rel, 0.02, f"token {t}: relative diff {rel:.4f}")
336336

337337
def test_batched_correctness(self):
338338
"""Batched kernel matches reference across M values."""
@@ -390,7 +390,7 @@ def test_batched_correctness(self):
390390
rel = diff / (ref.float().abs().max().item() + 1e-10)
391391
self.assertLess(
392392
rel,
393-
0.05,
393+
0.02,
394394
f"{desc}: relative diff {rel:.4f} (abs {diff:.6f})",
395395
)
396396

0 commit comments

Comments
 (0)