Skip to content

Commit 00fdc39

Browse files
TimDettmers and claude committed
Fix test ordering: check NaN explicitly, access state after forward
- test_pipeline_deterministic: check for NaN before comparing outputs
- test_pipeline_larger_config: access weight_scales_batched after forward

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 52076fc commit 00fdc39

File tree

1 file changed

+14
-4
lines changed

1 file changed

+14
-4
lines changed

tests/test_moe_sm100_pipeline.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,7 @@ def test_pipeline_deterministic(self, small_moe_config):
296296
297297
Note: CUTLASS SM_100 block-scaled GEMM may have non-deterministic
298298
accumulation order across tiles, so we use approximate comparison.
299+
NaN in output indicates a GEMM state issue (tracked separately).
299300
"""
300301
from bitsandbytes.nn.modules import LinearNVFP4MoE
301302

@@ -312,7 +313,13 @@ def test_pipeline_deterministic(self, small_moe_config):
312313
expert_offsets = _make_expert_offsets(tpe)
313314

314315
out1 = layer(x, expert_offsets)
316+
has_nan1 = torch.isnan(out1).any().item()
317+
assert not has_nan1, f"First call produced NaN (abs_max={out1.abs().max().item()})"
318+
315319
out2 = layer(x, expert_offsets)
320+
has_nan2 = torch.isnan(out2).any().item()
321+
assert not has_nan2, \
322+
f"Second call produced NaN (first call abs_max={out1.abs().max().item()})"
316323

317324
# Allow small numerical differences from non-deterministic accumulation
318325
if not torch.equal(out1, out2):
@@ -348,15 +355,18 @@ def test_pipeline_larger_config(self, moe_config):
348355
layer = LinearNVFP4MoE(num_experts, K, N, bias=False)
349356
layer = layer.cuda()
350357

351-
# Diagnostic: check weight_scales_batched size
352-
actual_sfb = layer.weight_scales_batched.numel()
353-
print(f" weight_scales_batched size: {actual_sfb} bytes, expected batched: {sfb_batched}")
354-
355358
x = torch.randn(total_tokens, K, dtype=torch.bfloat16, device="cuda")
356359
expert_offsets = _make_expert_offsets(tpe)
357360

358361
out = layer(x, expert_offsets)
359362

363+
# Diagnostic: check weight_scales_batched size (after forward triggers quantization)
364+
if layer.weight_scales_batched is not None:
365+
actual_sfb = layer.weight_scales_batched.numel()
366+
print(f" weight_scales_batched size: {actual_sfb} bytes, expected batched: {sfb_batched}")
367+
else:
368+
print(" WARNING: weight_scales_batched is None after forward")
369+
360370
assert out.shape == (total_tokens, N)
361371
assert out.dtype == torch.bfloat16
362372

0 commit comments

Comments (0)