remove tests for unsupported fp8 setup

andrea-fasoli · andrea-fasoli · commit 7fa142bcdfa1 · 2026-03-27T00:03:26.000Z
Signed-off-by: Andrea Fasoli <andrea.fasoli@ibm.com>
diff --git a/fms_mo/aiu_addons/fp8/fp8_spyre_op.py b/fms_mo/aiu_addons/fp8/fp8_spyre_op.py
@@ -50,9 +50,10 @@ def _scaled_mm_cpu_out(
     mat2 = (mat2.to(dtype=out_dtype) * scale2).to(dtype=out_dtype)
 
     if bias is not None:
-        ret = torch.addmm(bias, mat1, mat2).to(dtype=out_dtype)
+        bias_converted = bias.to(dtype=out_dtype)
+        ret = torch.addmm(bias_converted, mat1, mat2)
     else:
-        ret = torch.mm(mat1, mat2).to(dtype=out_dtype)
+        ret = torch.mm(mat1, mat2)
 
     if out is not None:
         out.copy_(ret)
@@ -87,6 +88,7 @@ def _scaled_mm_cpu(
     # In PyTorch 2.8+, use torch.library.impl to override the native CPU kernel
     # The py_kernels dictionary assignment no longer works to override native kernels
     # Note: default overload is registered without the ".default" suffix
+    # Note: this override triggers an expected UserWarning about replacing a previously registered kernel
     torch.library.impl("aten::_scaled_mm.out", "CPU")(_scaled_mm_cpu_out)
     torch.library.impl("aten::_scaled_mm", "CPU")(_scaled_mm_cpu)
 else:
diff --git a/tests/aiu_addons/test_fp8_addon.py b/tests/aiu_addons/test_fp8_addon.py
@@ -26,7 +26,9 @@
 # Suppress the UserWarning about overriding kernel registration in PyTorch 2.8+
 # This warning is expected when we override the native CPU kernel for _scaled_mm
 warnings.simplefilter("ignore", UserWarning)
-import fms_mo.aiu_addons.fp8.fp8_spyre_op  # pylint: disable=unused-import
+# Local
+import fms_mo.aiu_addons.fp8.fp8_spyre_op  # noqa: E402  # pylint: disable=unused-import,wrong-import-position
+
 warnings.simplefilter("default", UserWarning)  # Reset to default after import
 
 # ============================================================================
@@ -154,8 +156,6 @@ def test_fp8_op() -> None:
     "weight_strategy,activation_strategy",
     [
         ("tensor", "tensor"),  # Per-tensor W + per-tensor dynamic A
-        ("tensor", "token"),  # Per-tensor W + per-token dynamic A
-        ("channel", "tensor"),  # Per-channel W + per-tensor dynamic A
         ("channel", "token"),  # Per-channel W + per-token dynamic A
     ],
 )
@@ -164,14 +164,14 @@ def test_fp8_linear_cpu_support(  # pylint: disable=redefined-outer-name
     activation_strategy: str,
     fp8_test_dimensions: dict,
 ) -> None:
-    """Test FP8Linear on CPU with different quantization strategies.
+    """Test FP8Linear on CPU with supported quantization strategies.
 
     This test ensures that FP8Linear works correctly on CPU with:
-    - Per-tensor quantization (native support in PyTorch 2.10+)
-    - Per-channel/per-token quantization (uses fallback path in PyTorch 2.10+)
+    - Per-tensor quantization (weights and activations both per-tensor)
+    - Per-channel quantization (weights and activations both per-channel/per-token)
 
-    Note: PyTorch 2.10+ only supports per-tensor FP8 matmul on CPU. Per-channel
-    and per-token quantization require a fallback to dequantize + regular matmul.
+    Note: Mixed granularity (e.g., per-tensor weights with per-token activations)
+    is not supported on the target custom hardware.
 
     Args:
         weight_strategy: "tensor" or "channel" weight quantization