2626# Suppress the UserWarning about overriding kernel registration in PyTorch 2.8+
2727# This warning is expected when we override the native CPU kernel for _scaled_mm
2828warnings .simplefilter ("ignore" , UserWarning )
29- import fms_mo .aiu_addons .fp8 .fp8_spyre_op # pylint: disable=unused-import
29+ # Local
30+ import fms_mo .aiu_addons .fp8 .fp8_spyre_op # noqa: E402 # pylint: disable=unused-import,wrong-import-position
31+
3032warnings .simplefilter ("default" , UserWarning ) # Re-enable UserWarning reporting after import (installs the "default" action; earlier filters are not restored)
3133
3234# ============================================================================
@@ -154,8 +156,6 @@ def test_fp8_op() -> None:
154156 "weight_strategy,activation_strategy" ,
155157 [
156158 ("tensor" , "tensor" ), # Per-tensor W + per-tensor dynamic A
157- ("tensor" , "token" ), # Per-tensor W + per-token dynamic A
158- ("channel" , "tensor" ), # Per-channel W + per-tensor dynamic A
159159 ("channel" , "token" ), # Per-channel W + per-token dynamic A
160160 ],
161161)
@@ -164,14 +164,14 @@ def test_fp8_linear_cpu_support( # pylint: disable=redefined-outer-name
164164 activation_strategy : str ,
165165 fp8_test_dimensions : dict ,
166166) -> None :
167- """Test FP8Linear on CPU with different quantization strategies.
167+ """Test FP8Linear on CPU with supported quantization strategies.
168168
169169 This test ensures that FP8Linear works correctly on CPU with:
170- - Per-tensor quantization (native support in PyTorch 2.10+ )
171- - Per-channel/per-token quantization (uses fallback path in PyTorch 2.10+ )
170+ - Per-tensor quantization (weights and activations both per-tensor )
171+ - Per-channel quantization (per-channel weights with per-token activations)
172172
173- Note: PyTorch 2.10+ only supports per-tensor FP8 matmul on CPU. Per-channel
174- and per-token quantization require a fallback to dequantize + regular matmul .
173+ Note: Mixed granularity (e.g., per-tensor weights with per-token activations)
174+ is not supported on the target custom hardware .
175175
176176 Args:
177177 weight_strategy: "tensor" or "channel" weight quantization
0 commit comments