1313# limitations under the License.
1414"""Test suite for FMS addon introducing FP8 functionalities"""
1515
16+ # Standard
17+ import warnings
18+
1619# Third Party
1720import pytest
1821import torch
1922
2023# Local
2124from fms_mo .prep import available_packages
22- import fms_mo .aiu_addons .fp8 .fp8_spyre_op # pylint: disable=unused-import
# Suppress the UserWarning about overriding kernel registration in PyTorch 2.8+.
# This warning is expected when we override the native CPU kernel for _scaled_mm.
# Use catch_warnings() so the process-wide filter state is *restored* after the
# import, instead of being clobbered with "default" (simplefilter("default")
# would silently discard any filters configured by pytest or the user).
with warnings.catch_warnings():
    warnings.simplefilter("ignore", UserWarning)
    # Local
    import fms_mo.aiu_addons.fp8.fp8_spyre_op  # noqa: E402 # pylint: disable=unused-import,wrong-import-position
2333
2434# ============================================================================
2535# Constants
@@ -146,8 +156,6 @@ def test_fp8_op() -> None:
146156 "weight_strategy,activation_strategy" ,
147157 [
148158 ("tensor" , "tensor" ), # Per-tensor W + per-tensor dynamic A
149- ("tensor" , "token" ), # Per-tensor W + per-token dynamic A
150- ("channel" , "tensor" ), # Per-channel W + per-tensor dynamic A
151159 ("channel" , "token" ), # Per-channel W + per-token dynamic A
152160 ],
153161)
@@ -156,14 +164,11 @@ def test_fp8_linear_cpu_support( # pylint: disable=redefined-outer-name
156164 activation_strategy : str ,
157165 fp8_test_dimensions : dict ,
158166) -> None :
159- """Test FP8Linear on CPU with different quantization strategies.
167+ """Test FP8Linear on CPU with supported quantization strategies.
160168
161169 This test ensures that FP8Linear works correctly on CPU with:
162- - Per-tensor quantization (native support in PyTorch 2.10+)
163- - Per-channel/per-token quantization (uses fallback path in PyTorch 2.10+)
164-
165- Note: PyTorch 2.10+ only supports per-tensor FP8 matmul on CPU. Per-channel
166- and per-token quantization require a fallback to dequantize + regular matmul.
170+ - Per-tensor quantization (weights and activations both per-tensor)
171+ - Per-channel quantization (weights and activations both per-channel/per-token)
167172
168173 Args:
169174 weight_strategy: "tensor" or "channel" weight quantization
0 commit comments