From 639a014b7ab476b00f1c6e9a5cb1d006bfcd6444 Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Wed, 22 Apr 2026 09:34:09 +0800 Subject: [PATCH 1/3] Enable TorchAO int4wo quantization tests on XPU - Remove _int4wo_skip marker that restricted int4wo tests to CUDA only - Add XPU-specific int4_packing_format='plain_int32' for Int4WeightOnlyConfig --- tests/models/testing_utils/quantization.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/models/testing_utils/quantization.py b/tests/models/testing_utils/quantization.py index 4403cacc6966..155646330a38 100644 --- a/tests/models/testing_utils/quantization.py +++ b/tests/models/testing_utils/quantization.py @@ -818,6 +818,10 @@ class TorchAoConfigMixin: @staticmethod def _get_quant_config(config_name): config_cls = getattr(_torchao_quantization, config_name) + # TorchAO int4 quantization requires plain_int32 packing format on Intel XPU + if config_name == "Int4WeightOnlyConfig" and torch_device == "xpu": + return TorchAoConfig(config_cls(int4_packing_format="plain_int32")) + return TorchAoConfig(config_cls()) def _create_quantized_model(self, config_name, **extra_kwargs): @@ -832,10 +836,6 @@ def _verify_if_layer_quantized(self, name, module, config_kwargs): assert isinstance(module, torch.nn.Linear), f"Layer {name} is not Linear, got {type(module)}" -# int4wo requires CUDA-specific ops (_convert_weight_to_int4pack) -_int4wo_skip = pytest.mark.skipif(torch_device != "cuda", reason="int4wo quantization requires CUDA") - - @is_torchao @require_accelerator @require_torchao_version_greater_or_equal("0.7.0") @@ -861,7 +861,7 @@ class TorchAoTesterMixin(TorchAoConfigMixin, QuantizationTesterMixin): @pytest.mark.parametrize( "quant_type", [ - pytest.param("int4wo", marks=_int4wo_skip), + "int4wo", "int8wo", "int8dq", ], @@ -873,7 +873,7 @@ def test_torchao_quantization_num_parameters(self, quant_type): @pytest.mark.parametrize( "quant_type", [ - pytest.param("int4wo", marks=_int4wo_skip), 
+ "int4wo", "int8wo", "int8dq", ], @@ -888,7 +888,7 @@ def test_torchao_quantization_memory_footprint(self, quant_type): @pytest.mark.parametrize( "quant_type", [ - pytest.param("int4wo", marks=_int4wo_skip), + "int4wo", "int8wo", "int8dq", ], From d77dd97ce7d13a88ad2363e183b66049a082b33b Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Wed, 6 May 2026 09:43:33 +0800 Subject: [PATCH 2/3] Restore int4wo skip marker, skipping only when device is neither CUDA nor XPU Signed-off-by: jiqing-feng --- tests/models/testing_utils/quantization.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/models/testing_utils/quantization.py b/tests/models/testing_utils/quantization.py index 155646330a38..68f6bf20955c 100644 --- a/tests/models/testing_utils/quantization.py +++ b/tests/models/testing_utils/quantization.py @@ -836,6 +836,10 @@ def _verify_if_layer_quantized(self, name, module, config_kwargs): assert isinstance(module, torch.nn.Linear), f"Layer {name} is not Linear, got {type(module)}" +# int4wo requires CUDA or XPU ops (_convert_weight_to_int4pack) +_int4wo_skip = pytest.mark.skipif(torch_device not in ["cuda", "xpu"], reason="int4wo quantization requires CUDA or XPU") + + @is_torchao @require_accelerator @require_torchao_version_greater_or_equal("0.7.0") @@ -861,7 +865,7 @@ class TorchAoTesterMixin(TorchAoConfigMixin, QuantizationTesterMixin): @pytest.mark.parametrize( "quant_type", [ - "int4wo", + pytest.param("int4wo", marks=_int4wo_skip), "int8wo", "int8dq", ], @@ -873,7 +877,7 @@ def test_torchao_quantization_num_parameters(self, quant_type): @pytest.mark.parametrize( "quant_type", [ - "int4wo", + pytest.param("int4wo", marks=_int4wo_skip), "int8wo", "int8dq", ], @@ -888,7 +892,7 @@ def test_torchao_quantization_memory_footprint(self, quant_type): @pytest.mark.parametrize( "quant_type", [ - "int4wo", + pytest.param("int4wo", marks=_int4wo_skip), "int8wo", "int8dq", ], From 9eb220465b710a2e2911a7089ba85e8032f6cef6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 6 May 2026 
03:05:41 +0000 Subject: [PATCH 3/3] Apply style fixes --- tests/models/testing_utils/quantization.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/models/testing_utils/quantization.py b/tests/models/testing_utils/quantization.py index e27aa3004534..30d44a92c425 100644 --- a/tests/models/testing_utils/quantization.py +++ b/tests/models/testing_utils/quantization.py @@ -824,7 +824,9 @@ def _verify_if_layer_quantized(self, name, module, config_kwargs): # int4wo requires CUDA or XPU ops (_convert_weight_to_int4pack) -_int4wo_skip = pytest.mark.skipif(torch_device not in ["cuda", "xpu"], reason="int4wo quantization requires CUDA or XPU") +_int4wo_skip = pytest.mark.skipif( + torch_device not in ["cuda", "xpu"], reason="int4wo quantization requires CUDA or XPU" +) @is_torchao