diff --git a/docs/source/openvino/export.mdx b/docs/source/openvino/export.mdx index f232dba84c..d6de77201f 100644 --- a/docs/source/openvino/export.mdx +++ b/docs/source/openvino/export.mdx @@ -111,11 +111,13 @@ Optional arguments: precision which is int8_asym by default. --backup-precision {none,int8_sym,int8_asym} Defines a backup precision for mixed-precision weight compression. Only valid for 4-bit weight - formats. If not provided, backup precision is int8_asym. 'none' stands for original floating- - point precision of the model weights, in this case weights are retained in their original - precision without any quantization. 'int8_sym' stands for 8-bit integer symmetric quantization - without zero point. 'int8_asym' stands for 8-bit integer asymmetric quantization with zero - points per each quantization group. + formats. If not provided, the default backup precision depends on the primary compression mode: + mxfp8 is used for mxfp4 and mxfp8 modes with group_size=32; fp8 is used for fp4 and fp8 modes + with the same group_size as the primary precision; for all other compression modes, int8_asym + is used with group_size=-1. 'none' stands for original floating-point precision of the model weights, + in this case weights are retained in their original precision without any quantization. + 'int8_sym' stands for 8-bit integer symmetric quantization without zero point. 'int8_asym' stands for + 8-bit integer asymmetric quantization with zero points per each quantization group. --dataset DATASET The dataset used for data-aware compression or quantization with NNCF. Can be a dataset name (e.g., 'wikitext2') or a string with options (e.g., 'wikitext2:seq_len=128'). The only currently supported option is `seq_len` which represents a length of an input sample sequence (sentence). diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py index cd3280189e..dcea20ce2f 100644 --- a/optimum/commands/export/openvino.py +++ b/optimum/commands/export/openvino.py @@ -162,7 +162,9 @@ def parse_args_openvino(parser: "ArgumentParser"): default=None, help=( "Defines a backup precision for mixed-precision weight compression. Only valid for 4-bit weight formats. " - "If not provided, backup precision is int8_asym. 'none' stands for original floating-point precision of " + "If not provided, the default backup precision depends on the primary compression mode: mxfp8 is used for " + "mxfp4 and mxfp8 modes with group_size=32; fp8 is used for fp4 and fp8 modes with the same group_size as the " + "primary precision; for all other compression modes, int8_asym is used with group_size=-1. 'none' stands for original floating-point precision of " "the model weights, in this case weights are retained in their original precision without any " "quantization. 'int8_sym' stands for 8-bit integer symmetric quantization without zero point. 'int8_asym' " "stands for 8-bit integer asymmetric quantization with zero points per each quantization group." diff --git a/setup.py b/setup.py index 29ad551373..013c3f9dc7 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ "optimum-onnx@git+https://github.com/huggingface/optimum-onnx.git@main", "transformers>=4.45,<4.58", "setuptools", - "nncf>=2.19.0", + "nncf>=3.1.0", "openvino>=2025.4.0", "openvino-tokenizers>=2025.4.0", ] @@ -68,8 +68,8 @@ QUALITY_REQUIRE = ["black~=23.1", "ruff==0.4.4"] EXTRAS_REQUIRE = { - "nncf": ["nncf>=2.19.0"], - "openvino": ["nncf>=2.19.0", "openvino>=2025.4.0", "openvino-tokenizers>=2025.4.0"], + "nncf": ["nncf>=3.1.0"], + "openvino": ["nncf>=3.1.0", "openvino>=2025.4.0", "openvino-tokenizers>=2025.4.0"], "neural-compressor": ["neural-compressor[pt]>=3.4.1,<3.8", "accelerate", "transformers<4.46", "datasets"], "ipex": ["intel-extension-for-pytorch>=2.8", "transformers>4.54,<4.56", "accelerate"], "diffusers": ["diffusers"], diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py index 8e860ba743..5735d21986 100644 --- a/tests/openvino/test_exporters_cli.py +++ b/tests/openvino/test_exporters_cli.py @@ -539,7 +539,7 @@ class OVCLIExportTestCase(unittest.TestCase): "text-generation-with-past", "opt125m", "mxfp4", - {"model": {"int8": 4, "f4e2m1": 72, "f8e8m0": 72}}, + {"model": {"int8": 0, "f4e2m1": 72, "f8e4m3": 2, "f8e8m0": 74}}, ), ( "text-generation-with-past", diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index bfc6ec976a..a140f13b6b 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -613,7 +613,7 @@ class OVWeightCompressionTest(unittest.TestCase): "gpt2", False, dict(bits=4, dtype="mxfp4", group_size=32), - {"model": {"int8": 4, "f4e2m1": 20, "f8e8m0": 20}}, + {"model": {"int8": 0, "f4e2m1": 20, "f8e8m0": 22, "f8e4m3": 2}}, ), ( OVModelForCausalLM,