Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions docs/source/openvino/export.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,13 @@ Optional arguments:
precision which is int8_asym by default.
--backup-precision {none,int8_sym,int8_asym}
Defines a backup precision for mixed-precision weight compression. Only valid for 4-bit weight
formats. If not provided, backup precision is int8_asym. 'none' stands for original floating-
point precision of the model weights, in this case weights are retained in their original
precision without any quantization. 'int8_sym' stands for 8-bit integer symmetric quantization
without zero point. 'int8_asym' stands for 8-bit integer asymmetric quantization with zero
points per each quantization group.
formats. If not provided, the default backup precision depends on the primary compression mode:
mxfp8 is used for mxfp4 and mxfp8 modes with group_size=32; fp8 is used for fp4 and fp8 modes
with the same group_size as the primary precision; for all other compression modes, int8_asym
is used with group_size=-1. 'none' stands for original floating-point precision of the model weights,
in this case weights are retained in their original precision without any quantization.
'int8_sym' stands for 8-bit integer symmetric quantization without zero point. 'int8_asym' stands for
8-bit integer asymmetric quantization with zero points for each quantization group.
--dataset DATASET The dataset used for data-aware compression or quantization with NNCF. Can be a dataset name
(e.g., 'wikitext2') or a string with options (e.g., 'wikitext2:seq_len=128'). The only currently
supported option is `seq_len`, which represents the length of an input sample sequence (sentence).
Expand Down
4 changes: 3 additions & 1 deletion optimum/commands/export/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,9 @@ def parse_args_openvino(parser: "ArgumentParser"):
default=None,
help=(
"Defines a backup precision for mixed-precision weight compression. Only valid for 4-bit weight formats. "
"If not provided, backup precision is int8_asym. 'none' stands for original floating-point precision of "
"If not provided, the default backup precision depends on the primary compression mode: mxfp8 is used for "
"mxfp4 and mxfp8 modes with group_size=32; fp8 is used for fp4 and fp8 modes with the same group_size as the "
"primary precision; for all other compression modes, int8_asym is used with group_size=-1. 'none' stands for original floating-point precision of "
"the model weights, in this case weights are retained in their original precision without any "
"quantization. 'int8_sym' stands for 8-bit integer symmetric quantization without zero point. 'int8_asym' "
"stands for 8-bit integer asymmetric quantization with zero points for each quantization group."
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"optimum-onnx@git+https://github.com/huggingface/optimum-onnx.git@main",
"transformers>=4.45,<4.58",
"setuptools",
"nncf>=2.19.0",
"nncf>=3.1.0",
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is it recommended to install 3.1.0 by default? Do we have any model that requires it? Please advise.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I bumped the minimum nncf version to 3.1.0 because there is a test that passes only with nncf ≥ 3.1.0, and the default behavior of the --backup-precision parameter was extended in that release. Please see the updated docs.

"openvino>=2025.4.0",
"openvino-tokenizers>=2025.4.0",
]
Expand Down Expand Up @@ -68,8 +68,8 @@
QUALITY_REQUIRE = ["black~=23.1", "ruff==0.4.4"]

EXTRAS_REQUIRE = {
"nncf": ["nncf>=2.19.0"],
"openvino": ["nncf>=2.19.0", "openvino>=2025.4.0", "openvino-tokenizers>=2025.4.0"],
"nncf": ["nncf>=3.1.0"],
"openvino": ["nncf>=3.1.0", "openvino>=2025.4.0", "openvino-tokenizers>=2025.4.0"],
"neural-compressor": ["neural-compressor[pt]>=3.4.1,<3.8", "accelerate", "transformers<4.46", "datasets"],
"ipex": ["intel-extension-for-pytorch>=2.8", "transformers>4.54,<4.56", "accelerate"],
"diffusers": ["diffusers"],
Expand Down
2 changes: 1 addition & 1 deletion tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ class OVCLIExportTestCase(unittest.TestCase):
"text-generation-with-past",
"opt125m",
"mxfp4",
{"model": {"int8": 4, "f4e2m1": 72, "f8e8m0": 72}},
{"model": {"int8": 0, "f4e2m1": 72, "f8e4m3": 2, "f8e8m0": 74}},
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why was it changed and now there is no int8 coeffs?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We recently merged a new feature into NNCF. Please see openvinotoolkit/nncf#3886. The references have been updated to reflect the current state.

),
(
"text-generation-with-past",
Expand Down
2 changes: 1 addition & 1 deletion tests/openvino/test_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@ class OVWeightCompressionTest(unittest.TestCase):
"gpt2",
False,
dict(bits=4, dtype="mxfp4", group_size=32),
{"model": {"int8": 4, "f4e2m1": 20, "f8e8m0": 20}},
{"model": {"int8": 0, "f4e2m1": 20, "f8e8m0": 22, "f8e4m3": 2}},
),
(
OVModelForCausalLM,
Expand Down
Loading