Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions docs/source/openvino/export.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,13 @@ Optional arguments:
precision which is int8_asym by default.
--backup-precision {none,int8_sym,int8_asym}
Defines a backup precision for mixed-precision weight compression. Only valid for 4-bit weight
formats. If not provided, backup precision is int8_asym. 'none' stands for original floating-
point precision of the model weights, in this case weights are retained in their original
precision without any quantization. 'int8_sym' stands for 8-bit integer symmetric quantization
without zero point. 'int8_asym' stands for 8-bit integer asymmetric quantization with zero
points per each quantization group.
formats. If not provided, the default backup precision depends on the primary compression mode:
mxfp8 is used for mxfp4 and mxfp8 modes with group_size=32; fp8 is used for fp4 and fp8 modes
with the same group_size as the primary precision; for all other compression modes, int8_asym
is used with group_size=-1. 'none' stands for original floating-point precision of the model weights,
in this case weights are retained in their original precision without any quantization.
'int8_sym' stands for 8-bit integer symmetric quantization without zero point. 'int8_asym' stands for
8-bit integer asymmetric quantization with zero points for each quantization group.
--dataset DATASET The dataset used for data-aware compression or quantization with NNCF. Can be a dataset name
(e.g., 'wikitext2') or a string with options (e.g., 'wikitext2:seq_len=128'). The only currently
supported option is `seq_len`, which represents the length of an input sample sequence (sentence).
Expand Down
4 changes: 3 additions & 1 deletion optimum/commands/export/openvino.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,9 @@ def parse_args_openvino(parser: "ArgumentParser"):
default=None,
help=(
"Defines a backup precision for mixed-precision weight compression. Only valid for 4-bit weight formats. "
"If not provided, backup precision is int8_asym. 'none' stands for original floating-point precision of "
"If not provided, the default backup precision depends on the primary compression mode: mxfp8 is used for "
"mxfp4 and mxfp8 modes with group_size=32; fp8 is used for fp4 and fp8 modes with the same group_size as the "
"primary precision; for all other compression modes, int8_asym is used with group_size=-1. 'none' stands for original floating-point precision of "
"the model weights, in this case weights are retained in their original precision without any "
"quantization. 'int8_sym' stands for 8-bit integer symmetric quantization without zero point. 'int8_asym' "
"stands for 8-bit integer asymmetric quantization with zero points for each quantization group."
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"optimum-onnx@git+https://github.com/huggingface/optimum-onnx.git@main",
"transformers>=4.45,<4.58",
"setuptools",
"nncf>=2.19.0",
"nncf>=3.1.0",
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is it recommended to install 3.1.0 by default? Do we have any model that requires it? Please advise.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I bumped the minimum nncf version to 3.1.0 because there is a test that passes only with nncf ≥ 3.1.0, and the default behavior of the --backup-precision parameter was extended in that release. Please see the updated docs.

"openvino>=2025.4.0",
"openvino-tokenizers>=2025.4.0",
]
Expand Down Expand Up @@ -68,8 +68,8 @@
QUALITY_REQUIRE = ["black~=23.1", "ruff==0.4.4"]

EXTRAS_REQUIRE = {
"nncf": ["nncf>=2.19.0"],
"openvino": ["nncf>=2.19.0", "openvino>=2025.4.0", "openvino-tokenizers>=2025.4.0"],
"nncf": ["nncf>=3.1.0"],
"openvino": ["nncf>=3.1.0", "openvino>=2025.4.0", "openvino-tokenizers>=2025.4.0"],
"neural-compressor": ["neural-compressor[pt]>=3.4.1,<3.8", "accelerate", "transformers<4.46", "datasets"],
"ipex": ["intel-extension-for-pytorch>=2.8", "transformers>4.54,<4.56", "accelerate"],
"diffusers": ["diffusers"],
Expand Down
2 changes: 1 addition & 1 deletion tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ class OVCLIExportTestCase(unittest.TestCase):
"text-generation-with-past",
"opt125m",
"mxfp4",
{"model": {"int8": 4, "f4e2m1": 72, "f8e8m0": 72}},
{"model": {"int8": 0, "f4e2m1": 72, "f8e4m3": 2, "f8e8m0": 74}},
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why was it changed and now there is no int8 coeffs?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We recently merged a new feature into NNCF. Please see openvinotoolkit/nncf#3886. The references have been updated to reflect the current state.

),
(
"text-generation-with-past",
Expand Down
2 changes: 1 addition & 1 deletion tests/openvino/test_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@ class OVWeightCompressionTest(unittest.TestCase):
"gpt2",
False,
dict(bits=4, dtype="mxfp4", group_size=32),
{"model": {"int8": 4, "f4e2m1": 20, "f8e8m0": 20}},
{"model": {"int8": 0, "f4e2m1": 20, "f8e8m0": 22, "f8e4m3": 2}},
),
(
OVModelForCausalLM,
Expand Down
Loading