Skip to content

Commit 2905cb0

Browse files
authored
Updated the diffusion config issue and more test cases (#937)
## What does this PR do? **Type of change:** new tests, Bug fix <!-- Use one of the following: Bug fix, new feature, new example, new tests, documentation. --> **Overview:** - **Fixed the INT8 config issue** - **Add HF checkpoint export test coverage** 1. The `--hf-ckpt-dir` export path had zero test coverage. This PR adds tests at two levels: 2. Unit tests (tests/unit/torch/export/test_export_diffusers.py): - Extended test_export_diffusers_real_quantized to parametrize over INT8, INT8 SmoothQuant, FP8, and FP4 configs - (previously only FP8). This gives 3 models x 4 configs = 12 test cases. 3. GPU integration tests (tests/gpu/torch/export/test_export_diffusers_hf_ckpt.py) - New file testing the full quantize.py --hf-ckpt-dir pipeline via subprocess with 4 combos: - SDXL INT8 smoothquant min-mean (the exact scenario that triggered the bug) - Flux INT8 smoothquant min-mean - SDXL FP8 - Flux FP4 ## Usage <!-- You can potentially add a usage example below. --> ```python # Add a code snippet demonstrating how to use this ``` ## Testing <!-- Mention how you have tested your change if applicable. --> ## Before your PR is "*Ready for review*" <!-- If you haven't finished some of the above items you can still open `Draft` PR. --> - **Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/Model-Optimizer/blob/main/CONTRIBUTING.md)** and your commits are signed. - **Is this change backward compatible?**: No <!--- If No, explain why. --> - **Did you write any new necessary tests?**: Yes - **Did you add or update any necessary documentation?**: No - **Did you update [Changelog](https://github.com/NVIDIA/Model-Optimizer/blob/main/CHANGELOG.rst)?**: No <!--- Only for new features, API changes, critical bug fixes or bw breaking changes. --> ## Additional Information <!-- E.g. related issue. 
--> <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit ## Release Notes * **Tests** * Added test coverage for exporting Diffusers models with Hugging Face checkpoints across multiple quantization formats (INT8, FP8, FP4) * Extended quantization export testing to validate multiple configuration scenarios * **Chores** * Refined INT8 quantization configuration with improved calibrator support for convolution layers <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Signed-off-by: Jingyu Xin <jingyux@nvidia.com>
1 parent 56e97c8 commit 2905cb0

File tree

4 files changed

+219
-51
lines changed

4 files changed

+219
-51
lines changed

examples/diffusers/quantization/config.py

Lines changed: 7 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
INT8_DEFAULT_CONFIG = {
3434
"quant_cfg": {
3535
"*weight_quantizer": {"num_bits": 8, "axis": 0},
36-
"*input_quantizer": {"num_bits": 8, "axis": 0},
36+
"*input_quantizer": {"num_bits": 8, "axis": None},
3737
"*output_quantizer": {"enable": False},
3838
"default": {"enable": False},
3939
},
@@ -112,8 +112,10 @@ def set_quant_config_attr(quant_config, trt_high_precision_dtype, quant_algo, **
112112

113113

114114
def reset_set_int8_config(quant_config, percentile, n_steps, collect_method, backbone):
115-
"""
116-
Configure INT8 quantization with different settings for Conv2d and Linear layers.
115+
"""Add PercentileCalibrator to Conv2d input quantizers.
116+
117+
Linear layers are left unchanged — their axis settings come from the base
118+
quant_config (e.g. INT8_SMOOTHQUANT_CFG or INT8_DEFAULT_CONFIG).
117119
118120
Args:
119121
quant_config: The quantization configuration dictionary
@@ -122,31 +124,9 @@ def reset_set_int8_config(quant_config, percentile, n_steps, collect_method, bac
122124
collect_method: Method for collecting calibration statistics
123125
backbone: The model backbone to analyze layer types
124126
"""
125-
126-
# Build a mapping of layer names to their types
127-
layer_type_map = {}
128127
for name, module in backbone.named_modules():
129-
if isinstance(module, (nn.Linear, nn.Conv2d)):
130-
layer_type_map[name] = type(module)
131-
132-
quant_config["quant_cfg"] = {}
133-
for layer_name, layer_type in layer_type_map.items():
134-
wq_name = f"*{layer_name}*weight_quantizer*"
135-
aq_name = f"*{layer_name}*input_quantizer*"
136-
if layer_type is nn.Linear:
137-
quant_config["quant_cfg"][wq_name] = {
138-
"num_bits": 8,
139-
"axis": 0,
140-
}
141-
quant_config["quant_cfg"][aq_name] = {
142-
"num_bits": 8,
143-
"axis": -1,
144-
}
145-
else:
146-
quant_config["quant_cfg"][wq_name] = {
147-
"num_bits": 8,
148-
"axis": 0,
149-
}
128+
if isinstance(module, nn.Conv2d):
129+
aq_name = f"*{name}*input_quantizer*"
150130
quant_config["quant_cfg"][aq_name] = {
151131
"num_bits": 8,
152132
"axis": None,
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
from pathlib import Path
17+
from typing import NamedTuple
18+
19+
import pytest
20+
from _test_utils.examples.models import FLUX_SCHNELL_PATH, SDXL_1_0_PATH
21+
from _test_utils.examples.run_command import run_example_command
22+
from _test_utils.torch.misc import minimum_sm
23+
24+
25+
class DiffuserHfExportModel(NamedTuple):
26+
name: str
27+
path: str
28+
dtype: str
29+
format_type: str
30+
quant_algo: str
31+
collect_method: str
32+
model_dtype: str = "Half"
33+
34+
def quantize_and_export_hf(self, tmp_path: Path) -> Path:
35+
hf_ckpt_dir = tmp_path / f"{self.name}_{self.format_type}_hf_ckpt"
36+
cmd_args = [
37+
"python",
38+
"quantize.py",
39+
"--model",
40+
self.name,
41+
"--override-model-path",
42+
self.path,
43+
"--calib-size",
44+
"8",
45+
"--batch-size",
46+
"2",
47+
"--n-steps",
48+
"20",
49+
"--percentile",
50+
"1.0",
51+
"--alpha",
52+
"0.8",
53+
"--format",
54+
self.format_type,
55+
"--quant-algo",
56+
self.quant_algo,
57+
"--collect-method",
58+
self.collect_method,
59+
"--model-dtype",
60+
self.model_dtype,
61+
"--trt-high-precision-dtype",
62+
self.dtype,
63+
"--hf-ckpt-dir",
64+
str(hf_ckpt_dir),
65+
]
66+
run_example_command(cmd_args, "diffusers/quantization")
67+
return hf_ckpt_dir
68+
69+
70+
@pytest.mark.parametrize(
71+
"model",
72+
[
73+
DiffuserHfExportModel(
74+
name="sdxl-1.0",
75+
path=SDXL_1_0_PATH,
76+
dtype="Half",
77+
format_type="int8",
78+
quant_algo="smoothquant",
79+
collect_method="min-mean",
80+
),
81+
DiffuserHfExportModel(
82+
name="flux-schnell",
83+
path=FLUX_SCHNELL_PATH,
84+
dtype="BFloat16",
85+
format_type="int8",
86+
quant_algo="smoothquant",
87+
collect_method="min-mean",
88+
model_dtype="BFloat16",
89+
),
90+
pytest.param(
91+
DiffuserHfExportModel(
92+
name="sdxl-1.0",
93+
path=SDXL_1_0_PATH,
94+
dtype="Half",
95+
format_type="fp8",
96+
quant_algo="max",
97+
collect_method="default",
98+
),
99+
marks=minimum_sm(89),
100+
),
101+
pytest.param(
102+
DiffuserHfExportModel(
103+
name="flux-schnell",
104+
path=FLUX_SCHNELL_PATH,
105+
dtype="BFloat16",
106+
format_type="fp4",
107+
quant_algo="max",
108+
collect_method="default",
109+
model_dtype="BFloat16",
110+
),
111+
marks=minimum_sm(89),
112+
),
113+
],
114+
ids=[
115+
"sdxl_1.0_int8_smoothquant_min_mean",
116+
"flux_schnell_int8_smoothquant_min_mean",
117+
"sdxl_1.0_fp8_max_default",
118+
"flux_schnell_fp4_max_default",
119+
],
120+
)
121+
def test_diffusers_hf_ckpt_export(model: DiffuserHfExportModel, tmp_path: Path) -> None:
122+
hf_ckpt_dir = model.quantize_and_export_hf(tmp_path)
123+
124+
assert hf_ckpt_dir.exists(), f"HF checkpoint directory was not created: {hf_ckpt_dir}"
125+
126+
config_files = list(hf_ckpt_dir.rglob("config.json"))
127+
assert len(config_files) > 0, f"No config.json found in {hf_ckpt_dir}"
128+
129+
weight_files = list(hf_ckpt_dir.rglob("*.safetensors")) + list(hf_ckpt_dir.rglob("*.bin"))
130+
assert len(weight_files) > 0, f"No weight files (.safetensors or .bin) found in {hf_ckpt_dir}"
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import json
17+
18+
import pytest
19+
from _test_utils.torch.diffusers_models import get_tiny_dit, get_tiny_flux, get_tiny_unet
20+
21+
pytest.importorskip("diffusers")
22+
23+
import modelopt.torch.quantization as mtq
24+
from modelopt.torch.export.diffusers_utils import generate_diffusion_dummy_inputs
25+
from modelopt.torch.export.unified_export_hf import export_hf_checkpoint
26+
27+
28+
def _load_config(config_path):
29+
with open(config_path) as file:
30+
return json.load(file)
31+
32+
33+
@pytest.mark.parametrize("model_factory", [get_tiny_unet, get_tiny_dit, get_tiny_flux])
34+
@pytest.mark.parametrize(
35+
("config_id", "quant_cfg"),
36+
[
37+
("int8", mtq.INT8_DEFAULT_CFG),
38+
("int8_smoothquant", mtq.INT8_SMOOTHQUANT_CFG),
39+
("fp8", mtq.FP8_DEFAULT_CFG),
40+
],
41+
)
42+
def test_export_diffusers_real_quantized(tmp_path, model_factory, config_id, quant_cfg):
43+
model = model_factory()
44+
export_dir = tmp_path / f"export_{type(model).__name__}_{config_id}_real_quant"
45+
46+
def _calib_fn(m):
47+
param = next(m.parameters())
48+
dummy_inputs = generate_diffusion_dummy_inputs(m, param.device, param.dtype)
49+
assert dummy_inputs is not None
50+
m(**dummy_inputs)
51+
52+
mtq.quantize(model, quant_cfg, forward_loop=_calib_fn)
53+
54+
export_hf_checkpoint(model, export_dir=export_dir)
55+
56+
config_path = export_dir / "config.json"
57+
assert config_path.exists()
58+
59+
config_data = _load_config(config_path)
60+
assert "quantization_config" in config_data
61+
62+
63+
def test_export_diffusers_real_quantized_fp4(tmp_path):
64+
"""FP4 export test using get_tiny_dit (the only tiny model with FP4-compatible weight shapes)."""
65+
model = get_tiny_dit()
66+
export_dir = tmp_path / "export_DiTTransformer2DModel_fp4_real_quant"
67+
68+
def _calib_fn(m):
69+
param = next(m.parameters())
70+
dummy_inputs = generate_diffusion_dummy_inputs(m, param.device, param.dtype)
71+
assert dummy_inputs is not None
72+
m(**dummy_inputs)
73+
74+
mtq.quantize(model, mtq.NVFP4_DEFAULT_CFG, forward_loop=_calib_fn)
75+
76+
export_hf_checkpoint(model, export_dir=export_dir)
77+
78+
config_path = export_dir / "config.json"
79+
assert config_path.exists()
80+
81+
config_data = _load_config(config_path)
82+
assert "quantization_config" in config_data

tests/unit/torch/export/test_export_diffusers.py

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,7 @@
2020

2121
pytest.importorskip("diffusers")
2222

23-
import modelopt.torch.quantization as mtq
2423
from modelopt.torch.export.convert_hf_config import convert_hf_quant_config_format
25-
from modelopt.torch.export.diffusers_utils import generate_diffusion_dummy_inputs
2624
from modelopt.torch.export.unified_export_hf import export_hf_checkpoint
2725

2826

@@ -84,25 +82,3 @@ def _process_stub(*_args, **_kwargs):
8482
config_data = _load_config(config_path)
8583
assert "quantization_config" in config_data
8684
assert config_data["quantization_config"] == convert_hf_quant_config_format(dummy_quant_config)
87-
88-
89-
@pytest.mark.parametrize("model_factory", [get_tiny_unet, get_tiny_dit, get_tiny_flux])
90-
def test_export_diffusers_real_quantized(tmp_path, model_factory):
91-
model = model_factory()
92-
export_dir = tmp_path / f"export_{type(model).__name__}_real_quant"
93-
94-
def _calib_fn(m):
95-
param = next(m.parameters())
96-
dummy_inputs = generate_diffusion_dummy_inputs(m, param.device, param.dtype)
97-
assert dummy_inputs is not None
98-
m(**dummy_inputs)
99-
100-
mtq.quantize(model, mtq.FP8_DEFAULT_CFG, forward_loop=_calib_fn)
101-
102-
export_hf_checkpoint(model, export_dir=export_dir)
103-
104-
config_path = export_dir / "config.json"
105-
assert config_path.exists()
106-
107-
config_data = _load_config(config_path)
108-
assert "quantization_config" in config_data

0 commit comments

Comments
 (0)