Skip to content

Commit 6f63f44

Browse files
realAsma authored and claude committed
Rename layerwise config fields and enable layerwise on experts-only recipe
- use_layerwise -> layerwise, checkpoint_dir -> layerwise_checkpoint_dir - Enable layerwise calibration + checkpointing on nvfp4_experts_only-fp8_kv recipe - Add layerwise_checkpoint_dir to nvfp4_default-none_kv_gptq recipe Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Signed-off-by: realAsma <akuriparambi@nvidia.com>
1 parent d2cd03c commit 6f63f44

11 files changed

Lines changed: 36 additions & 28 deletions

File tree

examples/llm_ptq/example_utils.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -858,21 +858,21 @@ def copy_custom_model_files(source_path: str, export_path: str, trust_remote_cod
858858

859859

860860
def needs_checkpoint_path_update(quant_cfg: dict) -> bool:
861-
"""Check if quant_cfg has a checkpoint_dir that should be auto-resolved to a unique subpath."""
861+
"""Check if quant_cfg has a layerwise_checkpoint_dir that should be auto-resolved to a unique subpath."""
862862
algorithm = quant_cfg.get("algorithm")
863863
if algorithm is None or isinstance(algorithm, str):
864864
return False
865-
return algorithm.get("checkpoint_dir") is not None
865+
return algorithm.get("layerwise_checkpoint_dir") is not None
866866

867867

868868
def resolve_checkpoint_dir(quant_cfg: dict, model_path: str) -> dict:
869-
"""Append a unique ``<model_name>_<config_hash>`` subdirectory to checkpoint_dir.
869+
"""Append a unique ``<model_name>_<config_hash>`` subdirectory to layerwise_checkpoint_dir.
870870
871871
Allows a single recipe to be reused across models without checkpoint collisions.
872872
Must only be called when :func:`needs_checkpoint_path_update` returns True.
873873
"""
874874
algorithm = quant_cfg["algorithm"]
875-
base_dir = algorithm["checkpoint_dir"]
875+
base_dir = algorithm["layerwise_checkpoint_dir"]
876876

877877
name = model_path.rstrip("/")
878878
if "/" in name and not os.path.isabs(name):
@@ -885,5 +885,7 @@ def resolve_checkpoint_dir(quant_cfg: dict, model_path: str) -> dict:
885885
).hexdigest()[:8]
886886

887887
quant_cfg = copy.deepcopy(quant_cfg)
888-
quant_cfg["algorithm"]["checkpoint_dir"] = os.path.join(base_dir, f"{name}_{config_hash}")
888+
quant_cfg["algorithm"]["layerwise_checkpoint_dir"] = os.path.join(
889+
base_dir, f"{name}_{config_hash}"
890+
)
889891
return quant_cfg

examples/llm_ptq/hf_ptq.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1109,7 +1109,9 @@ def quantize_main(
11091109

11101110
if needs_checkpoint_path_update(quant_cfg):
11111111
quant_cfg = resolve_checkpoint_dir(quant_cfg, args.pyt_ckpt_path)
1112-
print(f"Auto-resolved checkpoint_dir: {quant_cfg['algorithm']['checkpoint_dir']}")
1112+
print(
1113+
f"Auto-resolved layerwise_checkpoint_dir: {quant_cfg['algorithm']['layerwise_checkpoint_dir']}"
1114+
)
11131115

11141116
if args.qformat in QUANT_CFG_CHOICES:
11151117
mono_quantize(

modelopt/torch/quantization/config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1217,7 +1217,7 @@ class QuantizeAlgorithmConfig(ModeloptBaseConfig):
12171217
),
12181218
)
12191219

1220-
use_layerwise: bool = ModeloptField(
1220+
layerwise: bool = ModeloptField(
12211221
default=False,
12221222
title="Enable layerwise (layer-by-layer) calibration.",
12231223
description=(
@@ -1227,11 +1227,11 @@ class QuantizeAlgorithmConfig(ModeloptBaseConfig):
12271227
),
12281228
)
12291229

1230-
checkpoint_dir: str | None = ModeloptField(
1230+
layerwise_checkpoint_dir: str | None = ModeloptField(
12311231
default=None,
12321232
title="Checkpoint directory for layerwise calibration.",
12331233
description=(
1234-
"If set together with use_layerwise=True, per-layer checkpoints are saved to this "
1234+
"If set together with layerwise=True, per-layer checkpoints are saved to this "
12351235
"directory during calibration. On restart, calibration resumes from the last "
12361236
"completed layer."
12371237
),

modelopt/torch/quantization/mode.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,8 +222,8 @@ def wrapped_calib_func(
222222
"""
223223
kwargs = config.model_dump()
224224
method = kwargs.pop("method")
225-
layerwise = kwargs.pop("use_layerwise", False)
226-
checkpoint_dir = kwargs.pop("checkpoint_dir", None)
225+
layerwise = kwargs.pop("layerwise", False)
226+
checkpoint_dir = kwargs.pop("layerwise_checkpoint_dir", None)
227227
if method is not None and "awq" in method:
228228
# For backward compatibility
229229
kwargs["algorithm"] = method

modelopt/torch/quantization/model_calib.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1646,12 +1646,12 @@ def gptq(
16461646
):
16471647
"""GPTQ quantization.
16481648
1649-
Works in two modes depending on ``use_layerwise`` in the config:
1649+
Works in two modes depending on ``layerwise`` in the config:
16501650
1651-
* **Layerwise** (``use_layerwise=True``): ``layerwise_calibrate`` calls this
1651+
* **Layerwise** (``layerwise=True``): ``layerwise_calibrate`` calls this
16521652
function once per decoder layer with updated activations, producing more
16531653
accurate Hessian estimates.
1654-
* **Non-layerwise** (``use_layerwise=False``): called once on the full model.
1654+
* **Non-layerwise** (``layerwise=False``): called once on the full model.
16551655
All layers are quantized in parallel from the original activations.
16561656
16571657
Per-module steps:

modelopt_recipes/general/ptq/nvfp4_default-fp8_kv.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
metadata:
1717
recipe_type: ptq
18-
description: NVFP4 MLP/MoE weight only (W4A16), FP8 KV cache, max calibration.
18+
description: NVFP4 W4A4, FP8 KV cache, max calibration.
1919
quantize:
2020
algorithm: max
2121
quant_cfg:

modelopt_recipes/general/ptq/nvfp4_default-none_kv_gptq.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@
1515

1616
metadata:
1717
recipe_type: ptq
18-
description: NVFP4 weight and activation (W4A4), gptq sequential calibration.
18+
description: NVFP4 weight and activation (W4A4), gptq layerwise calibration.
1919
quantize:
2020
algorithm:
2121
method: gptq
22-
use_sequential: true
22+
layerwise: true
23+
layerwise_checkpoint_dir: output/layerwise_ckpts/
2324
quant_cfg:
2425
- quantizer_name: '*'
2526
enable: false

modelopt_recipes/general/ptq/nvfp4_experts_only-fp8_kv.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,12 @@
1515

1616
metadata:
1717
recipe_type: ptq
18-
description: NVFP4 static weight and dynamic activation for expert layers only (W4A4), FP8 KV cache, max calibration.
18+
description: NVFP4 static weight and dynamic activation for expert layers only (W4A4), FP8 KV cache, max layerwise calibration.
1919
quantize:
20-
algorithm: max
20+
algorithm:
21+
method: max
22+
layerwise: true
23+
layerwise_checkpoint_dir: output/layerwise_ckpts/
2124
quant_cfg:
2225
- quantizer_name: '*'
2326
enable: false

tests/gpu/torch/quantization/plugins/test_accelerate_gpu.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -100,20 +100,20 @@ def _make_cpu_offloaded_model(tmp_path, num_hidden_layers=3):
100100

101101

102102
def _make_layerwise_cfg(base_cfg):
103-
"""Add use_layerwise=True to a quant config's algorithm field."""
103+
"""Add layerwise=True to a quant config's algorithm field."""
104104
cfg = copy.deepcopy(base_cfg)
105105
algo = cfg.get("algorithm", "max")
106106
if isinstance(algo, str):
107-
cfg["algorithm"] = {"method": algo, "use_layerwise": True}
107+
cfg["algorithm"] = {"method": algo, "layerwise": True}
108108
else:
109-
algo["use_layerwise"] = True
109+
algo["layerwise"] = True
110110
return cfg
111111

112112

113113
def _make_layerwise_checkpoint_cfg(base_cfg, checkpoint_dir):
114-
"""Add use_layerwise=True and checkpoint_dir to a quant config's algorithm field."""
114+
"""Add layerwise=True and layerwise_checkpoint_dir to a quant config's algorithm field."""
115115
cfg = _make_layerwise_cfg(base_cfg)
116-
cfg["algorithm"]["checkpoint_dir"] = checkpoint_dir
116+
cfg["algorithm"]["layerwise_checkpoint_dir"] = checkpoint_dir
117117
return cfg
118118

119119

@@ -274,14 +274,14 @@ def _make_multi_offload_model():
274274
def _make_gptq_sequential_cfg(base_cfg):
275275
"""Create a sequential GPTQ config from a base quantization config."""
276276
cfg = copy.deepcopy(base_cfg)
277-
cfg["algorithm"] = {"method": "gptq", "use_layerwise": True}
277+
cfg["algorithm"] = {"method": "gptq", "layerwise": True}
278278
return cfg
279279

280280

281281
def _make_gptq_sequential_checkpoint_cfg(base_cfg, checkpoint_dir):
282282
"""Create a sequential GPTQ config with checkpoint dir."""
283283
cfg = _make_gptq_sequential_cfg(base_cfg)
284-
cfg["algorithm"]["checkpoint_dir"] = checkpoint_dir
284+
cfg["algorithm"]["layerwise_checkpoint_dir"] = checkpoint_dir
285285
return cfg
286286

287287

tests/gpu/torch/quantization/test_fsdp2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ def _test_layerwise_calibrate_fsdp2(rank, size):
184184
# Reference: non-FSDP layerwise calibration
185185
ref_model = copy.deepcopy(model)
186186
seq_cfg = copy.deepcopy(mtq.INT8_DEFAULT_CFG)
187-
seq_cfg["algorithm"] = {"method": "max", "use_layerwise": True}
187+
seq_cfg["algorithm"] = {"method": "max", "layerwise": True}
188188
mtq.quantize(ref_model, seq_cfg, lambda m: m(inputs))
189189
output_ref = ref_model(inputs)
190190

0 commit comments

Comments (0)