Skip to content

Commit 26f20d4

Browse files
committed
Revert MO_DEBUG_MAX_LAYERS hatches in model_calib + unified_export_hf
The env-var-gated early break (model_calib.layerwise_calibrate) and export skip (unified_export_hf._process_quantized_modules) were only needed to bound wall-clock time during the cliff-fix smoke test. The bug fix itself is purely about not bringing over glm5.1-tmp's clamps in moe_utils.py — which we already don't do. Removing the debug hatches keeps the branch a clean superset of main's production behavior.
1 parent fe242d6 commit 26f20d4

2 files changed

Lines changed: 0 additions & 20 deletions

File tree

modelopt/torch/export/unified_export_hf.py

Lines changed: 0 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@
1717

1818
import collections.abc
1919
import json
20-
import os
2120
import re
2221
import tempfile
2322
import warnings
@@ -663,16 +662,6 @@ def _process_quantized_modules(
663662
# _QuantFusedExperts uses plural `gate_up_proj_weight_quantizers` (ModuleList),
664663
# which get_quantization_format's singular-weight_quantizer check misses. Handle
665664
# it explicitly before the format gate so fused-experts get split + quantized.
666-
# Debug hatch (paired with MO_DEBUG_MAX_LAYERS in model_calib.layerwise_calibrate):
667-
# skip _export_fused_experts for layers whose layerwise calibration was never run.
668-
# Those layers' per-expert quantizers have no _amax — touching them triggers the
669-
# uncalibrated-fallback warnings or, with corrupt storage, a CUDA illegal-memory
670-
# error. With the calibrated layers only, every expert has a valid _amax.
671-
_debug_max = int(os.environ.get("MO_DEBUG_MAX_LAYERS", "0") or "0")
672-
if _debug_max > 0:
673-
_m = re.search(r"\.layers\.(\d+)\.", name or "")
674-
if _m and int(_m.group(1)) >= _debug_max:
675-
continue
676665
with fsdp2_aware_weight_update(model, sub_module, reshard=False):
677666
_export_fused_experts(sub_module, dtype)
678667
elif get_quantization_format(sub_module) != QUANTIZATION_NONE:

modelopt/torch/quantization/model_calib.py

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@
1616
"""Calibration utilities."""
1717

1818
import math
19-
import os
2019
import time
2120
import warnings
2221
from collections.abc import Callable
@@ -1767,15 +1766,7 @@ def layerwise_calibrate(
17671766
start_layer, resumed_inputs, forward_loop
17681767
)
17691768

1770-
_debug_max_layers = int(os.environ.get("MO_DEBUG_MAX_LAYERS", "0") or "0")
1771-
17721769
for layer_idx in range(start_layer, num_layers):
1773-
if _debug_max_layers > 0 and layer_idx >= _debug_max_layers:
1774-
print_rank_0(
1775-
f"MO_DEBUG_MAX_LAYERS={_debug_max_layers}: stopping layerwise "
1776-
f"calibration after layer {layer_idx - 1}/{num_layers}"
1777-
)
1778-
break
17791770
layer = transformer_layers[layer_idx]
17801771

17811772
def _layer_forward_loop(m, _inputs=layer_inputs):

0 commit comments

Comments
 (0)