Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions modelopt/torch/quantization/qtensor/nvfp4_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,32 @@ def get_activation_scaling_factor(cls, quantizer):

# NOTE(review): fragment of get_activation_scaling_factor(cls, quantizer) per the
# diff hunk header — `amax` is computed earlier in the method and is not visible
# here. 448.0 is presumably the FP8 E4M3 max representable value used by the
# NVFP4 two-level scaling scheme — TODO confirm against the full file.
activation_scaling_factor = amax.float() / (quantizer.maxbound * 448.0)

# Handle exact-zero entries produced by MoE routing sparsity: some
# per-channel input slots on rarely-routed experts never see traffic
# during calibration, leaving their amax (and therefore scaling factor)
# at exactly zero. A zero scaling factor would break downstream
# dequantization arithmetic. Replace exact zeros with the minimum
# positive value in the same tensor — this is a no-op for values
# flowing through zeroed channels (~0 anyway) and keeps the tensor
# valid. We deliberately leave negative entries alone so that the
# existing positivity assertion below still catches upstream
# quantizer/config bugs rather than silently masking them.
# Boolean mask of entries that are exactly 0.0 (not merely tiny).
zero_mask = activation_scaling_factor == 0
if zero_mask.any():
    # Smallest strictly-positive scale in the tensor; used as the stand-in
    # for the dead (zero) channels so they stay in a plausible range.
    positive = activation_scaling_factor[activation_scaling_factor > 0]
    replacement = (
        positive.min()
        if positive.numel() > 0
        # Degenerate case: every entry is zero (or negative) — fall back to a
        # tiny fixed constant on the same device/dtype so torch.where works.
        else torch.tensor(
            1e-8,
            device=activation_scaling_factor.device,
            dtype=activation_scaling_factor.dtype,
        )
    )
    # Replace only the exact-zero entries; negatives pass through untouched
    # so the assertion below can still flag them.
    activation_scaling_factor = torch.where(
        zero_mask, replacement, activation_scaling_factor
    )

# NOTE(review): `assert` is stripped under `python -O`; if this positivity
# check is a real runtime contract, consider raising ValueError instead.
assert torch.all(activation_scaling_factor > 0), (
    f" activation scaling factor {activation_scaling_factor} not positive."
)
Expand Down