NVIDIA · jenchen13 · Apr 3, 2026 · Mar 10, 2026 · Mar 18, 2026 · Mar 20, 2026
@@ -67,12 +67,11 @@ def weight_only_quantize(model: nn.Module):
     for module in name_to_module.values():
         if module in seen_modules:
             continue
-        for weight_name in weight_attr_names(module):
+
+        if isinstance(module, QuantModule):
             with enable_weight_access_and_writeback(module, model, name_to_module):
-                weight_quantizer = getattr(
-                    module, quantizer_attr_names(weight_name).weight_quantizer
-                )
-                weight_quantizer(getattr(module, weight_name))
+                for weight, weight_quantizer in module.iter_weights_for_calibration():
+                    weight_quantizer(weight)
         seen_modules.add(module)
 
 

@@ -119,6 +119,14 @@ def modelopt_post_restore(self, prefix: str = ""):
             if isinstance(module, TensorQuantizer):
                 module.to(non_tq_param_or_buffer.device)
 
+    def iter_weights_for_calibration(self):
+        """Yield ``(weight, weight_quantizer)`` pairs for weight-only calibration.
+
+        For every attribute name produced by ``weight_attr_names(self)``, the attribute
+        itself and the quantizer stored under
+        ``quantizer_attr_names(weight_name).weight_quantizer`` are looked up on this
+        module and yielded together.
+
+        Yields:
+            ``(weight, weight_quantizer)`` tuples, one per weight attribute name.
+        """
+        # Function-scope import -- presumably to avoid a circular import with
+        # ``modelopt.torch.quantization.utils``; confirm before hoisting to module level.
+        from modelopt.torch.quantization.utils import quantizer_attr_names, weight_attr_names
+
+        for weight_name in weight_attr_names(self):
+            # Resolve the quantizer first, then pair it with the weight it belongs to.
+            weight_quantizer = getattr(self, quantizer_attr_names(weight_name).weight_quantizer)
+            yield getattr(self, weight_name), weight_quantizer
+
     def fold_weight(self, keep_attrs: bool = False):
         """Fold the weight for faster eval."""
         # Handle all attributes that end with _weight_quantizer

@@ -151,6 +151,13 @@ def modelopt_post_restore(self, prefix: str = ""):
         # Remove self.weight after post_restore.
         delattr(self, "weight")
 
+    def iter_weights_for_calibration(self):
+        """Yield ``(weight_i, weight_quantizer)`` for each of the ``num_gemms`` grouped weights.
+
+        Looks up the attributes ``weight0`` ... ``weight{num_gemms - 1}`` on this module
+        and pairs each one with the single ``self.weight_quantizer``.
+
+        Yields:
+            ``(weight_i, self.weight_quantizer)`` for every index whose ``weight{i}``
+            attribute exists and is not ``None``.
+        """
+        for i in range(self.num_gemms):
+            # A missing or ``None`` ``weight{i}`` is skipped silently rather than raising.
+            weight_i = getattr(self, f"weight{i}", None)
+            if weight_i is not None:
+                # Every grouped weight is paired with the same quantizer object.
+                yield weight_i, self.weight_quantizer
+
     @staticmethod
     def te_grouped_quantized_linear_fn(package, func_name, self, *args):
         _assert_te_fp8_enabled()

@@ -213,7 +213,7 @@ def weight_attr_names(module: nn.Module) -> "Generator[str, None, None]":
     # the standard weight and quantizer case
     weight = getattr(module, "weight", None)
     weight_quantizer = getattr(module, "weight_quantizer", None)
-    if isinstance(weight_quantizer, (TensorQuantizer, SequentialQuantizer)):
+    if weight is not None and isinstance(weight_quantizer, (TensorQuantizer, SequentialQuantizer)):
         yield "weight"
 
     # other weight and quantizer case