Implement _dequantize for TorchAO quantizer

jiqing-feng · jiqing-feng · commit c5076b66fba8 · 2026-04-22T09:34:56.000+08:00
- Add _dequantize() method in TorchAoHfQuantizer that dequantizes the
  TorchAOBaseTensor weights of nn.Linear modules back to standard
  nn.Parameter and resets any overridden extra_repr to nn.Linear.extra_repr
- Fix the _verify_if_layer_quantized test helper to assert
  isinstance(module.weight, TorchAOBaseTensor), so that dequantized layers
  are correctly detected as non-quantized
diff --git a/src/diffusers/quantizers/torchao/torchao_quantizer.py b/src/diffusers/quantizers/torchao/torchao_quantizer.py
@@ -376,3 +376,17 @@ def is_trainable(self):
     @property
     def is_compileable(self) -> bool:
         return True
+
+    def _dequantize(self, model):
+        from torchao.utils import TorchAOBaseTensor
+
+        for name, module in model.named_modules():
+            if isinstance(module, nn.Linear) and isinstance(module.weight, TorchAOBaseTensor):
+                device = module.weight.device
+                dequantized_weight = module.weight.dequantize().to(device)
+                module.weight = nn.Parameter(dequantized_weight)
+                # Reset extra_repr if it was overridden
+                if hasattr(module.extra_repr, "__func__") and module.extra_repr.__func__ is not nn.Linear.extra_repr:
+                    module.extra_repr = types.MethodType(nn.Linear.extra_repr, module)
+
+        return model
diff --git a/tests/models/testing_utils/quantization.py b/tests/models/testing_utils/quantization.py
@@ -829,7 +829,12 @@ def _create_quantized_model(self, config_name, **extra_kwargs):
         return self.model_class.from_pretrained(self.pretrained_model_name_or_path, **kwargs)
 
     def _verify_if_layer_quantized(self, name, module, config_kwargs):
+        from torchao.utils import TorchAOBaseTensor
+
         assert isinstance(module, torch.nn.Linear), f"Layer {name} is not Linear, got {type(module)}"
+        assert isinstance(module.weight, TorchAOBaseTensor), (
+            f"Layer {name} weight is {type(module.weight)}, expected TorchAOBaseTensor"
+        )
 
 
 # int4wo requires CUDA-specific ops (_convert_weight_to_int4pack)