Implement _dequantize for TorchAO quantizer (#13538)

jiqing-feng · sayakpaul · web-flow · commit 907c0c2c76e7 · 2026-05-18T18:39:50.000+05:30
* Implement _dequantize for TorchAO quantizer

- Add _dequantize() method in TorchAoHfQuantizer that dequantizes
  TorchAOBaseTensor weights back to standard nn.Parameter
- Fix _verify_if_layer_quantized to check isinstance(weight, TorchAOBaseTensor)
  so dequantized layers are correctly detected as non-quantized

* enable dequantize for TorchAO tester mixin

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* check dequantize

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix dequantize: clear is_quantized flag and cast dtype after dequantize

* fix

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

* fix error report

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>

---------

Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
diff --git a/src/diffusers/quantizers/base.py b/src/diffusers/quantizers/base.py
@@ -206,6 +206,7 @@ def dequantize(self, model):
 
         # Delete quantizer and quantization config
         del model.hf_quantizer
+        model.is_quantized = False
 
         return model
 
diff --git a/src/diffusers/quantizers/torchao/torchao_quantizer.py b/src/diffusers/quantizers/torchao/torchao_quantizer.py
@@ -376,3 +376,22 @@ def is_trainable(self):
     @property
     def is_compileable(self) -> bool:
         return True
+
+    def _dequantize(self, model):
+        from torchao.utils import TorchAOBaseTensor
+
+        for name, module in model.named_modules():
+            if isinstance(module, nn.Linear) and isinstance(module.weight, TorchAOBaseTensor):
+                if not hasattr(module.weight, "dequantize"):
+                    raise NotImplementedError(
+                        f"Dequantization is not supported for {type(module.weight).__name__} "
+                        f"(module: {name}). Please use a quantization type that supports dequantization."
+                    )
+                device = module.weight.device
+                dequantized_weight = module.weight.dequantize().to(device)
+                module.weight = nn.Parameter(dequantized_weight)
+                # Reset extra_repr if it was overridden
+                if hasattr(module.extra_repr, "__func__") and module.extra_repr.__func__ is not nn.Linear.extra_repr:
+                    module.extra_repr = types.MethodType(nn.Linear.extra_repr, module)
+
+        return model
diff --git a/tests/models/testing_utils/quantization.py b/tests/models/testing_utils/quantization.py
@@ -822,7 +822,12 @@ def _create_quantized_model(self, config_name, **extra_kwargs):
         return self.model_class.from_pretrained(self.pretrained_model_name_or_path, **kwargs)
 
     def _verify_if_layer_quantized(self, name, module, config_kwargs):
+        from torchao.utils import TorchAOBaseTensor
+
         assert isinstance(module, torch.nn.Linear), f"Layer {name} is not Linear, got {type(module)}"
+        assert isinstance(module.weight, TorchAOBaseTensor), (
+            f"Layer {name} weight is {type(module.weight)}, expected TorchAOBaseTensor"
+        )
 
 
 # int4wo requires CUDA or XPU ops (_convert_weight_to_int4pack)
diff --git a/tests/models/transformers/test_models_transformer_flux.py b/tests/models/transformers/test_models_transformer_flux.py
@@ -368,6 +368,10 @@ def pretrained_model_kwargs(self):
 class TestFluxTransformerTorchAo(FluxTransformerTesterConfig, TorchAoTesterMixin):
     """TorchAO quantization tests for Flux Transformer."""
 
+    @property
+    def torch_dtype(self):
+        return torch.bfloat16
+
 
 class TestFluxTransformerGGUF(FluxTransformerTesterConfig, GGUFTesterMixin):
     @property
@@ -404,6 +408,10 @@ class TestFluxTransformerQuantoCompile(FluxTransformerTesterConfig, QuantoCompil
 class TestFluxTransformerTorchAoCompile(FluxTransformerTesterConfig, TorchAoCompileTesterMixin):
     """TorchAO + compile tests for Flux Transformer."""
 
+    @property
+    def torch_dtype(self):
+        return torch.bfloat16
+
 
 class TestFluxTransformerGGUFCompile(FluxTransformerTesterConfig, GGUFCompileTesterMixin):
     @property