1 change: 1 addition & 0 deletions src/diffusers/quantizers/base.py
@@ -206,6 +206,7 @@ def dequantize(self, model):

         # Delete quantizer and quantization config
         del model.hf_quantizer
+        model.is_quantized = False
 
         return model

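For context, dequantization is driven from the model side; a minimal sketch of the flow this change affects, assuming ModelMixin.dequantize() delegates to hf_quantizer.dequantize(model) as shown above (checkpoint, quant type, and dtype here are illustrative):

import torch
from diffusers import FluxTransformer2DModel, TorchAoConfig

# Load a torchao-quantized transformer (illustrative repo and settings).
model = FluxTransformer2DModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    subfolder="transformer",
    quantization_config=TorchAoConfig("int8wo"),
    torch_dtype=torch.bfloat16,
)

model.dequantize()  # removes hf_quantizer and, with this change, resets the flag
assert not model.is_quantized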
16 changes: 16 additions & 0 deletions src/diffusers/quantizers/torchao/torchao_quantizer.py
@@ -376,3 +376,19 @@ def is_trainable(self):
     @property
     def is_compileable(self) -> bool:
         return True
+
+    def _dequantize(self, model):
+        from torchao.utils import TorchAOBaseTensor
+
+        for name, module in model.named_modules():
+            if isinstance(module, nn.Linear) and isinstance(module.weight, TorchAOBaseTensor):

Contributor (@vkuzo), commenting on the isinstance check above:

TorchAOBaseTensor does not expose dequantize() as a public API; it is defined on child classes. I agree that it would make sense to do so in the future. If you want to be safe here, it might be better to check for the individual tensor subclasses that do expose it.
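For illustration, the stricter check suggested here might look like the sketch below; AffineQuantizedTensor is one torchao subclass that does expose dequantize(), but the exact set of subclasses to check is an assumption:

from torchao.dtypes import AffineQuantizedTensor

# Check a concrete subclass known to implement dequantize(), not the base class.
if isinstance(module.weight, AffineQuantizedTensor):
    dequantized_weight = module.weight.dequantize()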


Contributor Author:

Thanks for the review @vkuzo! You're right that dequantize() is defined on child classes rather than on TorchAOBaseTensor itself. I've added a hasattr guard so we safely skip any subclass that doesn't expose it. In practice all quantized tensor subclasses we encounter do implement dequantize(), but this makes it future-proof.

+                if not hasattr(module.weight, "dequantize"):
+                    continue
+                device = module.weight.device
+                dequantized_weight = module.weight.dequantize().to(device)
+                module.weight = nn.Parameter(dequantized_weight)
+                # Reset extra_repr if it was overridden
+                if hasattr(module.extra_repr, "__func__") and module.extra_repr.__func__ is not nn.Linear.extra_repr:
+                    module.extra_repr = types.MethodType(nn.Linear.extra_repr, module)
+
+        return model
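The extra_repr reset above relies on a standard Python pattern: binding the class method back onto the instance with types.MethodType replaces whatever per-instance extra_repr was installed during quantization. A standalone sketch of the pattern (not part of the PR):

import types
import torch.nn as nn

linear = nn.Linear(4, 4)

# Simulate a quantizer installing a per-instance extra_repr override.
linear.extra_repr = types.MethodType(lambda self: "in_features=4, quant=int8", linear)
print(linear)  # Linear(in_features=4, quant=int8)

# Rebind the stock nn.Linear.extra_repr to restore the default representation.
linear.extra_repr = types.MethodType(nn.Linear.extra_repr, linear)
print(linear)  # Linear(in_features=4, out_features=4, bias=True)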
5 changes: 5 additions & 0 deletions tests/models/testing_utils/quantization.py
@@ -820,7 +820,12 @@ def _create_quantized_model(self, config_name, **extra_kwargs):
         return self.model_class.from_pretrained(self.pretrained_model_name_or_path, **kwargs)
 
     def _verify_if_layer_quantized(self, name, module, config_kwargs):
+        from torchao.utils import TorchAOBaseTensor
+
         assert isinstance(module, torch.nn.Linear), f"Layer {name} is not Linear, got {type(module)}"
+        assert isinstance(module.weight, TorchAOBaseTensor), (
+            f"Layer {name} weight is {type(module.weight)}, expected TorchAOBaseTensor"
+        )

Member, commenting on lines +826 to +828:

Can we also enable dequantization tests for TorchAO tester mixin?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.
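For reference, the enabled test might take roughly this shape; the method name, config name, and assertions below are assumptions for illustration, not the code actually added:

def test_dequantize(self):
    from torchao.utils import TorchAOBaseTensor

    model = self._create_quantized_model("int8wo")
    model.dequantize()
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear):
            # After dequantization, weights should be plain tensors again.
            assert not isinstance(module.weight, TorchAOBaseTensor), (
                f"Layer {name} still holds a quantized weight"
            )
    assert not model.is_quantized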



     # int4wo requires CUDA or XPU ops (_convert_weight_to_int4pack)
8 changes: 8 additions & 0 deletions tests/models/transformers/test_models_transformer_flux.py
@@ -367,6 +367,10 @@ def pretrained_model_kwargs(self):
 class TestFluxTransformerTorchAo(FluxTransformerTesterConfig, TorchAoTesterMixin):
     """TorchAO quantization tests for Flux Transformer."""
 
+    @property
+    def torch_dtype(self):
+        return torch.bfloat16
+

 class TestFluxTransformerGGUF(FluxTransformerTesterConfig, GGUFTesterMixin):
     @property
@@ -403,6 +407,10 @@ class TestFluxTransformerQuantoCompile(FluxTransformerTesterConfig, QuantoCompil
 class TestFluxTransformerTorchAoCompile(FluxTransformerTesterConfig, TorchAoCompileTesterMixin):
     """TorchAO + compile tests for Flux Transformer."""
 
+    @property
+    def torch_dtype(self):
+        return torch.bfloat16
+

 class TestFluxTransformerGGUFCompile(FluxTransformerTesterConfig, GGUFCompileTesterMixin):
     @property