fix multiple issues.

sayakpaul · sayakpaul · commit 650424e63353 · 2025-11-10T11:30:18.000+05:30
diff --git a/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py b/tests/modular_pipelines/qwen/test_modular_pipeline_qwenimage.py
@@ -14,11 +14,9 @@
 # limitations under the License.
 
 
-import numpy as np
 import PIL
 import pytest
 
-from diffusers import ClassifierFreeGuidance
 from diffusers.modular_pipelines import (
     QwenImageAutoBlocks,
     QwenImageEditAutoBlocks,
@@ -28,32 +26,10 @@
     QwenImageModularPipeline,
 )
 
-from ...testing_utils import torch_device
-from ..test_modular_pipelines_common import ModularPipelineTesterMixin
+from ..test_modular_pipelines_common import ModularGuiderTesterMixin, ModularPipelineTesterMixin
 
 
-class QwenImageModularGuiderMixin:
-    def test_guider_cfg(self, tol=1e-2):
-        pipe = self.get_pipeline()
-        pipe = pipe.to(torch_device)
-
-        guider = ClassifierFreeGuidance(guidance_scale=1.0)
-        pipe.update_components(guider=guider)
-
-        inputs = self.get_dummy_inputs()
-        out_no_cfg = pipe(**inputs, output="images")
-
-        guider = ClassifierFreeGuidance(guidance_scale=7.5)
-        pipe.update_components(guider=guider)
-        inputs = self.get_dummy_inputs()
-        out_cfg = pipe(**inputs, output="images")
-
-        assert out_cfg.shape == out_no_cfg.shape
-        max_diff = np.abs(out_cfg - out_no_cfg).max()
-        assert max_diff > tol, "Output with CFG must be different from normal inference"
-
-
-class TestQwenImageModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderMixin):
+class TestQwenImageModularPipelineFast(ModularPipelineTesterMixin, ModularGuiderTesterMixin):
     pipeline_class = QwenImageModularPipeline
     pipeline_blocks_class = QwenImageAutoBlocks
     repo = "hf-internal-testing/tiny-qwenimage-modular"
@@ -76,7 +52,7 @@ def get_dummy_inputs(self):
         return inputs
 
 
-class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderMixin):
+class TestQwenImageEditModularPipelineFast(ModularPipelineTesterMixin, ModularGuiderTesterMixin):
     pipeline_class = QwenImageEditModularPipeline
     pipeline_blocks_class = QwenImageEditAutoBlocks
     repo = "hf-internal-testing/tiny-qwenimage-edit-modular"
@@ -102,7 +78,7 @@ def test_guider_cfg(self):
         super().test_guider_cfg(7e-5)
 
 
-class TestQwenImageEditPlusModularPipelineFast(ModularPipelineTesterMixin, QwenImageModularGuiderMixin):
+class TestQwenImageEditPlusModularPipelineFast(ModularPipelineTesterMixin, ModularGuiderTesterMixin):
     pipeline_class = QwenImageEditPlusModularPipeline
     pipeline_blocks_class = QwenImageEditPlusAutoBlocks
     repo = "hf-internal-testing/tiny-qwenimage-edit-plus-modular"
diff --git a/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py b/tests/modular_pipelines/stable_diffusion_xl/test_modular_pipeline_stable_diffusion_xl.py
@@ -25,7 +25,7 @@
 
 from ...models.unets.test_models_unet_2d_condition import create_ip_adapter_state_dict
 from ...testing_utils import enable_full_determinism, floats_tensor, torch_device
-from ..test_modular_pipelines_common import ModularPipelineTesterMixin
+from ..test_modular_pipelines_common import ModularGuiderTesterMixin, ModularPipelineTesterMixin
 
 
 enable_full_determinism()
@@ -37,13 +37,11 @@ class SDXLModularTesterMixin:
     """
 
     def _test_stable_diffusion_xl_euler(self, expected_image_shape, expected_slice, expected_max_diff=1e-2):
-        sd_pipe = self.get_pipeline()
-        sd_pipe = sd_pipe.to(torch_device)
-        sd_pipe.set_progress_bar_config(disable=None)
+        sd_pipe = self.get_pipeline().to(torch_device)
 
         inputs = self.get_dummy_inputs()
         image = sd_pipe(**inputs, output="images")
-        image_slice = image[0, -3:, -3:, -1]
+        image_slice = image[0, -3:, -3:, -1].cpu()
 
         assert image.shape == expected_image_shape
         max_diff = torch.abs(image_slice.flatten() - expected_slice).max()
@@ -110,7 +108,7 @@ def test_ip_adapter(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N
         pipe = blocks.init_pipeline(self.repo)
         pipe.load_components(torch_dtype=torch.float32)
         pipe = pipe.to(torch_device)
-        pipe.set_progress_bar_config(disable=None)
+
         cross_attention_dim = pipe.unet.config.get("cross_attention_dim")
 
         # forward pass without ip adapter
@@ -219,9 +217,7 @@ def test_controlnet(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N
         # compare against static slices and that can be shaky (with a VVVV low probability).
         expected_max_diff = 9e-4 if torch_device == "cpu" else expected_max_diff
 
-        pipe = self.get_pipeline()
-        pipe = pipe.to(torch_device)
-        pipe.set_progress_bar_config(disable=None)
+        pipe = self.get_pipeline().to(torch_device)
 
         # forward pass without controlnet
         inputs = self.get_dummy_inputs()
@@ -251,9 +247,7 @@ def test_controlnet(self, expected_max_diff: float = 1e-4, expected_pipe_slice=N
         assert max_diff_with_controlnet_scale > 1e-2, "Output with controlnet must be different from normal inference"
 
     def test_controlnet_cfg(self):
-        pipe = self.get_pipeline()
-        pipe = pipe.to(torch_device)
-        pipe.set_progress_bar_config(disable=None)
+        pipe = self.get_pipeline().to(torch_device)
 
         # forward pass with CFG not applied
         guider = ClassifierFreeGuidance(guidance_scale=1.0)
@@ -273,35 +267,11 @@ def test_controlnet_cfg(self):
         assert max_diff > 1e-2, "Output with CFG must be different from normal inference"
 
 
-class SDXLModularGuiderTesterMixin:
-    def test_guider_cfg(self):
-        pipe = self.get_pipeline()
-        pipe = pipe.to(torch_device)
-        pipe.set_progress_bar_config(disable=None)
-
-        # forward pass with CFG not applied
-        guider = ClassifierFreeGuidance(guidance_scale=1.0)
-        pipe.update_components(guider=guider)
-
-        inputs = self.get_dummy_inputs()
-        out_no_cfg = pipe(**inputs, output="images")
-
-        # forward pass with CFG applied
-        guider = ClassifierFreeGuidance(guidance_scale=7.5)
-        pipe.update_components(guider=guider)
-        inputs = self.get_dummy_inputs()
-        out_cfg = pipe(**inputs, output="images")
-
-        assert out_cfg.shape == out_no_cfg.shape
-        max_diff = np.abs(out_cfg - out_no_cfg).max()
-        assert max_diff > 1e-2, "Output with CFG must be different from normal inference"
-
-
 class TestSDXLModularPipelineFast(
     SDXLModularTesterMixin,
     SDXLModularIPAdapterTesterMixin,
     SDXLModularControlNetTesterMixin,
-    SDXLModularGuiderTesterMixin,
+    ModularGuiderTesterMixin,
     ModularPipelineTesterMixin,
 ):
     """Test cases for Stable Diffusion XL modular pipeline fast tests."""
@@ -335,18 +305,7 @@ def test_stable_diffusion_xl_euler(self):
         self._test_stable_diffusion_xl_euler(
             expected_image_shape=self.expected_image_output_shape,
             expected_slice=torch.tensor(
-                [
-                    0.5966781,
-                    0.62939394,
-                    0.48465094,
-                    0.51573336,
-                    0.57593524,
-                    0.47035995,
-                    0.53410417,
-                    0.51436996,
-                    0.47313565,
-                ],
-                device=torch_device,
+                [0.3886, 0.4685, 0.4953, 0.4217, 0.4317, 0.3945, 0.4847, 0.4704, 0.4731],
             ),
             expected_max_diff=1e-2,
         )
@@ -359,7 +318,7 @@ class TestSDXLImg2ImgModularPipelineFast(
     SDXLModularTesterMixin,
     SDXLModularIPAdapterTesterMixin,
     SDXLModularControlNetTesterMixin,
-    SDXLModularGuiderTesterMixin,
+    ModularGuiderTesterMixin,
     ModularPipelineTesterMixin,
 ):
     """Test cases for Stable Diffusion XL image-to-image modular pipeline fast tests."""
@@ -400,20 +359,7 @@ def get_dummy_inputs(self, seed=0):
     def test_stable_diffusion_xl_euler(self):
         self._test_stable_diffusion_xl_euler(
             expected_image_shape=self.expected_image_output_shape,
-            expected_slice=torch.tensor(
-                [
-                    0.56943184,
-                    0.4702148,
-                    0.48048905,
-                    0.6235963,
-                    0.551138,
-                    0.49629188,
-                    0.60031277,
-                    0.5688907,
-                    0.43996853,
-                ],
-                device=torch_device,
-            ),
+            expected_slice=torch.tensor([0.5246, 0.4466, 0.444, 0.3246, 0.4443, 0.5108, 0.5225, 0.559, 0.5147]),
             expected_max_diff=1e-2,
         )
 
@@ -425,7 +371,7 @@ class SDXLInpaintingModularPipelineFastTests(
     SDXLModularTesterMixin,
     SDXLModularIPAdapterTesterMixin,
     SDXLModularControlNetTesterMixin,
-    SDXLModularGuiderTesterMixin,
+    ModularGuiderTesterMixin,
     ModularPipelineTesterMixin,
 ):
     """Test cases for Stable Diffusion XL inpainting modular pipeline fast tests."""
diff --git a/tests/modular_pipelines/test_modular_pipelines_common.py b/tests/modular_pipelines/test_modular_pipelines_common.py
@@ -7,6 +7,7 @@
 
 import diffusers
 from diffusers import ComponentsManager, ModularPipeline, ModularPipelineBlocks
+from diffusers.guiders import ClassifierFreeGuidance
 from diffusers.utils import logging
 
 from ..testing_utils import backend_empty_cache, numpy_cosine_similarity_distance, torch_device
@@ -104,6 +105,7 @@ def teardown_method(self):
     def get_pipeline(self, components_manager=None, torch_dtype=torch.float32):
         pipeline = self.pipeline_blocks_class().init_pipeline(self.repo, components_manager=components_manager)
         pipeline.load_components(torch_dtype=torch_dtype)
+        pipeline.set_progress_bar_config(disable=None)
         return pipeline
 
     def test_pipeline_call_signature(self):
@@ -121,9 +123,7 @@ def _check_for_parameters(parameters, expected_parameters, param_type):
         _check_for_parameters(self.optional_params, optional_parameters, "optional")
 
     def test_inference_batch_consistent(self, batch_sizes=[2], batch_generator=True):
-        pipe = self.get_pipeline()
-        pipe.to(torch_device)
-        pipe.set_progress_bar_config(disable=None)
+        pipe = self.get_pipeline().to(torch_device)
 
         inputs = self.get_dummy_inputs()
         inputs["generator"] = self.get_generator(0)
@@ -162,9 +162,8 @@ def test_inference_batch_single_identical(
         batch_size=2,
         expected_max_diff=1e-4,
     ):
-        pipe = self.get_pipeline()
-        pipe.to(torch_device)
-        pipe.set_progress_bar_config(disable=None)
+        pipe = self.get_pipeline().to(torch_device)
+
         inputs = self.get_dummy_inputs()
 
         # Reset generator in case it is has been used in self.get_dummy_inputs
@@ -202,7 +201,6 @@ def test_inference_batch_single_identical(
     def test_float16_inference(self, expected_max_diff=5e-2):
         pipe = self.get_pipeline()
         pipe.to(torch_device, torch.float32)
-        pipe.set_progress_bar_config(disable=None)
 
         pipe_fp16 = self.get_pipeline()
         pipe_fp16.to(torch_device, torch.float16)
@@ -229,10 +227,8 @@ def test_float16_inference(self, expected_max_diff=5e-2):
 
     @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.")
     def test_to_device(self):
-        pipe = self.get_pipeline()
-        pipe.set_progress_bar_config(disable=None)
+        pipe = self.get_pipeline().to("cpu")
 
-        pipe.to("cpu")
         model_devices = [
             component.device.type for component in pipe.components.values() if hasattr(component, "device")
         ]
@@ -247,30 +243,23 @@ def test_to_device(self):
         )
 
     def test_inference_is_not_nan_cpu(self):
-        pipe = self.get_pipeline()
-        pipe.set_progress_bar_config(disable=None)
-        pipe.to("cpu")
+        pipe = self.get_pipeline().to("cpu")
 
         output = pipe(**self.get_dummy_inputs(), output="images")
         assert torch.isnan(output).sum() == 0, "CPU Inference returns NaN"
 
     @pytest.mark.skipif(torch_device == "cpu", reason="Test needs an accelerator.")
     def test_inference_is_not_nan(self):
-        pipe = self.get_pipeline()
-        pipe.set_progress_bar_config(disable=None)
-        pipe.to(torch_device)
+        pipe = self.get_pipeline().to(torch_device)
 
         output = pipe(**self.get_dummy_inputs(), output="images")
         assert torch.isnan(output).sum() == 0, "Accelerator Inference returns NaN"
 
     def test_num_images_per_prompt(self):
-        pipe = self.get_pipeline()
+        pipe = self.get_pipeline().to(torch_device)
 
         if "num_images_per_prompt" not in pipe.blocks.input_names:
-            return
-
-        pipe = pipe.to(torch_device)
-        pipe.set_progress_bar_config(disable=None)
+            pytest.skip("Skipping test as `num_images_per_prompt` is not present in input names.")
 
         batch_sizes = [1, 2]
         num_images_per_prompts = [1, 2]
@@ -325,3 +314,25 @@ def test_save_from_pretrained(self):
             image_slices.append(image[0, -3:, -3:, -1].flatten())
 
         assert torch.abs(image_slices[0] - image_slices[1]).max() < 1e-3
+
+
+class ModularGuiderTesterMixin:
+    def test_guider_cfg(self, expected_max_diff=1e-2):
+        pipe = self.get_pipeline().to(torch_device)
+
+        # forward pass with CFG not applied
+        guider = ClassifierFreeGuidance(guidance_scale=1.0)
+        pipe.update_components(guider=guider)
+
+        inputs = self.get_dummy_inputs()
+        out_no_cfg = pipe(**inputs, output="images")
+
+        # forward pass with CFG applied
+        guider = ClassifierFreeGuidance(guidance_scale=7.5)
+        pipe.update_components(guider=guider)
+        inputs = self.get_dummy_inputs()
+        out_cfg = pipe(**inputs, output="images")
+
+        assert out_cfg.shape == out_no_cfg.shape
+        max_diff = torch.abs(out_cfg - out_no_cfg).max()
+        assert max_diff > expected_max_diff, "Output with CFG must be different from normal inference"