huggingface
diff --git a/pipeline_testing_refactor_plan.md b/pipeline_testing_refactor_plan.md
Lines changed: 895 additions & 0 deletions
diff --git a/tests/pipelines/flux/test_pipeline_flux.py b/tests/pipelines/flux/test_pipeline_flux.py
Lines changed: 75 additions & 50 deletions
diff --git a/tests/pipelines/test_pipeline_push_to_hub.py b/tests/pipelines/test_pipeline_push_to_hub.py
Lines changed: 177 additions & 0 deletions
@@ -23,44 +23,40 @@
     slow,
     torch_device,
 )
-from ..test_pipelines_common import (
+from ..testing_utils import (
+    BasePipelineTesterConfig,
     FasterCacheTesterMixin,
     FirstBlockCacheTesterMixin,
     FluxIPAdapterTesterMixin,
     MagCacheTesterMixin,
+    MemoryTesterMixin,
     PipelineTesterMixin,
     PyramidAttentionBroadcastTesterMixin,
     TaylorSeerCacheTesterMixin,
     check_qkv_fused_layers_exist,
 )
 
 
-class FluxPipelineFastTests(
-    PipelineTesterMixin,
-    FluxIPAdapterTesterMixin,
-    PyramidAttentionBroadcastTesterMixin,
-    FasterCacheTesterMixin,
-    FirstBlockCacheTesterMixin,
-    TaylorSeerCacheTesterMixin,
-    MagCacheTesterMixin,
-    unittest.TestCase,
-):
-    pipeline_class = FluxPipeline
-    params = frozenset(["prompt", "height", "width", "guidance_scale", "prompt_embeds", "pooled_prompt_embeds"])
-    batch_params = frozenset(["prompt"])
+class FluxPipelineTesterConfig(BasePipelineTesterConfig):
+    @property
+    def pipeline_class(self):
+        return FluxPipeline
 
-    # there is no xformers processor for Flux
-    test_xformers_attention = False
-    test_layerwise_casting = True
-    test_group_offloading = True
+    @property
+    def params(self):
+        return frozenset(["prompt", "height", "width", "guidance_scale", "prompt_embeds", "pooled_prompt_embeds"])
 
-    faster_cache_config = FasterCacheConfig(
-        spatial_attention_block_skip_range=2,
-        spatial_attention_timestep_skip_range=(-1, 901),
-        unconditional_batch_skip_range=2,
-        attention_weight_callback=lambda _: 0.5,
-        is_guidance_distilled=True,
-    )
+    @property
+    def batch_params(self):
+        return frozenset(["prompt"])
+
+    @property
+    def test_layerwise_casting(self):
+        return True
+
+    @property
+    def test_group_offloading(self):
+        return True
 
     def get_dummy_components(self, num_layers: int = 1, num_single_layers: int = 1):
         torch.manual_seed(0)
@@ -146,6 +142,8 @@ def get_dummy_inputs(self, device, seed=0):
         }
         return inputs
 
+
+class TestFluxPipeline(FluxPipelineTesterConfig, PipelineTesterMixin):
     def test_flux_different_prompts(self):
         pipe = self.pipeline_class(**self.get_dummy_components()).to(torch_device)
 
@@ -160,7 +158,7 @@ def test_flux_different_prompts(self):
 
         # Outputs should be different here
         # For some reasons, they don't show large differences
-        self.assertGreater(max_diff, 1e-6, "Outputs should be different for different prompts.")
+        assert max_diff > 1e-6, "Outputs should be different for different prompts."
 
     def test_fused_qkv_projections(self):
         device = "cpu"  # ensure determinism for the device-dependent torch.Generator
@@ -176,9 +174,8 @@ def test_fused_qkv_projections(self):
         # TODO (sayakpaul): will refactor this once `fuse_qkv_projections()` has been added
         # to the pipeline level.
         pipe.transformer.fuse_qkv_projections()
-        self.assertTrue(
-            check_qkv_fused_layers_exist(pipe.transformer, ["to_qkv"]),
-            ("Something wrong with the fused attention layers. Expected all the attention projections to be fused."),
+        assert check_qkv_fused_layers_exist(pipe.transformer, ["to_qkv"]), (
+            "Something wrong with the fused attention layers. Expected all the attention projections to be fused."
         )
 
         inputs = self.get_dummy_inputs(device)
@@ -190,17 +187,14 @@ def test_fused_qkv_projections(self):
         image = pipe(**inputs).images
         image_slice_disabled = image[0, -3:, -3:, -1]
 
-        self.assertTrue(
-            np.allclose(original_image_slice, image_slice_fused, atol=1e-3, rtol=1e-3),
-            ("Fusion of QKV projections shouldn't affect the outputs."),
+        assert np.allclose(original_image_slice, image_slice_fused, atol=1e-3, rtol=1e-3), (
+            "Fusion of QKV projections shouldn't affect the outputs."
         )
-        self.assertTrue(
-            np.allclose(image_slice_fused, image_slice_disabled, atol=1e-3, rtol=1e-3),
-            ("Outputs, with QKV projection fusion enabled, shouldn't change when fused QKV projections are disabled."),
+        assert np.allclose(image_slice_fused, image_slice_disabled, atol=1e-3, rtol=1e-3), (
+            "Outputs, with QKV projection fusion enabled, shouldn't change when fused QKV projections are disabled."
         )
-        self.assertTrue(
-            np.allclose(original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2),
-            ("Original outputs should match when fused QKV projections are disabled."),
+        assert np.allclose(original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2), (
+            "Original outputs should match when fused QKV projections are disabled."
         )
 
     def test_flux_image_output_shape(self):
@@ -215,10 +209,8 @@ def test_flux_image_output_shape(self):
             inputs.update({"height": height, "width": width})
             image = pipe(**inputs).images[0]
             output_height, output_width, _ = image.shape
-            self.assertEqual(
-                (output_height, output_width),
-                (expected_height, expected_width),
-                f"Output shape {image.shape} does not match expected shape {(expected_height, expected_width)}",
+            assert (output_height, output_width) == (expected_height, expected_width), (
+                f"Output shape {image.shape} does not match expected shape {(expected_height, expected_width)}"
             )
 
     def test_flux_true_cfg(self):
@@ -230,11 +222,48 @@ def test_flux_true_cfg(self):
         inputs["negative_prompt"] = "bad quality"
         inputs["true_cfg_scale"] = 2.0
         true_cfg_out = pipe(**inputs, generator=torch.manual_seed(0)).images[0]
-        self.assertFalse(
-            np.allclose(no_true_cfg_out, true_cfg_out), "Outputs should be different when true_cfg_scale is set."
+        assert not np.allclose(no_true_cfg_out, true_cfg_out), (
+            "Outputs should be different when true_cfg_scale is set."
         )
 
 
+class TestFluxPipelineMemory(FluxPipelineTesterConfig, MemoryTesterMixin):
+    """Offload / device-map / group-offload / layerwise-casting tests for Flux."""
+
+
+class TestFluxPipelineIPAdapter(FluxPipelineTesterConfig, FluxIPAdapterTesterMixin):
+    """IP-Adapter tests for Flux."""
+
+
+class TestFluxPipelinePAB(FluxPipelineTesterConfig, PyramidAttentionBroadcastTesterMixin):
+    """Pyramid Attention Broadcast cache tests for Flux."""
+
+
+class TestFluxPipelineFasterCache(FluxPipelineTesterConfig, FasterCacheTesterMixin):
+    """FasterCache tests for Flux."""
+
+    # Flux is guidance distilled, so we set `is_guidance_distilled=True`.
+    faster_cache_config = FasterCacheConfig(
+        spatial_attention_block_skip_range=2,
+        spatial_attention_timestep_skip_range=(-1, 901),
+        unconditional_batch_skip_range=2,
+        attention_weight_callback=lambda _: 0.5,
+        is_guidance_distilled=True,
+    )
+
+
+class TestFluxPipelineFirstBlockCache(FluxPipelineTesterConfig, FirstBlockCacheTesterMixin):
+    """FirstBlockCache tests for Flux."""
+
+
+class TestFluxPipelineTaylorSeerCache(FluxPipelineTesterConfig, TaylorSeerCacheTesterMixin):
+    """TaylorSeerCache tests for Flux."""
+
+
+class TestFluxPipelineMagCache(FluxPipelineTesterConfig, MagCacheTesterMixin):
+    """MagCache tests for Flux."""
+
+
 @nightly
 @require_big_accelerator
 class FluxPipelineSlowTests(unittest.TestCase):
@@ -293,9 +322,7 @@ def test_flux_inference(self):
         # fmt: on
 
         max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten())
-        self.assertLess(
-            max_diff, 1e-4, f"Image slice is different from expected slice: {image_slice} != {expected_slice}"
-        )
+        assert max_diff < 1e-4, f"Image slice is different from expected slice: {image_slice} != {expected_slice}"
 
 
 @slow
@@ -373,6 +400,4 @@ def test_flux_ip_adapter_inference(self):
         # fmt: on
 
         max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten())
-        self.assertLess(
-            max_diff, 1e-4, f"Image slice is different from expected slice: {image_slice} != {expected_slice}"
-        )
+        assert max_diff < 1e-4, f"Image slice is different from expected slice: {image_slice} != {expected_slice}"
@@ -0,0 +1,177 @@
+# coding=utf-8
+# Copyright 2025 HuggingFace Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+import tempfile
+import unittest
+import uuid
+
+import torch
+from huggingface_hub import ModelCard, delete_repo
+from huggingface_hub.utils import is_jinja_available
+from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer
+
+from diffusers import (
+    AutoencoderKL,
+    DDIMScheduler,
+    StableDiffusionPipeline,
+    UNet2DConditionModel,
+)
+
+from ..others.test_utils import TOKEN, USER, is_staging_test
+
+
+# Standalone, pipeline-agnostic Hub integration test. It does not compose the `BasePipelineTesterConfig`
+# fixtures (it builds its own fixed SD components) and relies on `@is_staging_test` (a `unittest.skip`-based
+# decorator), so it stays a `unittest.TestCase` rather than a config + mixin test.
+@is_staging_test
+class TestPipelinePushToHub(unittest.TestCase):
+    """Round-trip a tiny Stable Diffusion pipeline through the staging Hub and verify what was uploaded."""
+
+    identifier = uuid.uuid4()
+    repo_id = f"test-pipeline-{identifier}"
+    org_repo_id = f"valid_org/{repo_id}-org"
+
+    def _delete_repo_after_test(self, repo_id):
+        """Register deletion of `repo_id` as a cleanup so that a failing assertion cannot leak the repo."""
+        # `missing_ok=True`: the cleanup also runs when the push failed before the repo was created.
+        self.addCleanup(delete_repo, repo_id, token=TOKEN, missing_ok=True)
+
+    def _assert_unet_roundtrip(self, unet, hub_repo_id):
+        """Reload the `unet` subfolder of `hub_repo_id` and compare it to `unet` parameter by parameter."""
+        expected = list(unet.parameters())
+        reloaded = list(UNet2DConditionModel.from_pretrained(hub_repo_id, subfolder="unet").parameters())
+        # `zip` stops at the shorter input, so a truncated checkpoint must be caught by the count check.
+        self.assertEqual(len(expected), len(reloaded))
+        for p1, p2 in zip(expected, reloaded):
+            self.assertTrue(torch.equal(p1, p2))
+
+    def get_pipeline_components(self):
+        """Build the keyword arguments for a small, freshly constructed `StableDiffusionPipeline`."""
+        unet = UNet2DConditionModel(
+            block_out_channels=(32, 64),
+            layers_per_block=2,
+            sample_size=32,
+            in_channels=4,
+            out_channels=4,
+            down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
+            up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
+            cross_attention_dim=32,
+        )
+
+        scheduler = DDIMScheduler(
+            beta_start=0.00085,
+            beta_end=0.012,
+            beta_schedule="scaled_linear",
+            clip_sample=False,
+            set_alpha_to_one=False,
+        )
+
+        vae = AutoencoderKL(
+            block_out_channels=[32, 64],
+            in_channels=3,
+            out_channels=3,
+            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
+            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
+            latent_channels=4,
+        )
+
+        text_encoder_config = CLIPTextConfig(
+            bos_token_id=0,
+            eos_token_id=2,
+            hidden_size=32,
+            intermediate_size=37,
+            layer_norm_eps=1e-05,
+            num_attention_heads=4,
+            num_hidden_layers=5,
+            pad_token_id=1,
+            vocab_size=1000,
+        )
+        text_encoder = CLIPTextModel(text_encoder_config)
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            dummy_vocab = {"<|startoftext|>": 0, "<|endoftext|>": 1, "!": 2}
+            vocab_path = os.path.join(tmpdir, "vocab.json")
+            with open(vocab_path, "w", encoding="utf-8") as f:
+                json.dump(dummy_vocab, f)
+
+            # "Ġ" is non-ASCII: write UTF-8 explicitly instead of relying on the locale's default encoding.
+            merges = "Ġ t\nĠt h"
+            merges_path = os.path.join(tmpdir, "merges.txt")
+            with open(merges_path, "w", encoding="utf-8") as f:
+                f.write(merges)
+            tokenizer = CLIPTokenizer(vocab_file=vocab_path, merges_file=merges_path)
+
+        return {
+            "unet": unet,
+            "scheduler": scheduler,
+            "vae": vae,
+            "text_encoder": text_encoder,
+            "tokenizer": tokenizer,
+            "safety_checker": None,
+            "feature_extractor": None,
+        }
+
+    def test_push_to_hub(self):
+        """Push with `push_to_hub` and with `save_pretrained(push_to_hub=True)`; both must round-trip the UNet."""
+        components = self.get_pipeline_components()
+        pipeline = StableDiffusionPipeline(**components)
+        self._delete_repo_after_test(self.repo_id)
+        pipeline.push_to_hub(self.repo_id, token=TOKEN)
+        self._assert_unet_roundtrip(components["unet"], f"{USER}/{self.repo_id}")
+
+        # Push to hub via save_pretrained to a separate repo. Reusing `self.repo_id` after
+        # deleting it makes the staging server's LFS GC reject the next commit with
+        # "LFS pointer pointed to a file that does not exist" when the model bytes are identical.
+        save_repo_id = f"{self.repo_id}-saved"
+        self._delete_repo_after_test(save_repo_id)
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            pipeline.save_pretrained(tmp_dir, repo_id=save_repo_id, push_to_hub=True, token=TOKEN)
+        self._assert_unet_roundtrip(components["unet"], f"{USER}/{save_repo_id}")
+
+    def test_push_to_hub_in_organization(self):
+        """Same round-trip as `test_push_to_hub`, but targeting repos under the `valid_org` namespace."""
+        components = self.get_pipeline_components()
+        pipeline = StableDiffusionPipeline(**components)
+        self._delete_repo_after_test(self.org_repo_id)
+        pipeline.push_to_hub(self.org_repo_id, token=TOKEN)
+        self._assert_unet_roundtrip(components["unet"], self.org_repo_id)
+
+        # Push to hub via save_pretrained to a separate repo. Reusing `self.org_repo_id` after
+        # deleting it makes the staging server's LFS GC reject the next commit with
+        # "LFS pointer pointed to a file that does not exist" when the model bytes are identical.
+        save_org_repo_id = f"{self.org_repo_id}-saved"
+        self._delete_repo_after_test(save_org_repo_id)
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            pipeline.save_pretrained(tmp_dir, push_to_hub=True, token=TOKEN, repo_id=save_org_repo_id)
+        self._assert_unet_roundtrip(components["unet"], save_org_repo_id)
+
+    @unittest.skipIf(
+        not is_jinja_available(),
+        reason="Model card tests cannot be performed without Jinja installed.",
+    )
+    def test_push_to_hub_library_name(self):
+        """The model card generated by `push_to_hub` must declare `library_name` as "diffusers"."""
+        components = self.get_pipeline_components()
+        pipeline = StableDiffusionPipeline(**components)
+        # Use a method-unique repo to avoid recycling a name that `test_push_to_hub` just deleted,
+        # which the staging server rejects with an LFS pointer error.
+        repo_id = f"test-pipeline-library-name-{uuid.uuid4()}"
+        self._delete_repo_after_test(repo_id)
+        pipeline.push_to_hub(repo_id, token=TOKEN)
+
+        model_card = ModelCard.load(f"{USER}/{repo_id}", token=TOKEN).data
+        self.assertEqual(model_card.library_name, "diffusers")