[tests] fix consistency decoder tests (#13905)

sayakpaul · web-flow · commit d1f8e55c3b6e · 2026-06-15T14:38:52.000+05:30
* fix consistency decoder tests

* address feedback

* address additional review feedback

* up
diff --git a/tests/models/autoencoders/test_models_consistency_decoder_vae.py b/tests/models/autoencoders/test_models_consistency_decoder_vae.py
@@ -14,7 +14,6 @@
 # limitations under the License.
 
 import gc
-import unittest
 
 import numpy as np
 import torch
@@ -103,14 +102,12 @@ class TestConsistencyDecoderVAESlicingTiling(ConsistencyDecoderVAETesterConfig,
 
 
 @slow
-class ConsistencyDecoderVAEIntegrationTests(unittest.TestCase):
-    def setUp(self):
-        super().setUp()
+class TestConsistencyDecoderVAEIntegration:
+    def setup_method(self):
         gc.collect()
         backend_empty_cache(torch_device)
 
-    def tearDown(self):
-        super().tearDown()
+    def teardown_method(self):
         gc.collect()
         backend_empty_cache(torch_device)
 
diff --git a/tests/models/testing_utils/common.py b/tests/models/testing_utils/common.py
@@ -242,6 +242,9 @@ def get_dummy_inputs(self) -> Dict[str, Any]:
         """
         Returns dict of inputs to pass to the model forward pass.
 
+        Implementations must be deterministic: every call must return identical inputs (seed any random
+        tensors and generators), since tests call this once per forward pass to compare outputs.
+
         Returns:
             Dict[str, Any]: Input tensors/values for model.forward().
 
@@ -292,9 +295,8 @@ def test_from_save_pretrained(self, tmp_path, atol=5e-5, rtol=5e-5):
                 f"Parameter shape mismatch for {param_name}. Original: {param_1.shape}, loaded: {param_2.shape}"
             )
 
-        inputs_dict = self.get_dummy_inputs()
-        image = model(**inputs_dict, return_dict=False)[0]
-        new_image = new_model(**inputs_dict, return_dict=False)[0]
+        image = model(**self.get_dummy_inputs(), return_dict=False)[0]
+        new_image = new_model(**self.get_dummy_inputs(), return_dict=False)[0]
 
         assert_tensors_close(image, new_image, atol=atol, rtol=rtol, msg="Models give different forward passes.")
 
@@ -314,9 +316,8 @@ def test_from_save_pretrained_variant(self, tmp_path, atol=5e-5, rtol=0):
 
         new_model.to(torch_device)
 
-        inputs_dict = self.get_dummy_inputs()
-        image = model(**inputs_dict, return_dict=False)[0]
-        new_image = new_model(**inputs_dict, return_dict=False)[0]
+        image = model(**self.get_dummy_inputs(), return_dict=False)[0]
+        new_image = new_model(**self.get_dummy_inputs(), return_dict=False)[0]
 
         assert_tensors_close(image, new_image, atol=atol, rtol=rtol, msg="Models give different forward passes.")
 
@@ -344,9 +345,8 @@ def test_determinism(self, atol=1e-5, rtol=0):
         model.to(torch_device)
         model.eval()
 
-        inputs_dict = self.get_dummy_inputs()
-        first = model(**inputs_dict, return_dict=False)[0]
-        second = model(**inputs_dict, return_dict=False)[0]
+        first = model(**self.get_dummy_inputs(), return_dict=False)[0]
+        second = model(**self.get_dummy_inputs(), return_dict=False)[0]
 
         first_flat = first.flatten()
         second_flat = second.flatten()
@@ -403,9 +403,8 @@ def recursive_check(tuple_object, dict_object):
         model.to(torch_device)
         model.eval()
 
-        inputs_dict = self.get_dummy_inputs()
-        outputs_dict = model(**inputs_dict)
-        outputs_tuple = model(**inputs_dict, return_dict=False)
+        outputs_dict = model(**self.get_dummy_inputs())
+        outputs_tuple = model(**self.get_dummy_inputs(), return_dict=False)
 
         recursive_check(outputs_tuple, outputs_dict)
 
@@ -509,11 +508,10 @@ def test_from_save_pretrained_dtype_inference(self, tmp_path, dtype, atol=1e-4,
     def test_sharded_checkpoints(self, tmp_path, atol=1e-5, rtol=0):
         torch.manual_seed(0)
         config = self.get_init_dict()
-        inputs_dict = self.get_dummy_inputs()
         model = self.model_class(**config).eval()
         model = model.to(torch_device)
 
-        base_output = model(**inputs_dict, return_dict=False)[0]
+        base_output = model(**self.get_dummy_inputs(), return_dict=False)[0]
 
         model_size = compute_module_persistent_sizes(model)[""]
         max_shard_size = int((model_size * 0.75) / (2**10))  # Convert to KB as these test models are small
@@ -532,10 +530,7 @@ def test_sharded_checkpoints(self, tmp_path, atol=1e-5, rtol=0):
         new_model = new_model.to(torch_device)
 
         torch.manual_seed(0)
-        # Re-create inputs only if they contain a generator (which needs to be reset)
-        if "generator" in inputs_dict:
-            inputs_dict = self.get_dummy_inputs()
-        new_output = new_model(**inputs_dict, return_dict=False)[0]
+        new_output = new_model(**self.get_dummy_inputs(), return_dict=False)[0]
 
         assert_tensors_close(
             base_output, new_output, atol=atol, rtol=rtol, msg="Output should match after sharded save/load"
@@ -546,11 +541,10 @@ def test_sharded_checkpoints(self, tmp_path, atol=1e-5, rtol=0):
     def test_sharded_checkpoints_with_variant(self, tmp_path, atol=1e-5, rtol=0):
         torch.manual_seed(0)
         config = self.get_init_dict()
-        inputs_dict = self.get_dummy_inputs()
         model = self.model_class(**config).eval()
         model = model.to(torch_device)
 
-        base_output = model(**inputs_dict, return_dict=False)[0]
+        base_output = model(**self.get_dummy_inputs(), return_dict=False)[0]
 
         model_size = compute_module_persistent_sizes(model)[""]
         max_shard_size = int((model_size * 0.75) / (2**10))  # Convert to KB as these test models are small
@@ -574,10 +568,7 @@ def test_sharded_checkpoints_with_variant(self, tmp_path, atol=1e-5, rtol=0):
         new_model = new_model.to(torch_device)
 
         torch.manual_seed(0)
-        # Re-create inputs only if they contain a generator (which needs to be reset)
-        if "generator" in inputs_dict:
-            inputs_dict = self.get_dummy_inputs()
-        new_output = new_model(**inputs_dict, return_dict=False)[0]
+        new_output = new_model(**self.get_dummy_inputs(), return_dict=False)[0]
 
         assert_tensors_close(
             base_output, new_output, atol=atol, rtol=rtol, msg="Output should match after variant sharded save/load"
@@ -589,11 +580,10 @@ def test_sharded_checkpoints_with_parallel_loading(self, tmp_path, atol=1e-5, rt
 
         torch.manual_seed(0)
         config = self.get_init_dict()
-        inputs_dict = self.get_dummy_inputs()
         model = self.model_class(**config).eval()
         model = model.to(torch_device)
 
-        base_output = model(**inputs_dict, return_dict=False)[0]
+        base_output = model(**self.get_dummy_inputs(), return_dict=False)[0]
 
         model_size = compute_module_persistent_sizes(model)[""]
         max_shard_size = int((model_size * 0.75) / (2**10))  # Convert to KB as these test models are small
@@ -627,10 +617,7 @@ def test_sharded_checkpoints_with_parallel_loading(self, tmp_path, atol=1e-5, rt
             model_parallel = model_parallel.to(torch_device)
 
             torch.manual_seed(0)
-            # Re-create inputs only if they contain a generator (which needs to be reset)
-            if "generator" in inputs_dict:
-                inputs_dict = self.get_dummy_inputs()
-            output_parallel = model_parallel(**inputs_dict, return_dict=False)[0]
+            output_parallel = model_parallel(**self.get_dummy_inputs(), return_dict=False)[0]
 
             assert_tensors_close(
                 base_output, output_parallel, atol=atol, rtol=rtol, msg="Output should match with parallel loading"
diff --git a/tests/models/unets/test_models_unet_2d_condition.py b/tests/models/unets/test_models_unet_2d_condition.py
@@ -35,6 +35,7 @@
 from diffusers.models.embeddings import ImageProjection, IPAdapterFaceIDImageProjection, IPAdapterPlusImageProjection
 from diffusers.utils import logging
 from diffusers.utils.import_utils import is_xformers_available
+from diffusers.utils.torch_utils import randn_tensor
 
 from ...testing_utils import (
     backend_empty_cache,
@@ -391,11 +392,13 @@ def get_dummy_inputs(self) -> dict[str, torch.Tensor]:
         batch_size = 4
         num_channels = 4
         sizes = (16, 16)
+        # Seed locally so repeated calls (e.g. one per forward pass in the mixins) yield identical inputs.
+        generator = torch.Generator("cpu").manual_seed(0)
 
         return {
-            "sample": floats_tensor((batch_size, num_channels) + sizes).to(torch_device),
+            "sample": randn_tensor((batch_size, num_channels) + sizes, generator=generator, device=torch_device),
             "timestep": torch.tensor([10]).to(torch_device),
-            "encoder_hidden_states": floats_tensor((batch_size, 4, 8)).to(torch_device),
+            "encoder_hidden_states": randn_tensor((batch_size, 4, 8), generator=generator, device=torch_device),
         }