style

yiyixuxu · yiyixuxu · commit 012b40d8406c · 2025-10-15T05:35:24.000+02:00
diff --git a/src/diffusers/guiders/classifier_free_guidance.py b/src/diffusers/guiders/classifier_free_guidance.py
@@ -32,8 +32,8 @@ class ClassifierFreeGuidance(BaseGuidance):
     Reference: https://huggingface.co/papers/2207.12598
 
     CFG improves generation quality and prompt adherence by jointly training models on both conditional and
-    unconditional data, then combining predictions during inference. This allows trading off between quality
-    (high guidance) and diversity (low guidance).
+    unconditional data, then combining predictions during inference. This allows trading off between quality (high
+    guidance) and diversity (low guidance).
 
     **Two CFG Formulations:**
 
@@ -47,38 +47,38 @@ class ClassifierFreeGuidance(BaseGuidance):
        ```
        x_pred = x_uncond + guidance_scale * (x_cond - x_uncond)
        ```
-       Moves unconditional predictions toward conditional ones, effectively suppressing negative features
-       (e.g., "bad quality", "watermarks"). Equivalent in theory but more intuitive.
+       Moves unconditional predictions toward conditional ones, effectively suppressing negative features (e.g., "bad
+       quality", "watermarks"). Equivalent in theory but more intuitive.
 
     Use `use_original_formulation=True` to switch to the original formulation.
 
     **Guidance-Distilled Models:**
 
-    For models with distilled guidance (guidance baked into the model via distillation), set
-    `distilled_guidance_scale` to the desired guidance value. The pipeline will pass this to the model
-    during forward passes. Set to `None` for regular (non-distilled) models.
+    For models with distilled guidance (guidance baked into the model via distillation), set `distilled_guidance_scale`
+    to the desired guidance value. The pipeline will pass this to the model during forward passes. Set to `None` for
+    regular (non-distilled) models.
 
     Args:
         guidance_scale (`float`, defaults to `7.5`):
-            CFG scale applied by this guider during post-processing. Higher values = stronger prompt
-            conditioning but may reduce quality. Typical range: 1.0-20.0.
+            CFG scale applied by this guider during post-processing. Higher values = stronger prompt conditioning but
+            may reduce quality. Typical range: 1.0-20.0.
         distilled_guidance_scale (`float`, *optional*, defaults to `None`):
-            Guidance scale for distilled models, passed directly to the model during forward pass.
-            If `None`, assumes a regular (non-distilled) model. Allows pipelines to configure different
-            defaults for distilled vs. non-distilled models. Typical range for distilled models: 1.0-8.0.
+            Guidance scale for distilled models, passed directly to the model during forward pass. If `None`, assumes a
+            regular (non-distilled) model. Allows pipelines to configure different defaults for distilled vs.
+            non-distilled models. Typical range for distilled models: 1.0-8.0.
         guidance_rescale (`float`, defaults to `0.0`):
-            Rescaling factor to prevent overexposure from high guidance scales. Based on
-            [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891).
-            Range: 0.0 (no rescaling) to 1.0 (full rescaling).
+            Rescaling factor to prevent overexposure from high guidance scales. Based on [Common Diffusion Noise
+            Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891). Range: 0.0 (no rescaling)
+            to 1.0 (full rescaling).
         use_original_formulation (`bool`, defaults to `False`):
             If `True`, uses the original CFG formulation from the paper. If `False` (default), uses the
             diffusers-native formulation from the Imagen paper.
         start (`float`, defaults to `0.0`):
-            Fraction of denoising steps (0.0-1.0) after which CFG starts. Use > 0.0 to disable CFG in
-            early denoising steps.
+            Fraction of denoising steps (0.0-1.0) after which CFG starts. Use > 0.0 to disable CFG in early denoising
+            steps.
         stop (`float`, defaults to `1.0`):
-            Fraction of denoising steps (0.0-1.0) after which CFG stops. Use < 1.0 to disable CFG in
-            late denoising steps.
+            Fraction of denoising steps (0.0-1.0) after which CFG stops. Use < 1.0 to disable CFG in late denoising
+            steps.
         enabled (`bool`, defaults to `True`):
             Whether CFG is enabled. Set to `False` to disable CFG entirely (uses only conditional predictions).
     """
diff --git a/src/diffusers/pipelines/hunyuan_image/pipeline_hunyuanimage.py b/src/diffusers/pipelines/hunyuan_image/pipeline_hunyuanimage.py
@@ -329,7 +329,7 @@ def encode_prompt(
                 argument using self.tokenizer_2 and self.text_encoder_2.
         """
         device = device or self._execution_device
-        
+
         if prompt is None:
             prompt = ""
         prompt = [prompt] if isinstance(prompt, str) else prompt
@@ -537,8 +537,9 @@ def __call__(
                 The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
                 instead.
             negative_prompt (`str` or `List[str]`, *optional*):
-                The prompt or prompts not to guide the image generation. If not defined and negative_prompt_embeds is not provided, will use an empty negative prompt.
-                Ignored when not using guidance (i.e., ignored if any of the following conditions are met:
+                The prompt or prompts not to guide the image generation. If not defined and negative_prompt_embeds is
+                not provided, will use an empty negative prompt. Ignored when not using guidance (i.e., ignored if any
+                of the following conditions are met:
                     1. guider is disabled
                     2. guider.guidance_scale is not greater than `1` and `true_cfg_scale` is not provided,
                     3. `true_cfg_scale` is not greater than `1`.
@@ -558,15 +559,15 @@ def __call__(
                 Guidance scale as defined in [Classifier-Free Diffusion
                 Guidance](https://huggingface.co/papers/2207.12598). `true_cfg_scale` is defined as `w` of equation 2
                 of [Imagen Paper](https://huggingface.co/papers/2205.11487). Classifier-free guidance is enabled by
-                setting `true_cfg_scale > 1`. Higher guidance scale encourages to
-                generate images that are closely linked to the text `prompt`, usually at the expense of lower image
-                quality. If not defined, the default `guidance_scale` configured in guider will be used.
+                setting `true_cfg_scale > 1`. Higher guidance scale encourages to generate images that are closely
+                linked to the text `prompt`, usually at the expense of lower image quality. If not defined, the default
+                `guidance_scale` configured in guider will be used.
             guidance_scale (`float`, *optional*, defaults to None):
                 A guidance scale value for guidance distilled models. Unlike the traditional classifier-free guidance
                 where the guidance scale is applied during inference through noise prediction rescaling, guidance
                 distilled models take the guidance scale directly as an input parameter during forward pass. Guidance
-                is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images
-                that are closely linked to the text `prompt`, usually at the expense of lower image quality. If not defined,
+                is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images that
+                are closely linked to the text `prompt`, usually at the expense of lower image quality. If not defined,
                 the default `distilled_guidance_scale` configured in guider will be used.
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
@@ -698,7 +699,7 @@ def __call__(
         if guidance_scale is not None:
             guider_kwargs["distilled_guidance_scale"] = guidance_scale
         guider = guider.new(**guider_kwargs)
-    
+
         if is_guider_enabled:
             guider.enable()
         else:
@@ -745,16 +746,24 @@ def __call__(
         self._num_timesteps = len(timesteps)
 
         # handle guidance (for guidance-distilled model)
-        if self.transformer.config.guidance_embeds and not(hasattr(guider, "distilled_guidance_scale") and guider.distilled_guidance_scale is not None):
+        if self.transformer.config.guidance_embeds and not (
+            hasattr(guider, "distilled_guidance_scale") and guider.distilled_guidance_scale is not None
+        ):
             raise ValueError("`guidance_scale` is required for guidance-distilled model.")
-        elif not self.transformer.config.guidance_embeds and hasattr(guider, "distilled_guidance_scale") and guider.distilled_guidance_scale is not None:
+        elif (
+            not self.transformer.config.guidance_embeds
+            and hasattr(guider, "distilled_guidance_scale")
+            and guider.distilled_guidance_scale is not None
+        ):
             logger.warning(
                 f"`distilled_guidance_scale` {guider.distilled_guidance_scale} is ignored since the model is not guidance-distilled. Please use `true_cfg_scale` instead."
             )
 
         if self.transformer.config.guidance_embeds:
             guidance = (
-                torch.tensor([guider.distilled_guidance_scale] * latents.shape[0], dtype=self.transformer.dtype, device=device)
+                torch.tensor(
+                    [guider.distilled_guidance_scale] * latents.shape[0], dtype=self.transformer.dtype, device=device
+                )
                 * 1000.0
             )
 
diff --git a/src/diffusers/pipelines/hunyuan_image/pipeline_hunyuanimage_refiner.py b/src/diffusers/pipelines/hunyuan_image/pipeline_hunyuanimage_refiner.py
@@ -19,14 +19,14 @@
 import torch
 from transformers import Qwen2_5_VLForConditionalGeneration, Qwen2Tokenizer
 
+from ...guiders import ClassifierFreeGuidance
 from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...models import AutoencoderKLHunyuanImageRefiner, HunyuanImageTransformer2DModel
 from ...schedulers import FlowMatchEulerDiscreteScheduler
 from ...utils import is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from .pipeline_output import HunyuanImagePipelineOutput
-from ...guiders import ClassifierFreeGuidance
 
 
 if is_torch_xla_available():
@@ -150,7 +150,7 @@ class HunyuanImageRefinerPipeline(DiffusionPipeline):
     """
 
     model_cpu_offload_seq = "text_encoder->transformer->vae"
-    _callback_tensor_inputs = ["latents", "prompt_embeds"]    
+    _callback_tensor_inputs = ["latents", "prompt_embeds"]
     _guider_input_fields = {
         "encoder_hidden_states": ("prompt_embeds", "negative_prompt_embeds"),
         "encoder_attention_mask": ("prompt_embeds_mask", "negative_prompt_embeds_mask"),
@@ -224,7 +224,7 @@ def _get_qwen_prompt_embeds(
 
     def encode_prompt(
         self,
-        prompt: Optional[Union[str, List[str]]] =  None,
+        prompt: Optional[Union[str, List[str]]] = None,
         device: Optional[torch.device] = None,
         num_images_per_prompt: int = 1,
         prompt_embeds: Optional[torch.Tensor] = None,
@@ -464,8 +464,8 @@ def __call__(
                 The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
                 instead.
             negative_prompt (`str` or `List[str]`, *optional*):
-                The prompt or prompts not to guide the image generation. If not defined, will use an empty negative prompt.
-                Ignored when not using guidance (i.e., ignored if any of the following conditions are met:
+                The prompt or prompts not to guide the image generation. If not defined, will use an empty negative
+                prompt. Ignored when not using guidance (i.e., ignored if any of the following conditions are met:
                     1. guider is disabled
                     2. guider.guidance_scale is not greater than `1` and `true_cfg_scale` is not provided,
                     3. `true_cfg_scale` is not greater than `1`.
@@ -474,15 +474,15 @@ def __call__(
                 Guidance scale as defined in [Classifier-Free Diffusion
                 Guidance](https://huggingface.co/papers/2207.12598). `true_cfg_scale` is defined as `w` of equation 2
                 of [Imagen Paper](https://huggingface.co/papers/2205.11487). Classifier-free guidance is enabled by
-                setting `true_cfg_scale > 1`. Higher guidance scale encourages to
-                generate images that are closely linked to the text `prompt`, usually at the expense of lower image
-                quality. If not defined, the default `guidance_scale` configured in guider will be used.
+                setting `true_cfg_scale > 1`. Higher guidance scale encourages to generate images that are closely
+                linked to the text `prompt`, usually at the expense of lower image quality. If not defined, the default
+                `guidance_scale` configured in guider will be used.
             guidance_scale (`float`, *optional*, defaults to None):
                 A guidance scale value for guidance distilled models. Unlike the traditional classifier-free guidance,
-                guidance distilled models take the guidance scale directly as an input parameter during forward pass. Guidance
-                is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images
-                that are closely linked to the text `prompt`, usually at the expense of lower image quality. 
-                If not defined, the default `distilled_guidance_scale` configured in guider will be used.
+                guidance distilled models take the guidance scale directly as an input parameter during forward pass.
+                Guidance is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate
+                images that are closely linked to the text `prompt`, usually at the expense of lower image quality. If
+                not defined, the default `distilled_guidance_scale` configured in guider will be used.
             num_images_per_prompt (`int`, *optional*, defaults to 1):
             height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
                 The height in pixels of the generated image. This is set to 1024 by default for the best results.
@@ -577,7 +577,7 @@ def __call__(
             image_latents = self._encode_vae_image(image=image, generator=generator)
 
         # 3.prepare prompt embeds
-        
+
         # if true_cfg_scale/guidance_scale is provided, override the guidance_scale/distilled_guidance_scale in guider at runtime
         is_guider_enabled = self.guider._enabled
         guider_kwargs = {}
@@ -640,8 +640,11 @@ def __call__(
         if not (hasattr(guider, "distilled_guidance_scale") and guider.distilled_guidance_scale is not None):
             raise ValueError("`distilled_guidance_scale` is required for guidance-distilled model.")
         guidance = (
-                torch.tensor([guider.distilled_guidance_scale] * latents.shape[0], dtype=self.transformer.dtype, device=device) * 1000.0
+            torch.tensor(
+                [guider.distilled_guidance_scale] * latents.shape[0], dtype=self.transformer.dtype, device=device
             )
+            * 1000.0
+        )
 
         if self.attention_kwargs is None:
             self._attention_kwargs = {}
@@ -659,7 +662,7 @@ def __call__(
                 timestep = t.expand(latents.shape[0]).to(latents.dtype)
 
                 # Step 1: Collect model inputs needed for the guidance method
-                # The `_guider_input_fields` defines which inputs model needs for conditional/unconditional predictions. 
+                # The `_guider_input_fields` defines which inputs model needs for conditional/unconditional predictions.
                 # e.g. {"encoder_hidden_states": ("prompt_embeds", "negative_prompt_embeds")}
                 # means the both prompt_embeds (conditional) and negative_prompt_embeds (unconditional) as inputs.
                 guider_inputs = {}