Skip to content

Commit 012b40d

Browse files
committed
style
1 parent b9fd002 commit 012b40d

3 files changed

Lines changed: 58 additions & 46 deletions

File tree

src/diffusers/guiders/classifier_free_guidance.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ class ClassifierFreeGuidance(BaseGuidance):
3232
Reference: https://huggingface.co/papers/2207.12598
3333
3434
CFG improves generation quality and prompt adherence by jointly training models on both conditional and
35-
unconditional data, then combining predictions during inference. This allows trading off between quality
36-
(high guidance) and diversity (low guidance).
35+
unconditional data, then combining predictions during inference. This allows trading off between quality (high
36+
guidance) and diversity (low guidance).
3737
3838
**Two CFG Formulations:**
3939
@@ -47,38 +47,38 @@ class ClassifierFreeGuidance(BaseGuidance):
4747
```
4848
x_pred = x_uncond + guidance_scale * (x_cond - x_uncond)
4949
```
50-
Moves unconditional predictions toward conditional ones, effectively suppressing negative features
51-
(e.g., "bad quality", "watermarks"). Equivalent in theory but more intuitive.
50+
Moves unconditional predictions toward conditional ones, effectively suppressing negative features (e.g., "bad
51+
quality", "watermarks"). Equivalent in theory but more intuitive.
5252
5353
Use `use_original_formulation=True` to switch to the original formulation.
5454
5555
**Guidance-Distilled Models:**
5656
57-
For models with distilled guidance (guidance baked into the model via distillation), set
58-
`distilled_guidance_scale` to the desired guidance value. The pipeline will pass this to the model
59-
during forward passes. Set to `None` for regular (non-distilled) models.
57+
For models with distilled guidance (guidance baked into the model via distillation), set `distilled_guidance_scale`
58+
to the desired guidance value. The pipeline will pass this to the model during forward passes. Set to `None` for
59+
regular (non-distilled) models.
6060
6161
Args:
6262
guidance_scale (`float`, defaults to `7.5`):
63-
CFG scale applied by this guider during post-processing. Higher values = stronger prompt
64-
conditioning but may reduce quality. Typical range: 1.0-20.0.
63+
CFG scale applied by this guider during post-processing. Higher values = stronger prompt conditioning but
64+
may reduce quality. Typical range: 1.0-20.0.
6565
distilled_guidance_scale (`float`, *optional*, defaults to `None`):
66-
Guidance scale for distilled models, passed directly to the model during forward pass.
67-
If `None`, assumes a regular (non-distilled) model. Allows pipelines to configure different
68-
defaults for distilled vs. non-distilled models. Typical range for distilled models: 1.0-8.0.
66+
Guidance scale for distilled models, passed directly to the model during forward pass. If `None`, assumes a
67+
regular (non-distilled) model. Allows pipelines to configure different defaults for distilled vs.
68+
non-distilled models. Typical range for distilled models: 1.0-8.0.
6969
guidance_rescale (`float`, defaults to `0.0`):
70-
Rescaling factor to prevent overexposure from high guidance scales. Based on
71-
[Common Diffusion Noise Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891).
72-
Range: 0.0 (no rescaling) to 1.0 (full rescaling).
70+
Rescaling factor to prevent overexposure from high guidance scales. Based on [Common Diffusion Noise
71+
Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891). Range: 0.0 (no rescaling)
72+
to 1.0 (full rescaling).
7373
use_original_formulation (`bool`, defaults to `False`):
7474
If `True`, uses the original CFG formulation from the paper. If `False` (default), uses the
7575
diffusers-native formulation from the Imagen paper.
7676
start (`float`, defaults to `0.0`):
77-
Fraction of denoising steps (0.0-1.0) after which CFG starts. Use > 0.0 to disable CFG in
78-
early denoising steps.
77+
Fraction of denoising steps (0.0-1.0) after which CFG starts. Use > 0.0 to disable CFG in early denoising
78+
steps.
7979
stop (`float`, defaults to `1.0`):
80-
Fraction of denoising steps (0.0-1.0) after which CFG stops. Use < 1.0 to disable CFG in
81-
late denoising steps.
80+
Fraction of denoising steps (0.0-1.0) after which CFG stops. Use < 1.0 to disable CFG in late denoising
81+
steps.
8282
enabled (`bool`, defaults to `True`):
8383
Whether CFG is enabled. Set to `False` to disable CFG entirely (uses only conditional predictions).
8484
"""

src/diffusers/pipelines/hunyuan_image/pipeline_hunyuanimage.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ def encode_prompt(
329329
argument using self.tokenizer_2 and self.text_encoder_2.
330330
"""
331331
device = device or self._execution_device
332-
332+
333333
if prompt is None:
334334
prompt = ""
335335
prompt = [prompt] if isinstance(prompt, str) else prompt
@@ -537,8 +537,9 @@ def __call__(
537537
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
538538
instead.
539539
negative_prompt (`str` or `List[str]`, *optional*):
540-
The prompt or prompts not to guide the image generation. If not defined and negative_prompt_embeds is not provided, will use an empty negative prompt.
541-
Ignored when not using guidance (i.e., ignored if any of the following conditions are met:
540+
The prompt or prompts not to guide the image generation. If not defined and negative_prompt_embeds is
541+
not provided, will use an empty negative prompt. Ignored when not using guidance (i.e., ignored if any
542+
of the following conditions are met:
542543
1. guider is disabled
543544
2. guider.guidance_scale is not greater than `1` and `true_cfg_scale` is not provided,
544545
3. `true_cfg_scale` is not greater than `1`.
@@ -558,15 +559,15 @@ def __call__(
558559
Guidance scale as defined in [Classifier-Free Diffusion
559560
Guidance](https://huggingface.co/papers/2207.12598). `true_cfg_scale` is defined as `w` of equation 2
560561
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Classifier-free guidance is enabled by
561-
setting `true_cfg_scale > 1`. Higher guidance scale encourages to
562-
generate images that are closely linked to the text `prompt`, usually at the expense of lower image
563-
quality. If not defined, the default `guidance_scale` configured in guider will be used.
562+
setting `true_cfg_scale > 1`. Higher guidance scale encourages to generate images that are closely
563+
linked to the text `prompt`, usually at the expense of lower image quality. If not defined, the default
564+
`guidance_scale` configured in guider will be used.
564565
guidance_scale (`float`, *optional*, defaults to None):
565566
A guidance scale value for guidance distilled models. Unlike the traditional classifier-free guidance
566567
where the guidance scale is applied during inference through noise prediction rescaling, guidance
567568
distilled models take the guidance scale directly as an input parameter during forward pass. Guidance
568-
is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images
569-
that are closely linked to the text `prompt`, usually at the expense of lower image quality. If not defined,
569+
is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images that
570+
are closely linked to the text `prompt`, usually at the expense of lower image quality. If not defined,
570571
the default `distilled_guidance_scale` configured in guider will be used.
571572
num_images_per_prompt (`int`, *optional*, defaults to 1):
572573
The number of images to generate per prompt.
@@ -698,7 +699,7 @@ def __call__(
698699
if guidance_scale is not None:
699700
guider_kwargs["distilled_guidance_scale"] = guidance_scale
700701
guider = guider.new(**guider_kwargs)
701-
702+
702703
if is_guider_enabled:
703704
guider.enable()
704705
else:
@@ -745,16 +746,24 @@ def __call__(
745746
self._num_timesteps = len(timesteps)
746747

747748
# handle guidance (for guidance-distilled model)
748-
if self.transformer.config.guidance_embeds and not(hasattr(guider, "distilled_guidance_scale") and guider.distilled_guidance_scale is not None):
749+
if self.transformer.config.guidance_embeds and not (
750+
hasattr(guider, "distilled_guidance_scale") and guider.distilled_guidance_scale is not None
751+
):
749752
raise ValueError("`guidance_scale` is required for guidance-distilled model.")
750-
elif not self.transformer.config.guidance_embeds and hasattr(guider, "distilled_guidance_scale") and guider.distilled_guidance_scale is not None:
753+
elif (
754+
not self.transformer.config.guidance_embeds
755+
and hasattr(guider, "distilled_guidance_scale")
756+
and guider.distilled_guidance_scale is not None
757+
):
751758
logger.warning(
752759
f"`distilled_guidance_scale` {guider.distilled_guidance_scale} is ignored since the model is not guidance-distilled. Please use `true_cfg_scale` instead."
753760
)
754761

755762
if self.transformer.config.guidance_embeds:
756763
guidance = (
757-
torch.tensor([guider.distilled_guidance_scale] * latents.shape[0], dtype=self.transformer.dtype, device=device)
764+
torch.tensor(
765+
[guider.distilled_guidance_scale] * latents.shape[0], dtype=self.transformer.dtype, device=device
766+
)
758767
* 1000.0
759768
)
760769

src/diffusers/pipelines/hunyuan_image/pipeline_hunyuanimage_refiner.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@
1919
import torch
2020
from transformers import Qwen2_5_VLForConditionalGeneration, Qwen2Tokenizer
2121

22+
from ...guiders import ClassifierFreeGuidance
2223
from ...image_processor import PipelineImageInput, VaeImageProcessor
2324
from ...models import AutoencoderKLHunyuanImageRefiner, HunyuanImageTransformer2DModel
2425
from ...schedulers import FlowMatchEulerDiscreteScheduler
2526
from ...utils import is_torch_xla_available, logging, replace_example_docstring
2627
from ...utils.torch_utils import randn_tensor
2728
from ..pipeline_utils import DiffusionPipeline
2829
from .pipeline_output import HunyuanImagePipelineOutput
29-
from ...guiders import ClassifierFreeGuidance
3030

3131

3232
if is_torch_xla_available():
@@ -150,7 +150,7 @@ class HunyuanImageRefinerPipeline(DiffusionPipeline):
150150
"""
151151

152152
model_cpu_offload_seq = "text_encoder->transformer->vae"
153-
_callback_tensor_inputs = ["latents", "prompt_embeds"]
153+
_callback_tensor_inputs = ["latents", "prompt_embeds"]
154154
_guider_input_fields = {
155155
"encoder_hidden_states": ("prompt_embeds", "negative_prompt_embeds"),
156156
"encoder_attention_mask": ("prompt_embeds_mask", "negative_prompt_embeds_mask"),
@@ -224,7 +224,7 @@ def _get_qwen_prompt_embeds(
224224

225225
def encode_prompt(
226226
self,
227-
prompt: Optional[Union[str, List[str]]] = None,
227+
prompt: Optional[Union[str, List[str]]] = None,
228228
device: Optional[torch.device] = None,
229229
num_images_per_prompt: int = 1,
230230
prompt_embeds: Optional[torch.Tensor] = None,
@@ -464,8 +464,8 @@ def __call__(
464464
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
465465
instead.
466466
negative_prompt (`str` or `List[str]`, *optional*):
467-
The prompt or prompts not to guide the image generation. If not defined, will use an empty negative prompt.
468-
Ignored when not using guidance (i.e., ignored if any of the following conditions are met:
467+
The prompt or prompts not to guide the image generation. If not defined, will use an empty negative
468+
prompt. Ignored when not using guidance (i.e., ignored if any of the following conditions are met:
469469
1. guider is disabled
470470
2. guider.guidance_scale is not greater than `1` and `true_cfg_scale` is not provided,
471471
3. `true_cfg_scale` is not greater than `1`.
@@ -474,15 +474,15 @@ def __call__(
474474
Guidance scale as defined in [Classifier-Free Diffusion
475475
Guidance](https://huggingface.co/papers/2207.12598). `true_cfg_scale` is defined as `w` of equation 2
476476
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Classifier-free guidance is enabled by
477-
setting `true_cfg_scale > 1`. Higher guidance scale encourages to
478-
generate images that are closely linked to the text `prompt`, usually at the expense of lower image
479-
quality. If not defined, the default `guidance_scale` configured in guider will be used.
477+
setting `true_cfg_scale > 1`. Higher guidance scale encourages to generate images that are closely
478+
linked to the text `prompt`, usually at the expense of lower image quality. If not defined, the default
479+
`guidance_scale` configured in guider will be used.
480480
guidance_scale (`float`, *optional*, defaults to None):
481481
A guidance scale value for guidance distilled models. Unlike the traditional classifier-free guidance,
482-
guidance distilled models take the guidance scale directly as an input parameter during forward pass. Guidance
483-
is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images
484-
that are closely linked to the text `prompt`, usually at the expense of lower image quality.
485-
If not defined, the default `distilled_guidance_scale` configured in guider will be used.
482+
guidance distilled models take the guidance scale directly as an input parameter during forward pass.
483+
Guidance is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate
484+
images that are closely linked to the text `prompt`, usually at the expense of lower image quality. If
485+
not defined, the default `distilled_guidance_scale` configured in guider will be used.
486486
num_images_per_prompt (`int`, *optional*, defaults to 1):
487487
height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
488488
The height in pixels of the generated image. This is set to 1024 by default for the best results.
@@ -577,7 +577,7 @@ def __call__(
577577
image_latents = self._encode_vae_image(image=image, generator=generator)
578578

579579
# 3.prepare prompt embeds
580-
580+
581581
# if true_cfg_scale/guidance_scale is provided, override the guidance_scale/distilled_guidance_scale in guider at runtime
582582
is_guider_enabled = self.guider._enabled
583583
guider_kwargs = {}
@@ -640,8 +640,11 @@ def __call__(
640640
if not (hasattr(guider, "distilled_guidance_scale") and guider.distilled_guidance_scale is not None):
641641
raise ValueError("`distilled_guidance_scale` is required for guidance-distilled model.")
642642
guidance = (
643-
torch.tensor([guider.distilled_guidance_scale] * latents.shape[0], dtype=self.transformer.dtype, device=device) * 1000.0
643+
torch.tensor(
644+
[guider.distilled_guidance_scale] * latents.shape[0], dtype=self.transformer.dtype, device=device
644645
)
646+
* 1000.0
647+
)
645648

646649
if self.attention_kwargs is None:
647650
self._attention_kwargs = {}
@@ -659,7 +662,7 @@ def __call__(
659662
timestep = t.expand(latents.shape[0]).to(latents.dtype)
660663

661664
# Step 1: Collect model inputs needed for the guidance method
662-
# The `_guider_input_fields` defines which inputs model needs for conditional/unconditional predictions.
665+
# The `_guider_input_fields` defines which inputs model needs for conditional/unconditional predictions.
663666
# e.g. {"encoder_hidden_states": ("prompt_embeds", "negative_prompt_embeds")}
664667
# means the model takes both prompt_embeds (conditional) and negative_prompt_embeds (unconditional) as inputs.
665668
guider_inputs = {}

0 commit comments

Comments
 (0)