1919import torch
2020from transformers import Qwen2_5_VLForConditionalGeneration , Qwen2Tokenizer
2121
22+ from ...guiders import ClassifierFreeGuidance
2223from ...image_processor import PipelineImageInput , VaeImageProcessor
2324from ...models import AutoencoderKLHunyuanImageRefiner , HunyuanImageTransformer2DModel
2425from ...schedulers import FlowMatchEulerDiscreteScheduler
2526from ...utils import is_torch_xla_available , logging , replace_example_docstring
2627from ...utils .torch_utils import randn_tensor
2728from ..pipeline_utils import DiffusionPipeline
2829from .pipeline_output import HunyuanImagePipelineOutput
29- from ...guiders import ClassifierFreeGuidance
3030
3131
3232if is_torch_xla_available ():
@@ -150,7 +150,7 @@ class HunyuanImageRefinerPipeline(DiffusionPipeline):
150150 """
151151
152152 model_cpu_offload_seq = "text_encoder->transformer->vae"
153- _callback_tensor_inputs = ["latents" , "prompt_embeds" ]
153+ _callback_tensor_inputs = ["latents" , "prompt_embeds" ]
154154 _guider_input_fields = {
155155 "encoder_hidden_states" : ("prompt_embeds" , "negative_prompt_embeds" ),
156156 "encoder_attention_mask" : ("prompt_embeds_mask" , "negative_prompt_embeds_mask" ),
@@ -224,7 +224,7 @@ def _get_qwen_prompt_embeds(
224224
225225 def encode_prompt (
226226 self ,
227- prompt : Optional [Union [str , List [str ]]] = None ,
227+ prompt : Optional [Union [str , List [str ]]] = None ,
228228 device : Optional [torch .device ] = None ,
229229 num_images_per_prompt : int = 1 ,
230230 prompt_embeds : Optional [torch .Tensor ] = None ,
@@ -464,8 +464,8 @@ def __call__(
464464 The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
465465 instead.
466466 negative_prompt (`str` or `List[str]`, *optional*):
467- The prompt or prompts not to guide the image generation. If not defined, will use an empty negative prompt.
468- Ignored when not using guidance (i.e., ignored if any of the following conditions are met:
467+ The prompt or prompts not to guide the image generation. If not defined, will use an empty negative
468+ prompt. Ignored when not using guidance (i.e., ignored if any of the following conditions are met:
469469 1. guider is disabled
470470 2. guider.guidance_scale is not greater than `1` and `true_cfg_scale` is not provided,
471471 3. `true_cfg_scale` is not greater than `1`.
@@ -474,15 +474,15 @@ def __call__(
474474 Guidance scale as defined in [Classifier-Free Diffusion
475475 Guidance](https://huggingface.co/papers/2207.12598). `true_cfg_scale` is defined as `w` of equation 2
476476 of [Imagen Paper](https://huggingface.co/papers/2205.11487). Classifier-free guidance is enabled by
477- setting `true_cfg_scale > 1`. Higher guidance scale encourages to
478- generate images that are closely linked to the text `prompt`, usually at the expense of lower image
479- quality. If not defined, the default `guidance_scale` configured in guider will be used.
477+ setting `true_cfg_scale > 1`. Higher guidance scale encourages to generate images that are closely
478+ linked to the text `prompt`, usually at the expense of lower image quality. If not defined, the default
479+ `guidance_scale` configured in guider will be used.
480480 guidance_scale (`float`, *optional*, defaults to None):
481481 A guidance scale value for guidance distilled models. Unlike the traditional classifier-free guidance,
482- guidance distilled models take the guidance scale directly as an input parameter during forward pass. Guidance
483- is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate images
484- that are closely linked to the text `prompt`, usually at the expense of lower image quality.
485- If not defined, the default `distilled_guidance_scale` configured in guider will be used.
482+ guidance distilled models take the guidance scale directly as an input parameter during forward pass.
483+ Guidance is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate
484+ images that are closely linked to the text `prompt`, usually at the expense of lower image quality. If
485+ not defined, the default `distilled_guidance_scale` configured in guider will be used.
486486 num_images_per_prompt (`int`, *optional*, defaults to 1):
487487 height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
488488 The height in pixels of the generated image. This is set to 1024 by default for the best results.
@@ -577,7 +577,7 @@ def __call__(
577577 image_latents = self ._encode_vae_image (image = image , generator = generator )
578578
579579 # 3.prepare prompt embeds
580-
580+
581581 # if true_cfg_scale/guidance_scale is provided, override the guidance_scale/distilled_guidance_scale in guider at runtime
582582 is_guider_enabled = self .guider ._enabled
583583 guider_kwargs = {}
@@ -640,8 +640,11 @@ def __call__(
640640 if not (hasattr (guider , "distilled_guidance_scale" ) and guider .distilled_guidance_scale is not None ):
641641 raise ValueError ("`distilled_guidance_scale` is required for guidance-distilled model." )
642642 guidance = (
643- torch .tensor ([guider .distilled_guidance_scale ] * latents .shape [0 ], dtype = self .transformer .dtype , device = device ) * 1000.0
643+ torch .tensor (
644+ [guider .distilled_guidance_scale ] * latents .shape [0 ], dtype = self .transformer .dtype , device = device
644645 )
646+ * 1000.0
647+ )
645648
646649 if self .attention_kwargs is None :
647650 self ._attention_kwargs = {}
@@ -659,7 +662,7 @@ def __call__(
659662 timestep = t .expand (latents .shape [0 ]).to (latents .dtype )
660663
661664 # Step 1: Collect model inputs needed for the guidance method
662- # The `_guider_input_fields` defines which inputs model needs for conditional/unconditional predictions.
665+ # The `_guider_input_fields` defines which inputs model needs for conditional/unconditional predictions.
663666 # e.g. {"encoder_hidden_states": ("prompt_embeds", "negative_prompt_embeds")}
664667 # means the model needs both prompt_embeds (conditional) and negative_prompt_embeds (unconditional) as inputs.
665668 guider_inputs = {}
0 commit comments