diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py index f0474487bce9..a41afde71ee0 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py @@ -141,6 +141,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # guided prediction has zero std, the clamp avoids dividing by zero; only an all-zero `noise_cfg` is unchanged. + std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py index 942bcb49083e..9cda1a255e4c 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py @@ -161,6 +161,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` 
(constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. + std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py index 4b7ca284d636..e08f9d7bb5e5 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py @@ -151,6 +151,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py b/src/diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py index 8882b561f0a1..47b0e3b84006 100644 --- a/src/diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +++ b/src/diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py @@ -156,6 +156,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py index 9fab42916e9e..2b61c8e2b825 100644 --- a/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +++ b/src/diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py @@ -84,6 +84,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/deprecated/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py index 16b21dd66132..ae4e059719ca 100644 --- a/src/diffusers/pipelines/deprecated/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +++ b/src/diffusers/pipelines/deprecated/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py @@ -94,6 +94,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/deprecated/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/deprecated/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py index 481c9c93ddde..40c659ac6e89 100644 --- a/src/diffusers/pipelines/deprecated/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +++ b/src/diffusers/pipelines/deprecated/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py @@ -89,6 +89,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/deprecated/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py b/src/diffusers/pipelines/deprecated/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py index 9af63e5044bd..64ad472a203b 100644 --- a/src/diffusers/pipelines/deprecated/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +++ b/src/diffusers/pipelines/deprecated/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py @@ -335,6 +335,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/easyanimate/pipeline_easyanimate.py b/src/diffusers/pipelines/easyanimate/pipeline_easyanimate.py index 6ec8f44e6d1a..8cced82c22ec 100755 --- a/src/diffusers/pipelines/easyanimate/pipeline_easyanimate.py +++ b/src/diffusers/pipelines/easyanimate/pipeline_easyanimate.py @@ -117,6 +117,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py b/src/diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py index 5e07996a661c..6c67341292f6 100755 --- a/src/diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +++ b/src/diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py @@ -193,6 +193,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py b/src/diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py index 872313898008..8e438d0288d2 100755 --- a/src/diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +++ b/src/diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py @@ -215,6 +215,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py b/src/diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py index b908dd5dfe83..1fc95a68f652 100644 --- a/src/diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +++ b/src/diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py @@ -140,6 +140,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py index 864f9feeb5aa..30ffac8c646c 100644 --- a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +++ b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py @@ -260,6 +260,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py index a136770b9f26..faef69f01920 100644 --- a/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py @@ -1722,6 +1722,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/ltx/pipeline_ltx.py b/src/diffusers/pipelines/ltx/pipeline_ltx.py index e2514c3bca24..3a1a5bf565b2 100644 --- a/src/diffusers/pipelines/ltx/pipeline_ltx.py +++ b/src/diffusers/pipelines/ltx/pipeline_ltx.py @@ -161,6 +161,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/ltx/pipeline_ltx_condition.py b/src/diffusers/pipelines/ltx/pipeline_ltx_condition.py index 539a28f56e67..92845fb1b56b 100644 --- a/src/diffusers/pipelines/ltx/pipeline_ltx_condition.py +++ b/src/diffusers/pipelines/ltx/pipeline_ltx_condition.py @@ -243,6 +243,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/ltx/pipeline_ltx_i2v_long_multi_prompt.py b/src/diffusers/pipelines/ltx/pipeline_ltx_i2v_long_multi_prompt.py index 838d5afc5c5a..31047f7c28d1 100644 --- a/src/diffusers/pipelines/ltx/pipeline_ltx_i2v_long_multi_prompt.py +++ b/src/diffusers/pipelines/ltx/pipeline_ltx_i2v_long_multi_prompt.py @@ -136,6 +136,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/ltx/pipeline_ltx_image2video.py b/src/diffusers/pipelines/ltx/pipeline_ltx_image2video.py index 497f505c4dd8..c1098d077ce9 100644 --- a/src/diffusers/pipelines/ltx/pipeline_ltx_image2video.py +++ b/src/diffusers/pipelines/ltx/pipeline_ltx_image2video.py @@ -180,6 +180,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/ltx2/pipeline_ltx2.py b/src/diffusers/pipelines/ltx2/pipeline_ltx2.py index 946360445e61..86f32a36e98c 100644 --- a/src/diffusers/pipelines/ltx2/pipeline_ltx2.py +++ b/src/diffusers/pipelines/ltx2/pipeline_ltx2.py @@ -176,6 +176,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py b/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py index a80d011015cf..4a17b44eeec7 100644 --- a/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py +++ b/src/diffusers/pipelines/ltx2/pipeline_ltx2_condition.py @@ -226,6 +226,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/ltx2/pipeline_ltx2_image2video.py b/src/diffusers/pipelines/ltx2/pipeline_ltx2_image2video.py index 997bfd9fc9dc..a7a96ef864a5 100644 --- a/src/diffusers/pipelines/ltx2/pipeline_ltx2_image2video.py +++ b/src/diffusers/pipelines/ltx2/pipeline_ltx2_image2video.py @@ -196,6 +196,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/pag/pipeline_pag_hunyuandit.py b/src/diffusers/pipelines/pag/pipeline_pag_hunyuandit.py index 15ac665acd2b..da8103d8b8c4 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_hunyuandit.py @@ -143,6 +143,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/pag/pipeline_pag_sd.py b/src/diffusers/pipelines/pag/pipeline_pag_sd.py index 26ea717556c5..7737d7b5ddc9 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_sd.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_sd.py @@ -88,6 +88,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py b/src/diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py index 0f6fbbd9ae16..b548a0dd4323 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py @@ -115,6 +115,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/pag/pipeline_pag_sd_xl.py b/src/diffusers/pipelines/pag/pipeline_pag_sd_xl.py index 2987c90626ef..c99f877cc9dc 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_sd_xl.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_sd_xl.py @@ -103,6 +103,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py b/src/diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py index 433b9edc69b7..d6b9f12a9899 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py @@ -108,6 +108,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). When the + # standard deviation of the guided prediction is zero, the rescaling is a no-op. 
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py b/src/diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py index 9caf50e5e333..a968de0beef5 100644 --- a/src/diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +++ b/src/diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py @@ -121,6 +121,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). Note: an all-zero + # `noise_cfg` stays zero, but a constant non-zero one is still scaled by `std_text / eps`.
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 42d62f53a20a..03ef7611e9fa 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -86,6 +86,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). Note: an all-zero + # `noise_cfg` stays zero, but a constant non-zero one is still scaled by `std_text / eps`.
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index 2f6b105702e8..8aaec22c125d 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -101,6 +101,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). Note: an all-zero + # `noise_cfg` stays zero, but a constant non-zero one is still scaled by `std_text / eps`.
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index 19ccfab3de0a..f28f03e2b082 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -105,6 +105,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). Note: an all-zero + # `noise_cfg` stays zero, but a constant non-zero one is still scaled by `std_text / eps`.
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py index 8de7d4f0bb7d..ef7c27675d56 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py @@ -116,6 +116,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). Note: an all-zero + # `noise_cfg` stays zero, but a constant non-zero one is still scaled by `std_text / eps`.
+ std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py index a6dd07847de2..7ed459a7b1f4 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py @@ -144,6 +144,11 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0): std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True) std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True) # rescale the results from guidance (fixes overexposure) + # Guard against `std_cfg == 0` (constant/zero `noise_cfg`, which can happen at the + # beginning of a schedule or in numerical edge cases): a raw division would produce + # `nan`/`inf` and silently corrupt the diffusion output (issue #13425). Note: an all-zero + # `noise_cfg` stays zero, but a constant non-zero one is still scaled by `std_text / eps`. + std_cfg = std_cfg.clamp(min=torch.finfo(noise_cfg.dtype).eps) noise_pred_rescaled = noise_cfg * (std_text / std_cfg) # mix with the original results from guidance by factor guidance_rescale to avoid "plain looking" images noise_cfg = guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg