|
| 1 | +import inspect |
1 | 2 | from unittest.mock import MagicMock, patch |
2 | 3 |
|
3 | 4 | import pytest |
|
9 | 10 | from invokeai.app.invocations.flux_denoise import FluxDenoiseInvocation |
10 | 11 | from invokeai.app.invocations.sd3_denoise import SD3DenoiseInvocation |
11 | 12 | from invokeai.app.invocations.z_image_denoise import ZImageDenoiseInvocation |
| 13 | +from invokeai.backend.flux.sampling_utils import clip_timestep_schedule_fractional, get_schedule |
| 14 | +from invokeai.backend.flux.schedulers import ANIMA_SCHEDULER_MAP, FLUX_SCHEDULER_MAP, ZIMAGE_SCHEDULER_MAP |
| 15 | +from invokeai.backend.flux2.sampling_utils import compute_empirical_mu, get_schedule_flux2 |
12 | 16 |
|
13 | 17 |
|
14 | 18 | def test_flux_prepare_noise_uses_external_noise(): |
@@ -165,3 +169,236 @@ def test_anima_prepare_noise_rejects_invalid_rank(): |
165 | 169 |
|
166 | 170 | with pytest.raises(ValueError, match="Expected noise with shape"): |
167 | 171 | invocation._prepare_noise_tensor(mock_context, torch.bfloat16, torch.device("cpu")) |
| 172 | + |
| 173 | + |
| 174 | +def _get_first_scheduler_sigma( |
| 175 | + scheduler, *, scheduler_name: str, sigmas: list[float], mu: float | None = None |
| 176 | +) -> float: |
| 177 | + set_timesteps_signature = inspect.signature(scheduler.set_timesteps) |
| 178 | + if scheduler_name != "lcm" and "sigmas" in set_timesteps_signature.parameters: |
| 179 | + kwargs: dict[str, object] = {"sigmas": sigmas, "device": "cpu"} |
| 180 | + if mu is not None and "mu" in set_timesteps_signature.parameters: |
| 181 | + kwargs["mu"] = mu |
| 182 | + scheduler.set_timesteps(**kwargs) |
| 183 | + else: |
| 184 | + scheduler.set_timesteps(num_inference_steps=len(sigmas) - 1, device="cpu") |
| 185 | + return float(scheduler.sigmas[0]) |
| 186 | + |
| 187 | + |
# Shared expected-failure mark for the FLUX schedulers that do not yet honor
# the externally clipped schedule's first step.
_FLUX_XFAIL = pytest.mark.xfail(
    reason="Known img2img preblend mismatch for FLUX with scheduler-defined first step.",
    strict=True,
)


@pytest.mark.parametrize(
    "scheduler_name",
    [
        "euler",
        pytest.param("heun", marks=_FLUX_XFAIL),
        pytest.param("lcm", marks=_FLUX_XFAIL),
    ],
)
def test_flux_img2img_preblend_matches_scheduler_first_sigma(scheduler_name: str):
    """The first sigma of the fractionally clipped FLUX schedule must equal the
    scheduler's own first sigma, so img2img pre-blending and the denoise loop
    agree on the starting noise level."""
    full_schedule = get_schedule(num_steps=4, image_seq_len=16, shift=True)
    clipped = clip_timestep_schedule_fractional(full_schedule, 0.25, 1.0)
    scheduler = FLUX_SCHEDULER_MAP[scheduler_name](num_train_timesteps=1000)

    first_sigma = _get_first_scheduler_sigma(
        scheduler, scheduler_name=scheduler_name, sigmas=clipped
    )
    assert clipped[0] == pytest.approx(first_sigma)
| 216 | + |
| 217 | + |
@pytest.mark.parametrize(
    "scheduler_name",
    [
        pytest.param(name, marks=pytest.mark.xfail(reason=reason, strict=True))
        for name, reason in [
            ("euler", "Known img2img preblend mismatch for FLUX.2 scheduler path."),
            ("heun", "Known img2img preblend mismatch for FLUX.2 scheduler path."),
            ("lcm", "Known FLUX.2 scheduler-path limitation for img2img parity."),
        ]
    ],
)
def test_flux2_img2img_preblend_matches_scheduler_first_sigma(scheduler_name: str):
    """FLUX.2 variant of the pre-blend parity check: all schedulers are
    currently expected to fail (strict xfail) on the scheduler-defined
    first-step path."""
    clipped = clip_timestep_schedule_fractional(
        get_schedule_flux2(num_steps=4, image_seq_len=16), 0.25, 1.0
    )
    mu = compute_empirical_mu(image_seq_len=16, num_steps=4)
    scheduler_cls = FLUX_SCHEDULER_MAP[scheduler_name]
    if scheduler_name == "heun":
        # Heun's scheduler does not take the dynamic-shifting knobs.
        scheduler = scheduler_cls(num_train_timesteps=1000, shift=3.0)
    else:
        scheduler = scheduler_cls(
            num_train_timesteps=1000,
            shift=3.0,
            use_dynamic_shifting=True,
            base_shift=0.5,
            max_shift=1.15,
            base_image_seq_len=256,
            max_image_seq_len=4096,
            time_shift_type="exponential",
        )

    # NOTE(review): the terminal sigma is dropped here (clipped[:-1]) unlike
    # the sibling tests — presumably intentional for the FLUX.2 path; the
    # strict xfail marks pin the current mismatch either way.
    first_sigma = _get_first_scheduler_sigma(
        scheduler, scheduler_name=scheduler_name, sigmas=clipped[:-1], mu=mu
    )
    assert clipped[0] == pytest.approx(first_sigma)
| 265 | + |
| 266 | + |
@pytest.mark.parametrize(
    "scheduler_name",
    ["euler"]
    + [
        pytest.param(
            name,
            marks=pytest.mark.xfail(
                reason="Known img2img preblend mismatch for Z-Image with scheduler-defined first step.",
                strict=True,
            ),
        )
        for name in ("heun", "lcm")
    ],
)
def test_z_image_img2img_preblend_matches_scheduler_first_sigma(scheduler_name: str):
    """Z-Image pre-blend parity: the first sigma of the 25%-clipped node
    schedule must match the scheduler's first sigma."""
    node = ZImageDenoiseInvocation.model_construct(steps=8, width=1024, height=1024)
    # Sequence length after 8x VAE downsample and 2x2 patching.
    seq_len = (node.height // 8 // 2) * (node.width // 8 // 2)
    schedule = node._get_sigmas(node._calculate_shift(seq_len), node.steps)
    start_index = int(0.25 * (len(schedule) - 1))
    clipped = schedule[start_index:]
    scheduler = ZIMAGE_SCHEDULER_MAP[scheduler_name](num_train_timesteps=1000, shift=1.0)

    first_sigma = _get_first_scheduler_sigma(
        scheduler, scheduler_name=scheduler_name, sigmas=clipped
    )
    assert clipped[0] == pytest.approx(first_sigma)
| 299 | + |
| 300 | + |
@pytest.mark.parametrize(
    "scheduler_name",
    ["euler"]
    + [
        pytest.param(
            name,
            marks=pytest.mark.xfail(
                reason="Known img2img preblend mismatch for Anima with scheduler-defined first step.",
                strict=True,
            ),
        )
        for name in ("heun", "lcm")
    ],
)
def test_anima_img2img_preblend_matches_scheduler_first_sigma(scheduler_name: str):
    """Anima pre-blend parity: the first sigma of the 25%-clipped node
    schedule must match the scheduler's first sigma."""
    node = AnimaDenoiseInvocation.model_construct(steps=30)
    schedule = node._get_sigmas(node.steps)
    start_index = int(0.25 * (len(schedule) - 1))
    clipped = schedule[start_index:]
    scheduler = ANIMA_SCHEDULER_MAP[scheduler_name](num_train_timesteps=1000, shift=1.0)

    first_sigma = _get_first_scheduler_sigma(
        scheduler, scheduler_name=scheduler_name, sigmas=clipped
    )
    assert clipped[0] == pytest.approx(first_sigma)
| 331 | + |
| 332 | + |
def test_sd3_partial_denoise_short_circuit_uses_first_clipped_timestep():
    """When denoising_start == denoising_end the SD3 node should skip the
    denoise loop entirely and return noise/init-latents blended at the first
    clipped timestep."""
    node = SD3DenoiseInvocation.model_construct(
        latents=MagicMock(latents_name="latents"),
        width=64,
        height=64,
        steps=4,
        denoising_start=0.25,
        denoising_end=0.25,
        positive_conditioning=MagicMock(conditioning_name="positive"),
        negative_conditioning=MagicMock(conditioning_name="negative"),
        transformer=MagicMock(transformer="transformer"),
        seed=0,
    )
    # Constant-valued tensors make the expected blend easy to verify exactly.
    source_latents = torch.full((1, 16, 8, 8), 2.0)
    injected_noise = torch.full((1, 16, 8, 8), 10.0)
    ctx = MagicMock()
    ctx.tensors.load.return_value = source_latents
    ctx.models.load.return_value = MagicMock(
        model=MagicMock(config=MagicMock(in_channels=16, joint_attention_dim=4096))
    )

    with (
        patch("invokeai.app.invocations.sd3_denoise.TorchDevice.choose_torch_device", return_value=torch.device("cpu")),
        patch("invokeai.app.invocations.sd3_denoise.TorchDevice.choose_torch_dtype", return_value=torch.float32),
        patch.object(node, "_prepare_noise_tensor", return_value=injected_noise),
        patch.object(node, "_load_text_conditioning", return_value=(torch.zeros(1, 1, 1), torch.zeros(1, 1))),
    ):
        result = node._run_diffusion(ctx)

    schedule = clip_timestep_schedule_fractional(torch.linspace(1, 0, node.steps + 1).tolist(), 0.25, 0.25)
    t0 = schedule[0]
    expected = t0 * injected_noise + (1.0 - t0) * source_latents
    assert torch.equal(result, expected)
| 365 | + |
| 366 | + |
def test_cogview4_partial_denoise_short_circuit_uses_first_clipped_sigma():
    """When denoising_start == denoising_end the CogView4 node should skip the
    denoise loop and return noise/init-latents blended at the first clipped
    sigma (after timestep->sigma conversion)."""
    node = CogView4DenoiseInvocation.model_construct(
        latents=MagicMock(latents_name="latents"),
        width=64,
        height=64,
        steps=4,
        denoising_start=0.25,
        denoising_end=0.25,
        positive_conditioning=MagicMock(conditioning_name="positive"),
        negative_conditioning=MagicMock(conditioning_name="negative"),
        transformer=MagicMock(transformer="transformer"),
        seed=0,
    )
    # Constant-valued tensors make the expected blend easy to verify.
    source_latents = torch.full((1, 16, 8, 8), 2.0)
    injected_noise = torch.full((1, 16, 8, 8), 10.0)
    ctx = MagicMock()
    ctx.tensors.load.return_value = source_latents
    fake_transformer = MagicMock(config=MagicMock(in_channels=16, patch_size=2))
    ctx.models.load.return_value = MagicMock(model=fake_transformer)

    with (
        # Neutralize the isinstance check against the real diffusers class.
        patch("invokeai.app.invocations.cogview4_denoise.CogView4Transformer2DModel", object),
        patch(
            "invokeai.app.invocations.cogview4_denoise.TorchDevice.choose_torch_device",
            return_value=torch.device("cpu"),
        ),
        patch.object(node, "_prepare_noise_tensor", return_value=injected_noise),
        patch.object(node, "_load_text_conditioning", return_value=torch.zeros(1, 1, 1)),
    ):
        result = node._run_diffusion(ctx)

    schedule = clip_timestep_schedule_fractional(torch.linspace(1, 0, node.steps + 1).tolist(), 0.25, 0.25)
    # Latent grid is (h/8)x(w/8), patched 2x2 -> divide by 2**2.
    seq_len = ((node.height // 8) * (node.width // 8)) // (2**2)
    sigma_schedule = node._convert_timesteps_to_sigmas(
        image_seq_len=seq_len,
        timesteps=torch.tensor(schedule),
    )
    s0 = sigma_schedule[0]
    expected = s0 * injected_noise + (1.0 - s0) * source_latents
    assert torch.allclose(result, expected, atol=2e-3, rtol=0)
0 commit comments