Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
e0e3653
add universal noise and optional denoiser noise inputs
JPPhoto Apr 11, 2026
3de1a0b
Document scheduler parity limitations
JPPhoto Apr 11, 2026
1a4c1a8
Clarify external noise integration rules
JPPhoto Apr 11, 2026
eb4a0d9
chore: typegen
JPPhoto Apr 14, 2026
dbb7acb
Merge branch 'main' into universal-noise-and-denoiser-inputs
JPPhoto Apr 14, 2026
5847815
Merge branch 'main' into universal-noise-and-denoiser-inputs
JPPhoto Apr 14, 2026
4eda4ea
Merge branch 'main' into universal-noise-and-denoiser-inputs
JPPhoto Apr 14, 2026
eace497
Merge remote-tracking branch 'origin/main' into codex-tmp/update-bran…
JPPhoto Apr 14, 2026
7bea25f
Merge remote-tracking branch 'origin/main' into codex-tmp/update-bran…
JPPhoto Apr 17, 2026
a4e7326
Merge remote-tracking branch 'origin/main' into codex-tmp/update-bran…
JPPhoto Apr 17, 2026
ccd22ad
Merge remote-tracking branch 'origin/main' into codex-tmp/update-bran…
JPPhoto Apr 18, 2026
b697db8
Merge remote-tracking branch 'origin/main' into codex-tmp/update-bran…
JPPhoto Apr 18, 2026
eb41fb4
Merge remote-tracking branch 'origin/main' into codex-tmp/update-bran…
JPPhoto Apr 19, 2026
7456190
Merge branch 'main' into universal-noise-and-denoiser-inputs
Pfannkuchensack Apr 19, 2026
78b414e
Merge remote-tracking branch 'origin/main' into codex-tmp/update-bran…
JPPhoto Apr 19, 2026
1c1cd81
Merge remote-tracking branch 'origin/main' into codex-tmp/update-bran…
JPPhoto Apr 20, 2026
8799a78
Fix external noise handling regressions
JPPhoto Apr 20, 2026
5106cd9
Merge remote-tracking branch 'origin/main' into universal-noise-and-d…
JPPhoto Apr 20, 2026
d6411d4
Merge remote-tracking branch 'origin/main' into codex-tmp/update-bran…
JPPhoto Apr 20, 2026
fd7c929
Merge remote-tracking branch 'origin/main' into codex-tmp/update-bran…
JPPhoto Apr 20, 2026
125e207
Merge remote-tracking branch 'origin/main' into codex-tmp/update-bran…
JPPhoto Apr 20, 2026
d8aa3b0
Merge remote-tracking branch 'origin/main' into codex-tmp/update-bran…
JPPhoto Apr 21, 2026
a36bf12
Merge remote-tracking branch 'origin/main' into codex-tmp/update-bran…
JPPhoto Apr 21, 2026
7339710
Merge remote-tracking branch 'origin/main' into codex-tmp/update-bran…
JPPhoto Apr 21, 2026
69cbd14
Merge remote-tracking branch 'origin/main' into codex-tmp/update-bran…
JPPhoto Apr 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 77 additions & 8 deletions docs-old/contributing/NEW_MODEL_INTEGRATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ class NewModelTextEncoderInvocation(BaseInvocation):
class NewModelDenoiseInvocation(BaseInvocation):
# Standard Fields
latents: LatentsField | None = InputField(default=None)
noise: LatentsField | None = InputField(default=None)
positive_conditioning: ConditioningField = InputField()
negative_conditioning: ConditioningField | None = InputField(default=None)

Expand All @@ -453,6 +454,7 @@ class NewModelDenoiseInvocation(BaseInvocation):
denoising_end: float = InputField(default=1.0, ge=0, le=1)
steps: int = InputField(default=20, ge=1)
cfg_scale: float = InputField(default=7.0)
add_noise: bool = InputField(default=True)

# Image-to-Image / Inpainting
denoise_mask: DenoiseMaskField | None = InputField(default=None)
Expand All @@ -461,16 +463,27 @@ class NewModelDenoiseInvocation(BaseInvocation):
scheduler: Literal["euler", "heun", "lcm"] = InputField(default="euler")

def invoke(self, context: InvocationContext) -> LatentsOutput:
# 1. Generate noise
noise = get_noise_newmodel(seed, height, width, ...)

# 2. Pack latents (if needed)
x = pack_newmodel(latents)
# 1. Load or generate noise
if self.noise is not None:
noise = self._load_and_validate_noise(context)
else:
noise = get_noise_newmodel(seed, height, width, ...)

# 3. Compute schedule
# 2. Compute schedule
timesteps = get_schedule_newmodel(num_steps, denoising_start, denoising_end)

# 4. Denoising loop
# 3. Prepare init latents and img2img preblend
if latents is not None and self.add_noise:
x = noise * timesteps[0] + latents * (1.0 - timesteps[0])
elif latents is not None:
x = latents
else:
x = noise

# 4. Pack latents (if needed)
x = pack_newmodel(x)

# 5. Denoising loop
x = denoise(
model=transformer,
x=x,
Expand All @@ -480,12 +493,19 @@ class NewModelDenoiseInvocation(BaseInvocation):
inpaint_extension=inpaint_extension, # For inpainting
)

# 5. Unpack latents
# 6. Unpack latents
latents = unpack_newmodel(x)

return LatentsOutput(latents=latents)
```

If the architecture supports external noise, the denoise invocation should
accept an optional `noise: LatentsField` input and preserve the existing
seed-driven path when it is not connected. Validate external noise against
the architecture's expected rank, channel count, and spatial shape before
using it. Existing workflows must continue to work unchanged when `noise` is
left disconnected.

### 4.4 VAE Encode Invocation

**File:** `invokeai/app/invocations/[newmodel]_vae_encode.py`
Expand Down Expand Up @@ -536,6 +556,9 @@ class NewModelVaeDecodeInvocation(BaseInvocation):
- [ ] Model loader invocation (`[newmodel]_model_loader.py`)
- [ ] Text encoder invocation (`[newmodel]_text_encoder.py`)
- [ ] Denoise invocation (`[newmodel]_denoise.py`)
- [ ] Add optional `noise: LatentsField` when the architecture supports
external noise
- [ ] Preserve the seed-driven fallback path when `noise` is not connected
- [ ] VAE encode invocation (`[newmodel]_vae_encode.py`)
- [ ] VAE decode invocation (`[newmodel]_vae_decode.py`)
- [ ] Define output classes (e.g., `NewModelLoaderOutput`)
Expand Down Expand Up @@ -574,6 +597,11 @@ def get_noise_newmodel(
dtype=dtype,
)

# If the architecture supports external noise, also extend
# invokeai/app/invocations/universal_noise.py when the tensor contract can be
# represented there. Only create a dedicated noise invocation when
# Universal Noise cannot express the architecture cleanly.

def pack_newmodel(x: torch.Tensor) -> torch.Tensor:
"""Pack latents for transformer input.

Expand Down Expand Up @@ -670,6 +698,13 @@ def denoise(
return img
```

If the architecture supports external noise, the denoise path should accept
validated external noise without changing the legacy seed-driven behavior.
Review img2img and inpaint preblend logic carefully when adding scheduler
support. If the initial latent/noise mix is computed before
`scheduler.set_timesteps()`, confirm that the preblend matches the
scheduler's true first effective sigma or timestep.

### 5.3 Scheduler (if model-specific)

**File:** `invokeai/backend/[newmodel]/schedulers.py` or use existing
Expand All @@ -690,11 +725,16 @@ NEWMODEL_SCHEDULER_MAP = {
### Backend Sampling and Denoise Checklist

- [ ] Noise generation (`get_noise_newmodel()`)
- [ ] Extend `invokeai/app/invocations/universal_noise.py` when the
architecture's noise tensor contract fits there
- [ ] Pack/unpack functions (if transformer-based)
- [ ] Schedule generation (`get_schedule_newmodel()`)
- [ ] Position ID generation (if needed)
- [ ] Implement denoise loop
- [ ] Validate external noise shape and rank if the architecture supports it
- [ ] Scheduler integration
- [ ] Verify img2img and inpaint preblend parity with the scheduler's first
effective timestep or sigma
- [ ] Inpaint extension integration
- [ ] Progress callbacks

Expand Down Expand Up @@ -847,6 +887,11 @@ if (
}
```

If the architecture supports external noise, do not require generated
workflows to connect it. Keep the denoise node backward compatible by
leaving `noise` disconnected unless the workflow explicitly needs external
noise.

### Frontend Graph Building Checklist

- [ ] Create graph builder (`buildNewModelGraph.ts`)
Expand Down Expand Up @@ -1209,6 +1254,25 @@ export const NewModelSchedulerSelect = () => {
- [ ] Frontend UI component
- [ ] State management

**External Noise:**
- [ ] Add optional `noise: LatentsField` input to the denoise invocation
- [ ] Validate external noise shape against the architecture's expected
latent shape
- [ ] Preserve existing behavior when `noise` is not connected
- [ ] Extend `Universal Noise` when the architecture's latent noise contract
can be represented there
- [ ] Add a dedicated architecture-compatible noise invocation only when
`Universal Noise` cannot support the architecture cleanly

If your model supports external noise, the denoise invocation should accept
it as an optional input rather than replacing the existing seed-driven path.
When possible, wire the architecture into `Universal Noise` instead of
creating a separate noise node. Only create a dedicated noise invocation if
the architecture has a noise tensor contract that `Universal Noise` cannot
express cleanly. When external noise is connected, validate rank, channel
count, and spatial shape before blending it with init latents or using it as
the initial latent state.

---

## Summary: Minimal Integration
Expand Down Expand Up @@ -1240,6 +1304,11 @@ For a **minimal txt2img integration**, the following files are required:
3. `src/features/nodes/util/graph/generation/addInpaint.ts`
4. `src/features/nodes/util/graph/generation/addOutpaint.ts`

If the architecture supports external noise, also extend
`invokeai/app/invocations/universal_noise.py` when possible and keep the
denoise invocation's `noise` input optional so existing generated workflows
continue to work without modification.

---

## Reference: Existing Implementations
Expand Down
40 changes: 37 additions & 3 deletions invokeai/app/invocations/anima_denoise.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
)
from invokeai.app.invocations.model import TransformerField
from invokeai.app.invocations.primitives import LatentsOutput
from invokeai.app.invocations.universal_noise import validate_noise_tensor_shape
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.anima.anima_transformer_patch import patch_anima_for_regional_prompting
from invokeai.backend.anima.conditioning_data import AnimaRegionalTextConditioning, AnimaTextConditioning
Expand Down Expand Up @@ -165,7 +166,7 @@ def merge_intermediate_latents_with_init_latents(
title="Denoise - Anima",
tags=["image", "anima"],
category="image",
version="1.2.0",
version="1.3.0",
classification=Classification.Prototype,
)
class AnimaDenoiseInvocation(BaseInvocation):
Expand All @@ -181,6 +182,9 @@ class AnimaDenoiseInvocation(BaseInvocation):
latents: Optional[LatentsField] = InputField(
default=None, description=FieldDescriptions.latents, input=Input.Connection
)
noise: Optional[LatentsField] = InputField(
default=None, description=FieldDescriptions.noise, input=Input.Connection
)
# denoise_mask is used for inpainting. Only the masked region is modified.
denoise_mask: Optional[DenoiseMaskField] = InputField(
default=None, description=FieldDescriptions.denoise_mask, input=Input.Connection
Expand Down Expand Up @@ -458,19 +462,35 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor:
if init_latents.ndim == 4:
init_latents = init_latents.unsqueeze(2) # [B, C, H, W] -> [B, C, 1, H, W]

# Generate initial noise (3D latent: [B, C, T, H, W])
noise = self._get_noise(self.height, self.width, inference_dtype, device, self.seed)
# Generate initial noise (3D latent: [B, C, T, H, W]).
# If noise will never be consumed, avoid validating/loading it.
should_ignore_noise = init_latents is not None and not self.add_noise and self.denoise_mask is None
noise: torch.Tensor | None
if should_ignore_noise:
noise = None
else:
noise = self._prepare_noise_tensor(context, inference_dtype, device)

# Prepare input latents
if init_latents is not None:
if self.add_noise:
assert noise is not None
# Noise the init latents using the first sigma from the clipped
# InvokeAI schedule.
#
# Known limitation: if the selected scheduler later starts from a
# different first effective sigma/timestep than sigmas[0], the
# img2img preblend below may not match that scheduler exactly.
# This is an existing pipeline limitation and affects both
# internally generated noise and externally supplied noise.
s_0 = sigmas[0]
latents = s_0 * noise + (1.0 - s_0) * init_latents
else:
latents = init_latents
else:
if self.denoising_start > 1e-5:
raise ValueError("denoising_start should be 0 when initial latents are not provided.")
assert noise is not None
latents = noise

if total_steps <= 0:
Expand All @@ -482,6 +502,7 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor:
if inpaint_mask is not None:
if init_latents is None:
raise ValueError("Initial latents are required when using an inpaint mask (image-to-image inpainting)")
assert noise is not None
inpaint_extension = AnimaInpaintExtension(
init_latents=init_latents.squeeze(2),
inpaint_mask=inpaint_mask,
Expand All @@ -503,6 +524,9 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor:
if not is_lcm and "sigmas" in set_timesteps_sig.parameters:
scheduler.set_timesteps(sigmas=sigmas, device=device)
else:
# LCM or a scheduler without custom-sigma support computes its own
# schedule from num_inference_steps. That can diverge from sigmas[0]
# used in the img2img preblend above.
scheduler.set_timesteps(num_inference_steps=total_steps, device=device)
num_scheduler_steps = len(scheduler.timesteps)
else:
Expand Down Expand Up @@ -696,6 +720,16 @@ def _run_transformer(ctx: torch.Tensor, x: torch.Tensor, t: torch.Tensor) -> tor
# Remove temporal dimension for output: [B, C, 1, H, W] -> [B, C, H, W]
return latents.squeeze(2)

def _prepare_noise_tensor(
    self, context: InvocationContext, inference_dtype: torch.dtype, device: torch.device
) -> torch.Tensor:
    """Return the initial noise tensor, preferring externally supplied noise.

    When the optional ``noise`` input is connected, that tensor is loaded,
    moved to the target device/dtype, and validated against the expected
    Anima latent shape before use. Otherwise the legacy seed-driven noise
    generation path is used, preserving backward-compatible behavior.
    """
    if self.noise is None:
        # Legacy path: deterministic noise derived from this node's seed.
        return self._get_noise(self.height, self.width, inference_dtype, device, self.seed)

    external = context.tensors.load(self.noise.latents_name)
    external = external.to(device=device, dtype=inference_dtype)
    validate_noise_tensor_shape(external, "Anima", self.width, self.height)
    return external

def _build_step_callback(self, context: InvocationContext) -> Callable[[PipelineIntermediateState], None]:
def step_callback(state: PipelineIntermediateState) -> None:
context.util.sd_step_callback(state, BaseModelType.Anima)
Expand Down
34 changes: 24 additions & 10 deletions invokeai/app/invocations/cogview4_denoise.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
)
from invokeai.app.invocations.model import TransformerField
from invokeai.app.invocations.primitives import LatentsOutput
from invokeai.app.invocations.universal_noise import validate_noise_tensor_shape
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.backend.flux.sampling_utils import clip_timestep_schedule_fractional
from invokeai.backend.model_manager.taxonomy import BaseModelType
Expand All @@ -34,7 +35,7 @@
title="Denoise - CogView4",
tags=["image", "cogview4"],
category="latents",
version="1.0.0",
version="1.1.0",
classification=Classification.Prototype,
)
class CogView4DenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
Expand All @@ -44,6 +45,9 @@ class CogView4DenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
latents: Optional[LatentsField] = InputField(
default=None, description=FieldDescriptions.latents, input=Input.Connection
)
noise: Optional[LatentsField] = InputField(
default=None, description=FieldDescriptions.noise, input=Input.Connection
)
# denoise_mask is used for image-to-image inpainting. Only the masked region is modified.
denoise_mask: Optional[DenoiseMaskField] = InputField(
default=None, description=FieldDescriptions.denoise_mask, input=Input.Connection
Expand Down Expand Up @@ -245,15 +249,7 @@ def _run_diffusion(
# Generate initial latent noise.
num_channels_latents = transformer_info.model.config.in_channels # type: ignore
assert isinstance(num_channels_latents, int)
noise = self._get_noise(
batch_size=1,
num_channels_latents=num_channels_latents,
height=self.height,
width=self.width,
dtype=inference_dtype,
device=device,
seed=self.seed,
)
noise = self._prepare_noise_tensor(context, num_channels_latents, inference_dtype, device)

# Prepare input latent image.
if init_latents is not None:
Expand Down Expand Up @@ -356,6 +352,24 @@ def _run_diffusion(

return latents

def _prepare_noise_tensor(
    self, context: InvocationContext, num_channels_latents: int, inference_dtype: torch.dtype, device: torch.device
) -> torch.Tensor:
    """Return the initial noise tensor, preferring externally supplied noise.

    When the optional ``noise`` input is connected, that tensor is loaded,
    moved to the target device/dtype, and validated against the expected
    CogView4 latent shape (including channel count) before use. Otherwise
    the legacy seed-driven noise generation path is used unchanged.
    """
    if self.noise is None:
        # Legacy path: deterministic noise derived from this node's seed.
        return self._get_noise(
            batch_size=1,
            num_channels_latents=num_channels_latents,
            height=self.height,
            width=self.width,
            dtype=inference_dtype,
            device=device,
            seed=self.seed,
        )

    external = context.tensors.load(self.noise.latents_name)
    external = external.to(device=device, dtype=inference_dtype)
    validate_noise_tensor_shape(external, "CogView4", self.width, self.height, num_channels=num_channels_latents)
    return external

def _build_step_callback(self, context: InvocationContext) -> Callable[[PipelineIntermediateState], None]:
def step_callback(state: PipelineIntermediateState) -> None:
context.util.sd_step_callback(state, BaseModelType.CogView4)
Expand Down
Loading
Loading