From 2beca418adf32443753d7b5a46dbfc1f47ec045b Mon Sep 17 00:00:00 2001
From: Dustin <6962246+djdarcy@users.noreply.github.com>
Date: Tue, 7 Apr 2026 06:07:26 -0400
Subject: [PATCH 1/2] Fix noise/latent tensor mismatch when latent is nested
 but noise is not

When using LTXAV (audio+video) workflows, latent_image is a NestedTensor
but noise may be a regular tensor. Calling unbind() on non-nested noise
splits it along dim=0 (its leading dimension) rather than into
per-component tensors, producing a shape mismatch at noise_scaling.

Check whether noise is nested before unbinding. If it is not, use it as
the first component and pad each additional component (e.g. audio) with
zero noise shaped like the matching latent component. This is
semantically correct since those components don't need denoising in the
video sampler.
---
 comfy/samplers.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/comfy/samplers.py b/comfy/samplers.py
index 0a4d062db042..34d4a9a48e0e 100755
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@@ -1006,8 +1006,16 @@ def sample(self, noise, latent_image, sampler, sigmas, denoise_mask=None, callba
             return latent_image
 
         if latent_image.is_nested:
-            latent_image, latent_shapes = comfy.utils.pack_latents(latent_image.unbind())
-            noise, _ = comfy.utils.pack_latents(noise.unbind())
+            li_tensors = latent_image.unbind()
+            if noise.is_nested:
+                n_tensors = noise.unbind()
+            else:
+                # Noise only covers video -- pad remaining components (audio) with zeros
+                n_tensors = [noise]
+                for i in range(1, len(li_tensors)):
+                    n_tensors.append(torch.zeros_like(li_tensors[i]))
+            latent_image, latent_shapes = comfy.utils.pack_latents(li_tensors)
+            noise, _ = comfy.utils.pack_latents(n_tensors)
         else:
             latent_shapes = [latent_image.shape]
 

From 0388ac4309fd11a2001f11b1ba1e0aa088c5909c Mon Sep 17 00:00:00 2001
From: Dustin <6962246+djdarcy@users.noreply.github.com>
Date: Sun, 3 May 2026 23:52:08 -0400
Subject: [PATCH 2/2] Defensively truncate/pad nested noise components to match
 latent

When noise is nested but has a different number of components than
latent_image, truncate the extra components or pad the missing ones
with torch.zeros_like, mirroring the denoise_mask handling pattern below.

Addresses CodeRabbit nitpick on #13318.
---
 comfy/samplers.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/comfy/samplers.py b/comfy/samplers.py
index 34d4a9a48e0e..755fb0a715d2 100755
--- a/comfy/samplers.py
+++ b/comfy/samplers.py
@@ -1008,7 +1008,10 @@ def sample(self, noise, latent_image, sampler, sigmas, denoise_mask=None, callba
         if latent_image.is_nested:
             li_tensors = latent_image.unbind()
             if noise.is_nested:
-                n_tensors = noise.unbind()
+                # Truncate extra noise components, pad missing ones with zeros
+                n_tensors = list(noise.unbind()[:len(li_tensors)])
+                for i in range(len(n_tensors), len(li_tensors)):
+                    n_tensors.append(torch.zeros_like(li_tensors[i]))
             else:
                 # Noise only covers video -- pad remaining components (audio) with zeros
                 n_tensors = [noise]