Adds SwarmVideoResampleFPS; resamples controlnet preview videos

jtreminio · jtreminio · commit b56108b329c5 · 2026-05-19T22:36:48.000-05:00
diff --git a/src/BuiltinExtensions/ComfyUIBackend/ExtraNodes/SwarmComfyCommon/SwarmVideo.py b/src/BuiltinExtensions/ComfyUIBackend/ExtraNodes/SwarmComfyCommon/SwarmVideo.py
@@ -0,0 +1,134 @@
+from __future__ import annotations
+
+import logging
+
+import math
+import torch
+from comfy_api.latest import io
+
+logger = logging.getLogger(__name__)
+
+
+class SwarmVideoResampleFPS(io.ComfyNode):  # node that retimes an image batch from fps_in to fps_out
+    MIN_FPS: float = 1.0  # lower bound declared for both fps inputs in the schema
+    MAX_FPS: float = 120.0  # upper bound declared for both fps inputs in the schema
+    STEP_FPS: float = 1.0  # step declared for both fps inputs in the schema
+    DEFAULT_FPS_OUT: float = 24.0  # default for fps_out only; fps_in declares no default
+    METHOD_LINEAR: str = "linear"  # blend the two source frames bracketing each timestamp
+    METHOD_NEAREST: str = "nearest"  # pick the source frame closest to each timestamp
+
+    @classmethod
+    def define_schema(cls) -> io.Schema:  # declares the node's id, category, inputs and outputs
+        return io.Schema(
+            node_id="SwarmVideoResampleFPS",
+            display_name="Swarm Video Resample FPS",
+            category="SwarmUI/video",
+            description="Resample a video from fps_in to fps_out while preserving total duration.",
+            inputs=[
+                io.Image.Input(
+                    "images",
+                    tooltip="The images to resample.",
+                ),
+                io.Float.Input(  # note: no default is declared for fps_in
+                    "fps_in",
+                    min=cls.MIN_FPS,
+                    max=cls.MAX_FPS,
+                    step=cls.STEP_FPS,
+                    tooltip="Source frame rate.",
+                ),
+                io.Float.Input(
+                    "fps_out",
+                    default=cls.DEFAULT_FPS_OUT,
+                    min=cls.MIN_FPS,
+                    max=cls.MAX_FPS,
+                    step=cls.STEP_FPS,
+                    tooltip="Target frame rate.",
+                ),
+                io.Combo.Input(
+                    "method",
+                    options=[cls.METHOD_LINEAR, cls.METHOD_NEAREST],
+                    default=cls.METHOD_LINEAR,
+                    tooltip=(
+                        "linear: each output frame is a linear blend of the two source frames bracketing its timestamp. "
+                        "Equivalent to ffmpeg's framerate filter. Slightly more expensive; avoids the duplicated-frame artifact. "
+                        "See https://ffmpeg.org/ffmpeg-filters.html#framerate\n"
+                        "nearest: each output frame is the source frame closest in time. "
+                        "Equivalent to ffmpeg's fps filter. Cheap; can produce visible judder on pans. "
+                        "See https://ffmpeg.org/ffmpeg-filters.html#fps-1"
+                    ),
+                ),
+            ],
+            outputs=[
+                io.Image.Output("images"),  # the resampled frames, or the input batch when passed through
+                io.Float.Output("fps"),  # execute always returns float(fps_out) here
+            ],
+        )
+
+    @classmethod
+    @torch.inference_mode()  # run the resampling without autograd tracking
+    def execute(cls, images: torch.Tensor, fps_in: float, fps_out: float, method: str) -> io.NodeOutput:
+        if fps_in <= 0 or fps_out <= 0:  # guards the divisions by fps_in and fps_out further down
+            raise ValueError(f"SwarmVideoResampleFPS: fps_in and fps_out must be positive (got {fps_in}, {fps_out})")
+
+        frame_count_in = int(images.shape[0])  # dim 0 is treated as the frame axis
+        if frame_count_in <= 1 or math.isclose(fps_in, fps_out):  # nothing to retime: pass the input through as-is
+            return io.NodeOutput(images, float(fps_out))
+
+        duration_sec = frame_count_in / fps_in
+        frame_count_out = max(1, round(duration_sec * fps_out))  # keep the duration; never produce an empty batch
+        source_positions = cls._source_positions(frame_count_out, fps_in, fps_out, images.device)
+
+        if method == cls.METHOD_NEAREST:
+            resampled = cls._sample_nearest(images, source_positions)
+        else:  # every value other than "nearest" falls through to linear blending
+            resampled = cls._sample_linear(images, source_positions)
+
+        logger.info(
+            "SwarmVideoResampleFPS: %d frames @ %s fps -> %d frames @ %s fps (%s)",
+            frame_count_in, fps_in, frame_count_out, fps_out, method,
+        )
+        return io.NodeOutput(resampled, float(fps_out))
+
+    @classmethod
+    def _source_positions(cls, frame_count_out: int, fps_in: float, fps_out: float, device: torch.device) -> torch.Tensor:
+        """Fractional source-frame index for each output frame.
+
+        Each output frame should display what the source had at the same
+        timestamp. The output frame at index i plays at time i / fps_out, and
+        the source frame visible at that time is at index i * (fps_in / fps_out).
+        """
+        output_indices = torch.arange(frame_count_out, dtype=torch.float64, device=device)  # NOTE(review): float64 is not available on every torch backend (e.g. MPS) — confirm images never arrive on such a device
+        return output_indices * (fps_in / fps_out)
+
+    @classmethod
+    def _sample_nearest(cls, source_frames: torch.Tensor, source_positions: torch.Tensor) -> torch.Tensor:
+        """Pick the closest source frame for each fractional position.
+        
+        See https://ffmpeg.org/ffmpeg-filters.html#fps-1
+        """
+        last_idx = source_frames.shape[0] - 1
+        nearest_idx = torch.clamp(source_positions.round().long(), 0, last_idx)  # positions rounding past the end reuse the last frame
+        return source_frames[nearest_idx].contiguous()
+
+    @classmethod
+    def _sample_linear(cls, source_frames: torch.Tensor, source_positions: torch.Tensor) -> torch.Tensor:
+        """Linearly blend the two source frames bracketing each fractional position.
+        
+        See https://ffmpeg.org/ffmpeg-filters.html#framerate
+        """
+        last_idx = source_frames.shape[0] - 1
+        lower_idx = torch.clamp(source_positions.floor().long(), 0, last_idx)
+        upper_idx = torch.clamp(lower_idx + 1, 0, last_idx)  # equals lower_idx at the final frame, so the blend collapses to that frame
+
+        blend_weight = (source_positions - lower_idx.to(torch.float64)).to(source_frames.dtype)  # NOTE(review): an integer frame dtype would truncate this weight to 0 — presumably frames are floating point; confirm
+        # Reshape weight to [N_out, 1, 1, ...] so it broadcasts across every
+        # non-frame dim (presumably H/W/C) of the per-frame tensors during the blend.
+        broadcast_shape = (-1,) + (1,) * (source_frames.ndim - 1)
+        blend_weight = blend_weight.view(*broadcast_shape)
+
+        return ((1.0 - blend_weight) * source_frames[lower_idx] + blend_weight * source_frames[upper_idx]).contiguous()
+
+
+NODE_CLASS_MAPPINGS = {  # node id -> node class; OR-merged into the package-level mapping by __init__.py
+    "SwarmVideoResampleFPS": SwarmVideoResampleFPS,
+}
diff --git a/src/BuiltinExtensions/ComfyUIBackend/ExtraNodes/SwarmComfyCommon/__init__.py b/src/BuiltinExtensions/ComfyUIBackend/ExtraNodes/SwarmComfyCommon/__init__.py
@@ -1,6 +1,6 @@
 import os, folder_paths
 
-from . import SwarmBlending, SwarmClipSeg, SwarmImages, SwarmInternalUtil, SwarmKSampler, SwarmLoadImageB64, SwarmLoraLoader, SwarmMasks, SwarmSaveImageWS, SwarmTiling, SwarmExtractLora, SwarmUnsampler, SwarmLatents, SwarmInputNodes, SwarmTextHandling, SwarmReference, SwarmMath, SwarmSam2, SwarmAudio
+from . import SwarmBlending, SwarmClipSeg, SwarmImages, SwarmInternalUtil, SwarmKSampler, SwarmLoadImageB64, SwarmLoraLoader, SwarmMasks, SwarmSaveImageWS, SwarmTiling, SwarmExtractLora, SwarmUnsampler, SwarmLatents, SwarmInputNodes, SwarmTextHandling, SwarmReference, SwarmMath, SwarmSam2, SwarmAudio, SwarmVideo
 
 WEB_DIRECTORY = "./web"
 
@@ -24,6 +24,7 @@
     | SwarmMath.NODE_CLASS_MAPPINGS
     | SwarmSam2.NODE_CLASS_MAPPINGS
     | SwarmAudio.NODE_CLASS_MAPPINGS
+    | SwarmVideo.NODE_CLASS_MAPPINGS
 )
 
 # TODO: Why is there no comfy core register method? 0.o
diff --git a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGenerator.cs b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGenerator.cs
@@ -471,6 +471,7 @@ public WGNodeData LoadImage(ImageFile img, string param, bool resize, string nod
                     {
                         ["video"] = NodePath(result, 0)
                     });
+                    NodeHelpers["video_components_split"] = splitNode;
                     result = splitNode;
                     attachedAudio = new([splitNode, 1], this, WGNodeData.DT_AUDIO, CurrentCompat());
                 }
diff --git a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorSteps.cs b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorSteps.cs
@@ -1090,6 +1090,17 @@ bool getBestFor(string phrase)
                             ["scale_method"] = "lanczos"
                         });
                         imageNodeActual = imageNodeActual.WithPath([multipleOf8, 0]);
+                        if (imageNodeActual.DataType == WGNodeData.DT_VIDEO && g.NodeHelpers.TryGetValue("video_components_split", out string splitNodeId))
+                        {
+                            string resampleNode = g.CreateNode("SwarmVideoResampleFPS", new JObject()
+                            {
+                                ["images"] = imageNodeActual.Path,
+                                ["fps_in"] = NodePath(splitNodeId, 2),
+                                ["fps_out"] = 24.0,
+                                ["method"] = "linear"
+                            });
+                            imageNodeActual = imageNodeActual.WithPath([resampleNode, 0]);
+                        }
                         if (g.UserInput.Get(T2IParamTypes.ControlNetPreviewOnly))
                         {
                             g.CurrentMedia = imageNodeActual;

Original file line number	Diff line number	Diff line change
`@@ -471,6 +471,7 @@ public WGNodeData LoadImage(ImageFile img, string param, bool resize, string nod`
`471`	`471`	`{`
`472`	`472`	`["video"] = NodePath(result, 0)`
`473`	`473`	`});`
	`474`	`+ NodeHelpers["video_components_split"] = splitNode;`
`474`	`475`	`result = splitNode;`
`475`	`476`	`attachedAudio = new([splitNode, 1], this, WGNodeData.DT_AUDIO, CurrentCompat());`
`476`	`477`	`}`