Skip to content

Commit b56108b

Browse files
committed
Adds SwarmVideoResampleFPS; resamples controlnet preview videos
1 parent 438d7a4 commit b56108b

4 files changed

Lines changed: 148 additions & 1 deletion

File tree

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
from __future__ import annotations
2+
3+
import logging
4+
5+
import math
6+
import torch
7+
from comfy_api.latest import io
8+
9+
# Module-level logger named after this module; SwarmVideoResampleFPS.execute
# reports each resample through it.
logger = logging.getLogger(__name__)
10+
11+
12+
class SwarmVideoResampleFPS(io.ComfyNode):
    """Node that resamples an image batch from one frame rate to another.

    The output frame count is ``round(frame_count_in / fps_in * fps_out)``
    (never less than 1), so the clip keeps approximately its original
    duration. The ``method`` input selects how each output frame is produced:
    ``"linear"`` blends the two bracketing source frames, ``"nearest"`` picks
    the closest source frame.
    """

    # Bounds, step and default used by the fps_in / fps_out inputs in the schema.
    MIN_FPS: float = 1.0
    MAX_FPS: float = 120.0
    STEP_FPS: float = 1.0
    DEFAULT_FPS_OUT: float = 24.0
    # Accepted values of the "method" combo input.
    METHOD_LINEAR: str = "linear"
    METHOD_NEAREST: str = "nearest"

    @classmethod
    def define_schema(cls) -> io.Schema:
        """Describe the node's id, category, inputs and outputs."""
        return io.Schema(
            node_id="SwarmVideoResampleFPS",
            display_name="Swarm Video Resample FPS",
            category="SwarmUI/video",
            description="Resample a video from fps_in to fps_out while preserving total duration.",
            inputs=[
                io.Image.Input(
                    "images",
                    tooltip="The images to resample.",
                ),
                # fps_in has no default: it must be supplied by the workflow.
                io.Float.Input(
                    "fps_in",
                    min=cls.MIN_FPS,
                    max=cls.MAX_FPS,
                    step=cls.STEP_FPS,
                    tooltip="Source frame rate.",
                ),
                io.Float.Input(
                    "fps_out",
                    default=cls.DEFAULT_FPS_OUT,
                    min=cls.MIN_FPS,
                    max=cls.MAX_FPS,
                    step=cls.STEP_FPS,
                    tooltip="Target frame rate.",
                ),
                io.Combo.Input(
                    "method",
                    options=[cls.METHOD_LINEAR, cls.METHOD_NEAREST],
                    default=cls.METHOD_LINEAR,
                    tooltip=(
                        "linear: each output frame is a linear blend of the two source frames bracketing its timestamp. "
                        "Equivalent to ffmpeg's framerate filter. Slightly more expensive; avoids the duplicated-frame artifact. "
                        "See https://ffmpeg.org/ffmpeg-filters.html#framerate\n"
                        "nearest: each output frame is the source frame closest in time. "
                        "Equivalent to ffmpeg's fps filter. Cheap; can produce visible judder on pans. "
                        "See https://ffmpeg.org/ffmpeg-filters.html#fps-1"
                    ),
                ),
            ],
            outputs=[
                io.Image.Output("images"),
                io.Float.Output("fps"),
            ],
        )

    @classmethod
    @torch.inference_mode()
    def execute(cls, images: torch.Tensor, fps_in: float, fps_out: float, method: str) -> io.NodeOutput:
        """Resample ``images`` from ``fps_in`` to ``fps_out``.

        Args:
            images: Frame batch; dimension 0 is the frame index.
            fps_in: Frame rate the input frames were captured at.
            fps_out: Frame rate to resample to.
            method: ``"nearest"`` selects nearest-frame sampling; any other
                value takes the linear-blend path.

        Returns:
            A node output holding the resampled frames and ``fps_out`` as a float.

        Raises:
            ValueError: If either frame rate is zero or negative.
        """
        if fps_in <= 0 or fps_out <= 0:
            raise ValueError(f"SwarmVideoResampleFPS: fps_in and fps_out must be positive (got {fps_in}, {fps_out})")

        frame_count_in = int(images.shape[0])
        # Nothing to resample: empty/single-frame input, or the rates already match.
        if frame_count_in <= 1 or math.isclose(fps_in, fps_out):
            return io.NodeOutput(images, float(fps_out))

        # Keep the clip duration; always emit at least one frame.
        duration_sec = frame_count_in / fps_in
        frame_count_out = max(1, round(duration_sec * fps_out))
        source_positions = cls._source_positions(frame_count_out, fps_in, fps_out, images.device)

        if method == cls.METHOD_NEAREST:
            resampled = cls._sample_nearest(images, source_positions)
        else:
            resampled = cls._sample_linear(images, source_positions)

        logger.info(
            "SwarmVideoResampleFPS: %d frames @ %s fps -> %d frames @ %s fps (%s)",
            frame_count_in, fps_in, frame_count_out, fps_out, method,
        )
        return io.NodeOutput(resampled, float(fps_out))

    @classmethod
    def _source_positions(cls, frame_count_out: int, fps_in: float, fps_out: float, device: torch.device) -> torch.Tensor:
        """Fractional source-frame index for each output frame.

        Each output frame should display what the source had at the same
        timestamp. The output frame at index i plays at time i / fps_out, and
        the source frame visible at that time is at index i * (fps_in / fps_out).

        Returns a 1-D float64 tensor of length ``frame_count_out`` on ``device``.
        """
        # NOTE(review): float64 tensors are created directly on `device`; confirm
        # every device frames can arrive on supports float64.
        output_indices = torch.arange(frame_count_out, dtype=torch.float64, device=device)
        return output_indices * (fps_in / fps_out)

    @classmethod
    def _sample_nearest(cls, source_frames: torch.Tensor, source_positions: torch.Tensor) -> torch.Tensor:
        """Pick the closest source frame for each fractional position.

        Ties (positions ending in exactly .5) resolve to the later frame.

        See https://ffmpeg.org/ffmpeg-filters.html#fps-1
        """
        last_idx = source_frames.shape[0] - 1
        # floor(x + 0.5) instead of Tensor.round(): round() sends ties to the
        # nearest even integer, so at exact 2x upsampling (positions 0, 0.5, 1,
        # 1.5, ...) it selects 0,0,1,2,2,2,3,4,4,4 -- frames repeated 1 and 3
        # times alternately. Positions are non-negative, so this is plain
        # round-half-up and gives an even cadence.
        nearest_idx = torch.clamp(torch.floor(source_positions + 0.5).long(), 0, last_idx)
        return source_frames[nearest_idx].contiguous()

    @classmethod
    def _sample_linear(cls, source_frames: torch.Tensor, source_positions: torch.Tensor) -> torch.Tensor:
        """Linearly blend the two source frames bracketing each fractional position.

        Positions past the last frame blend the last frame with itself, because
        both bracketing indices are clamped to the final index.

        See https://ffmpeg.org/ffmpeg-filters.html#framerate
        """
        last_idx = source_frames.shape[0] - 1
        lower_idx = torch.clamp(source_positions.floor().long(), 0, last_idx)
        upper_idx = torch.clamp(lower_idx + 1, 0, last_idx)

        # Fractional distance from the lower frame, cast to the frames' dtype.
        # NOTE(review): an integer frame dtype would truncate these weights to 0;
        # presumably frames are floating-point -- confirm.
        blend_weight = (source_positions - lower_idx.to(torch.float64)).to(source_frames.dtype)
        # Reshape weight to [N_out, 1, 1, ...] so it broadcasts across the
        # remaining per-frame dims during the blend.
        broadcast_shape = (-1,) + (1,) * (source_frames.ndim - 1)
        blend_weight = blend_weight.view(*broadcast_shape)

        return ((1.0 - blend_weight) * source_frames[lower_idx] + blend_weight * source_frames[upper_idx]).contiguous()
130+
131+
132+
# Node-id -> node-class table exported by this module.
NODE_CLASS_MAPPINGS = {"SwarmVideoResampleFPS": SwarmVideoResampleFPS}

src/BuiltinExtensions/ComfyUIBackend/ExtraNodes/SwarmComfyCommon/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os, folder_paths
22

3-
from . import SwarmBlending, SwarmClipSeg, SwarmImages, SwarmInternalUtil, SwarmKSampler, SwarmLoadImageB64, SwarmLoraLoader, SwarmMasks, SwarmSaveImageWS, SwarmTiling, SwarmExtractLora, SwarmUnsampler, SwarmLatents, SwarmInputNodes, SwarmTextHandling, SwarmReference, SwarmMath, SwarmSam2, SwarmAudio
3+
from . import SwarmBlending, SwarmClipSeg, SwarmImages, SwarmInternalUtil, SwarmKSampler, SwarmLoadImageB64, SwarmLoraLoader, SwarmMasks, SwarmSaveImageWS, SwarmTiling, SwarmExtractLora, SwarmUnsampler, SwarmLatents, SwarmInputNodes, SwarmTextHandling, SwarmReference, SwarmMath, SwarmSam2, SwarmAudio, SwarmVideo
44

55
WEB_DIRECTORY = "./web"
66

@@ -24,6 +24,7 @@
2424
| SwarmMath.NODE_CLASS_MAPPINGS
2525
| SwarmSam2.NODE_CLASS_MAPPINGS
2626
| SwarmAudio.NODE_CLASS_MAPPINGS
27+
| SwarmVideo.NODE_CLASS_MAPPINGS
2728
)
2829

2930
# TODO: Why is there no comfy core register method? 0.o

src/BuiltinExtensions/ComfyUIBackend/WorkflowGenerator.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,7 @@ public WGNodeData LoadImage(ImageFile img, string param, bool resize, string nod
471471
{
472472
["video"] = NodePath(result, 0)
473473
});
474+
NodeHelpers["video_components_split"] = splitNode;
474475
result = splitNode;
475476
attachedAudio = new([splitNode, 1], this, WGNodeData.DT_AUDIO, CurrentCompat());
476477
}

src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorSteps.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,6 +1090,17 @@ bool getBestFor(string phrase)
10901090
["scale_method"] = "lanczos"
10911091
});
10921092
imageNodeActual = imageNodeActual.WithPath([multipleOf8, 0]);
1093+
if (imageNodeActual.DataType == WGNodeData.DT_VIDEO && g.NodeHelpers.TryGetValue("video_components_split", out string splitNodeId))
1094+
{
1095+
string resampleNode = g.CreateNode("SwarmVideoResampleFPS", new JObject()
1096+
{
1097+
["images"] = imageNodeActual.Path,
1098+
["fps_in"] = NodePath(splitNodeId, 2),
1099+
["fps_out"] = 24.0,
1100+
["method"] = "linear"
1101+
});
1102+
imageNodeActual = imageNodeActual.WithPath([resampleNode, 0]);
1103+
}
10931104
if (g.UserInput.Get(T2IParamTypes.ControlNetPreviewOnly))
10941105
{
10951106
g.CurrentMedia = imageNodeActual;

0 commit comments

Comments
 (0)