Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
from __future__ import annotations

import logging
import math

import torch
from comfy_api.latest import io

logger = logging.getLogger(__name__)


class SwarmVideoResampleFPS(io.ComfyNode):
MIN_FPS: float = 1.0
MAX_FPS_IN: float = 1000.0
MAX_FPS_OUT: float = 120.0
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This split is silly; just allow [0.01, 99999]. There's no reason to have strict range limits anywhere here, except to create future edge-case bugs to fix.

STEP_FPS: float = 1.0
DEFAULT_FPS_OUT: float = 24.0
METHOD_LINEAR: str = "linear"
METHOD_NEAREST: str = "nearest"

@classmethod
def define_schema(cls) -> io.Schema:
# Builds the io.Schema for this node: an image input, two float frame-rate
# inputs bounded by the class constants, and a combo selecting the
# resampling method; the outputs are the images and a float fps.
return io.Schema(
node_id="SwarmVideoResampleFPS",
display_name="Swarm Video Resample FPS",
category="SwarmUI/video",
description="Resample a video from fps_in to fps_out while preserving total duration.",
inputs=[
io.Image.Input("images", tooltip="The images to resample."),
# fps_in has no default here; fps_out defaults to DEFAULT_FPS_OUT.
io.Float.Input("fps_in", min=cls.MIN_FPS, max=cls.MAX_FPS_IN, step=cls.STEP_FPS, tooltip="Source frame rate."),
io.Float.Input("fps_out", default=cls.DEFAULT_FPS_OUT, min=cls.MIN_FPS, max=cls.MAX_FPS_OUT, step=cls.STEP_FPS, tooltip="Target frame rate."),
# The option strings are the same constants execute() compares against.
io.Combo.Input("method", options=[cls.METHOD_LINEAR, cls.METHOD_NEAREST], default=cls.METHOD_LINEAR,
tooltip=(
"linear: each output frame is a linear blend of the two source frames bracketing its timestamp. "
"Equivalent to ffmpeg's framerate filter. Slightly more expensive; avoids the duplicated-frame artifact. "
"See https://ffmpeg.org/ffmpeg-filters.html#framerate\n"
"nearest: each output frame is the source frame closest in time. "
"Equivalent to ffmpeg's fps filter. Cheap; can produce visible judder on pans. "
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

weird line splits here

"See https://ffmpeg.org/ffmpeg-filters.html#fps-1"
),
),
],
# Output order matches what execute() passes to io.NodeOutput: frames first, then fps.
outputs=[
io.Image.Output("images"),
io.Float.Output("fps"),
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unnecessary line splits

],
)

@classmethod
@torch.inference_mode()
def execute(cls, images: torch.Tensor, fps_in: float, fps_out: float, method: str) -> io.NodeOutput:
    """Resample ``images`` from ``fps_in`` to ``fps_out``, keeping the clip duration.

    Args:
        images: Frame batch; its first dimension is used as the frame axis.
        fps_in: Frame rate of the incoming frames.
        fps_out: Frame rate to resample to.
        method: ``cls.METHOD_NEAREST`` selects nearest-frame sampling; any
            other value takes the linear-blend path.

    Returns:
        ``io.NodeOutput`` holding the resampled frames and ``float(fps_out)``.
        The input frames are passed through untouched when there is at most
        one frame or when both rates are (almost) equal.

    Raises:
        ValueError: If either rate is not a positive, finite number.
    """
    # A plain `<= 0` test lets NaN and inf through: NaN later fails inside
    # round() with an unrelated message, and an infinite fps_in produces NaN
    # sample positions (0 * inf), i.e. a silently NaN-filled frame.
    rates_are_valid = (
        math.isfinite(fps_in) and math.isfinite(fps_out) and fps_in > 0 and fps_out > 0
    )
    if not rates_are_valid:
        raise ValueError(f"SwarmVideoResampleFPS: fps_in and fps_out must be positive and finite (got {fps_in}, {fps_out})")

    frame_count_in = int(images.shape[0])
    if frame_count_in <= 1 or math.isclose(fps_in, fps_out):
        # Nothing to resample; only the reported rate changes.
        return io.NodeOutput(images, float(fps_out))

    # Keep the duration constant: the output frame count scales with the rate ratio.
    duration_sec = frame_count_in / fps_in
    frame_count_out = max(1, round(duration_sec * fps_out))
    # Output frame i sits at time i / fps_out, i.e. fractional source index i * fps_in / fps_out.
    source_positions = torch.arange(frame_count_out, dtype=torch.float64, device=images.device) / fps_out * fps_in

    if method == cls.METHOD_NEAREST:
        resampled = cls._sample_nearest(images, source_positions)
    else:
        resampled = cls._sample_linear(images, source_positions)

    logger.info(
        "SwarmVideoResampleFPS: %s frames @ %s fps -> %s frames @ %s fps (%s)",
        frame_count_in, fps_in, frame_count_out, fps_out, method,
    )
    return io.NodeOutput(resampled, float(fps_out))

@classmethod
def _sample_nearest(cls, source_frames: torch.Tensor, source_positions: torch.Tensor) -> torch.Tensor:
"""Pick the closest source frame for each fractional position.

See https://ffmpeg.org/ffmpeg-filters.html#fps-1
"""
nearest_idx = source_positions.round().long()
last_valid_idx = source_frames.shape[0] - 1
nearest_idx = torch.clamp(nearest_idx, 0, last_valid_idx)
return source_frames[nearest_idx].contiguous()

@classmethod
def _sample_linear(cls, source_frames: torch.Tensor, source_positions: torch.Tensor) -> torch.Tensor:
"""Linearly blend the two source frames bracketing each fractional position.

See https://ffmpeg.org/ffmpeg-filters.html#framerate
"""
last_valid_idx = source_frames.shape[0] - 1
lower_idx = torch.clamp(source_positions.floor().long(), 0, last_valid_idx)
upper_idx = torch.clamp(lower_idx + 1, 0, last_valid_idx)
blend_weight = (source_positions - lower_idx.to(torch.float64)).to(source_frames.dtype)
while blend_weight.ndim < source_frames.ndim:
blend_weight = blend_weight.unsqueeze(-1)

lower_frames = source_frames[lower_idx]
upper_frames = source_frames[upper_idx]
return ((1.0 - blend_weight) * lower_frames + blend_weight * upper_frames).contiguous()


# Node classes exported by this module, keyed by node id.
NODE_CLASS_MAPPINGS = {"SwarmVideoResampleFPS": SwarmVideoResampleFPS}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os, folder_paths

from . import SwarmBlending, SwarmClipSeg, SwarmImages, SwarmInternalUtil, SwarmKSampler, SwarmLoadImageB64, SwarmLoraLoader, SwarmMasks, SwarmSaveImageWS, SwarmTiling, SwarmExtractLora, SwarmUnsampler, SwarmLatents, SwarmInputNodes, SwarmTextHandling, SwarmReference, SwarmMath, SwarmSam2, SwarmAudio
from . import SwarmBlending, SwarmClipSeg, SwarmImages, SwarmInternalUtil, SwarmKSampler, SwarmLoadImageB64, SwarmLoraLoader, SwarmMasks, SwarmSaveImageWS, SwarmTiling, SwarmExtractLora, SwarmUnsampler, SwarmLatents, SwarmInputNodes, SwarmTextHandling, SwarmReference, SwarmMath, SwarmSam2, SwarmAudio, SwarmVideo

WEB_DIRECTORY = "./web"

Expand All @@ -24,6 +24,7 @@
| SwarmMath.NODE_CLASS_MAPPINGS
| SwarmSam2.NODE_CLASS_MAPPINGS
| SwarmAudio.NODE_CLASS_MAPPINGS
| SwarmVideo.NODE_CLASS_MAPPINGS
)

# TODO: Why is there no comfy core register method? 0.o
Expand Down
1 change: 1 addition & 0 deletions src/BuiltinExtensions/ComfyUIBackend/WorkflowGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,7 @@ public WGNodeData LoadImage(ImageFile img, string param, bool resize, string nod
{
["video"] = NodePath(result, 0)
});
NodeHelpers["video_components_split"] = splitNode;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method of forwarding data is liable to get mixed up with data from a different load call in the same workflowgen.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should I add a new property to WGNodeData? SourceFPSNode or similar?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably change `public int? FPS = null;` to a `JToken` that can be either a raw value or a node path ref.

result = splitNode;
attachedAudio = new([splitNode, 1], this, WGNodeData.DT_AUDIO, CurrentCompat());
}
Expand Down
11 changes: 11 additions & 0 deletions src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorSteps.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1090,6 +1090,17 @@ bool getBestFor(string phrase)
["scale_method"] = "lanczos"
});
imageNodeActual = imageNodeActual.WithPath([multipleOf8, 0]);
if (imageNodeActual.DataType == WGNodeData.DT_VIDEO && g.NodeHelpers.TryGetValue("video_components_split", out string splitNodeId))
{
string resampleNode = g.CreateNode("SwarmVideoResampleFPS", new JObject()
{
["images"] = imageNodeActual.Path,
["fps_in"] = NodePath(splitNodeId, 2),
["fps_out"] = 24.0,
["method"] = "linear"
});
imageNodeActual = imageNodeActual.WithPath([resampleNode, 0]);
}
if (g.UserInput.Get(T2IParamTypes.ControlNetPreviewOnly))
{
g.CurrentMedia = imageNodeActual;
Expand Down
Loading