Adds SwarmVideoResampleFPS; resamples controlnet preview videos

jtreminio · jtreminio · commit 27171c93c75e · 2026-05-19T22:45:52.000-05:00
diff --git a/src/BuiltinExtensions/ComfyUIBackend/ExtraNodes/SwarmComfyCommon/SwarmVideo.py b/src/BuiltinExtensions/ComfyUIBackend/ExtraNodes/SwarmComfyCommon/SwarmVideo.py
@@ -91,42 +91,45 @@ def execute(cls, images: torch.Tensor, fps_in: float, fps_out: float, method: st
 
     @classmethod
     def _source_positions(cls, frame_count_out: int, fps_in: float, fps_out: float, device: torch.device) -> torch.Tensor:
-        """Fractional source-frame index for each output frame.
+        """For each output frame, the (fractional) source-frame index it should display.
 
-        Each output frame should display what the source had at the same
-        timestamp. The output frame at index i plays at time i / fps_out, and
-        the source frame visible at that time is at index i * (fps_in / fps_out).
+        Computed in two steps:
+        1. Convert each output-frame index into the timestamp (in seconds) at
+           which that frame will be shown:  timestamp = index / fps_out.
+        2. Convert that timestamp into the source-frame index visible at the
+           same moment in the original video:  source_index = timestamp * fps_in.
         """
         output_indices = torch.arange(frame_count_out, dtype=torch.float64, device=device)
-        return output_indices * (fps_in / fps_out)
+        output_timestamps_sec = output_indices / fps_out
+        return output_timestamps_sec * fps_in
 
     @classmethod
     def _sample_nearest(cls, source_frames: torch.Tensor, source_positions: torch.Tensor) -> torch.Tensor:
         """Pick the closest source frame for each fractional position.
-        
+
         See https://ffmpeg.org/ffmpeg-filters.html#fps-1
         """
-        last_idx = source_frames.shape[0] - 1
-        nearest_idx = torch.clamp(source_positions.round().long(), 0, last_idx)
+        nearest_idx = source_positions.round().long()
+        last_valid_idx = source_frames.shape[0] - 1
+        nearest_idx = torch.clamp(nearest_idx, 0, last_valid_idx)
         return source_frames[nearest_idx].contiguous()
 
     @classmethod
     def _sample_linear(cls, source_frames: torch.Tensor, source_positions: torch.Tensor) -> torch.Tensor:
         """Linearly blend the two source frames bracketing each fractional position.
-        
+
         See https://ffmpeg.org/ffmpeg-filters.html#framerate
         """
-        last_idx = source_frames.shape[0] - 1
-        lower_idx = torch.clamp(source_positions.floor().long(), 0, last_idx)
-        upper_idx = torch.clamp(lower_idx + 1, 0, last_idx)
-
+        last_valid_idx = source_frames.shape[0] - 1
+        lower_idx = torch.clamp(source_positions.floor().long(), 0, last_valid_idx)
+        upper_idx = torch.clamp(lower_idx + 1, 0, last_valid_idx)
         blend_weight = (source_positions - lower_idx.to(torch.float64)).to(source_frames.dtype)
-        # Reshape weight to [N_out, 1, 1, ...] so it broadcasts across the H/W/C
-        # dims of the per-frame tensors during the blend.
-        broadcast_shape = (-1,) + (1,) * (source_frames.ndim - 1)
-        blend_weight = blend_weight.view(*broadcast_shape)
+        while blend_weight.ndim < source_frames.ndim:
+            blend_weight = blend_weight.unsqueeze(-1)
 
-        return ((1.0 - blend_weight) * source_frames[lower_idx] + blend_weight * source_frames[upper_idx]).contiguous()
+        lower_frames = source_frames[lower_idx]
+        upper_frames = source_frames[upper_idx]
+        return ((1.0 - blend_weight) * lower_frames + blend_weight * upper_frames).contiguous()
 
 
 NODE_CLASS_MAPPINGS = {