fix: Proper memory estimation for frame interpolation when not using dynamic VRAM (#13698)

kijai · web-flow · commit c33d26c283ea · 2026-05-04T20:20:40.000+03:00
diff --git a/comfy_extras/frame_interpolation_models/film_net.py b/comfy_extras/frame_interpolation_models/film_net.py
@@ -199,6 +199,9 @@ def __init__(self, pyramid_levels=7, fusion_pyramid_levels=5, specialized_levels
     def get_dtype(self):
         return self.extract.extract_sublevels.convs[0][0].conv.weight.dtype
 
+    def memory_used_forward(self, shape, dtype):
+        return 1700 * shape[1] * shape[2] * dtype.itemsize
+
     def _build_warp_grids(self, H, W, device):
         """Pre-compute warp grids for all pyramid levels."""
         if (H, W) in self._warp_grids:
diff --git a/comfy_extras/frame_interpolation_models/ifnet.py b/comfy_extras/frame_interpolation_models/ifnet.py
@@ -74,6 +74,9 @@ def __init__(self, head_ch=4, channels=(192, 128, 96, 64, 32), device=None, dtyp
     def get_dtype(self):
         return self.encode.cnn0.weight.dtype
 
+    def memory_used_forward(self, shape, dtype):
+        return 300 * shape[1] * shape[2] * dtype.itemsize
+
     def _build_warp_grids(self, H, W, device):
         if (H, W) in self._warp_grids:
             return
diff --git a/comfy_extras/nodes_frame_interpolation.py b/comfy_extras/nodes_frame_interpolation.py
@@ -37,7 +37,7 @@ def execute(cls, model_name) -> io.NodeOutput:
         model = cls._detect_and_load(sd)
         dtype = torch.float16 if model_management.should_use_fp16(model_management.get_torch_device()) else torch.float32
         model.eval().to(dtype)
-        patcher = comfy.model_patcher.ModelPatcher(
+        patcher = comfy.model_patcher.CoreModelPatcher(
             model,
             load_device=model_management.get_torch_device(),
             offload_device=model_management.unet_offload_device(),
@@ -98,16 +98,13 @@ def execute(cls, interp_model, images, multiplier) -> io.NodeOutput:
         if num_frames < 2 or multiplier < 2:
             return io.NodeOutput(images)
 
-        model_management.load_model_gpu(interp_model)
         device = interp_model.load_device
         dtype = interp_model.model_dtype()
         inference_model = interp_model.model
-
-        # Free VRAM for inference activations (model weights + ~20x a single frame's worth)
-        H, W = images.shape[1], images.shape[2]
-        activation_mem = H * W * 3 * images.element_size() * 20
-        model_management.free_memory(activation_mem, device)
+        activation_mem = inference_model.memory_used_forward(images.shape, dtype)
+        model_management.load_models_gpu([interp_model], memory_required=activation_mem)
         align = getattr(inference_model, "pad_align", 1)
+        H, W = images.shape[1], images.shape[2]
 
         # Prepare a single padded frame on device for determining output dimensions
         def prepare_frame(idx):