Skip to content

Commit fce0398

Browse files
authored
dynamicVRAM + --cache-ram 2 (CORE-117) (#13603)
* pinned_memory: remove JIT RAM pressure release. This doesn't work, as freeing intermediates for pins needs to be higher-priority than freeing pins-for-pins, if and when you are going to do that. So this is too late, as pins-for-pins happens at model load time and we don't have JIT pins-for-pins. * caching: Add a filter to only free intermediates from inactive workflows. This is to get the priorities among pins straight. * mm: free inactive-ram from RAM cache first. Stuff from inactive workflows should be freed before anything else. * caching: purge old ModelPatchers first. Don't try to score them; just dump them at the first sign of trouble if they aren't part of the workflow.
1 parent dae3d34 commit fce0398

4 files changed

Lines changed: 9 additions & 8 deletions

File tree

comfy/model_management.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,7 @@ def minimum_inference_memory():
663663

664664
def free_memory(memory_required, device, keep_loaded=[], for_dynamic=False, pins_required=0, ram_required=0):
665665
cleanup_models_gc()
666+
comfy.memory_management.extra_ram_release(max(pins_required, ram_required))
666667
unloaded_model = []
667668
can_unload = []
668669
unloaded_models = []

comfy/pinned_memory.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import comfy.memory_management
33
import comfy_aimdo.host_buffer
44
import comfy_aimdo.torch
5-
import psutil
65

76
from comfy.cli_args import args
87

@@ -12,11 +11,6 @@ def get_pin(module):
1211
def pin_memory(module):
1312
if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None:
1413
return
15-
#FIXME: This is a RAM cache trigger event
16-
ram_headroom = comfy.memory_management.RAM_CACHE_HEADROOM
17-
#we split the difference and assume half the RAM cache headroom is for us
18-
if ram_headroom > 0 and psutil.virtual_memory().available < (ram_headroom * 0.5):
19-
comfy.memory_management.extra_ram_release(ram_headroom)
2014

2115
size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ])
2216

comfy_execution/caching.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import time
66
import torch
77
from typing import Sequence, Mapping, Dict
8+
from comfy.model_patcher import ModelPatcher
89
from comfy_execution.graph import DynamicPrompt
910
from abc import ABC, abstractmethod
1011

@@ -523,13 +524,15 @@ def set_local(self, node_id, value):
523524
self.timestamps[self.cache_key_set.get_data_key(node_id)] = time.time()
524525
super().set_local(node_id, value)
525526

526-
def ram_release(self, target):
527+
def ram_release(self, target, free_active=False):
527528
if psutil.virtual_memory().available >= target:
528529
return
529530

530531
clean_list = []
531532

532533
for key, cache_entry in self.cache.items():
534+
if not free_active and self.used_generation[key] == self.generation:
535+
continue
533536
oom_score = RAM_CACHE_OLD_WORKFLOW_OOM_MULTIPLIER ** (self.generation - self.used_generation[key])
534537

535538
ram_usage = RAM_CACHE_DEFAULT_RAM_USAGE
@@ -542,6 +545,9 @@ def scan_list_for_ram_usage(outputs):
542545
scan_list_for_ram_usage(output)
543546
elif isinstance(output, torch.Tensor) and output.device.type == 'cpu':
544547
ram_usage += output.numel() * output.element_size()
548+
elif isinstance(output, ModelPatcher) and self.used_generation[key] != self.generation:
549+
#old ModelPatchers are the first to go
550+
ram_usage = 1e30
545551
scan_list_for_ram_usage(cache_entry.outputs)
546552

547553
oom_score *= ram_usage

execution.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -779,7 +779,7 @@ async def execute_async(self, prompt, prompt_id, extra_data={}, execute_outputs=
779779

780780
if self.cache_type == CacheType.RAM_PRESSURE:
781781
comfy.model_management.free_memory(0, None, pins_required=ram_headroom, ram_required=ram_headroom)
782-
comfy.memory_management.extra_ram_release(ram_headroom)
782+
ram_release_callback(ram_headroom, free_active=True)
783783
else:
784784
# Only execute when the while-loop ends without break
785785
# Send cached UI for intermediate output nodes that weren't executed

0 commit comments

Comments
 (0)