Skip to content

Commit f5b902b

Browse files
authored
Merge pull request #341 from AInVFX/main
v2.5.13: Fix triton import error, OOM on long video float32 conversion, macOS CLI watermark
2 parents 112275d + 19825fa commit f5b902b

8 files changed

Lines changed: 76 additions & 9 deletions

File tree

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,12 @@ We're actively working on improvements and new features. To stay informed:
3636

3737
## 🚀 Updates
3838

39+
**2025.11.30 - Version 2.5.13**
40+
41+
- **🔧 Fix: PyTorch 2.7+ triton import error** - Resolved installation crash caused by triton.ops import chain on newer triton versions
42+
- **💾 Fix: OOM on float32 conversion for long videos** - Graceful fallback to native dtype when insufficient memory for float32 conversion
43+
- **🍎 Fix: CLI watermark error on macOS** - Resolved MPS-related watermark processing crash on Apple Silicon
44+
3945
**2025.11.28 - Version 2.5.12**
4046

4147
- **🐛 Fix: Color artifacts regression** - Reverted in-place tensor operations in video transform pipeline that caused color artifacts on some images

inference_cli.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,14 @@
6464
if mp.get_start_method(allow_none=True) != 'spawn':
6565
mp.set_start_method('spawn', force=True)
6666

67-
# Configure VRAM management and validate CUDA devices before heavy imports
68-
if platform.system() != "Darwin":
67+
# Configure platform-specific memory management before heavy imports
68+
# Must be set BEFORE import torch
69+
if platform.system() == "Darwin":
70+
# MPS allocator requires: low_watermark <= high_watermark
71+
# Setting both to 0.0 disables PyTorch memory limits, letting macOS manage memory
72+
os.environ.setdefault("PYTORCH_MPS_HIGH_WATERMARK_RATIO", "0.0")
73+
os.environ.setdefault("PYTORCH_MPS_LOW_WATERMARK_RATIO", "0.0")
74+
else:
6975
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "backend:cudaMallocAsync")
7076

7177
# Pre-parse CUDA device argument for validation and environment setup

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[project]
22
name = "seedvr2_videoupscaler"
33
description = "SeedVR2 official ComfyUI integration: ByteDance-Seed's one-step diffusion-based video/image upscaling with memory-efficient inference"
4-
version = "2.5.12"
4+
version = "2.5.13"
55
authors = [
66
{name = "numz"},
77
{name = "adrientoupet"}

src/interfaces/video_upscaler.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -509,15 +509,21 @@ def cleanup(dit_cache: bool = False, vae_cache: bool = False) -> None:
509509
)
510510

511511
sample = ctx['final_video']
512-
512+
debug.log("", category="none", force=True)
513+
513514
# Ensure CPU tensor in float32 for maximum ComfyUI compatibility
514515
if torch.is_tensor(sample):
515516
if sample.is_cuda or sample.is_mps:
516517
sample = sample.cpu()
517518
if sample.dtype != torch.float32:
518-
sample = sample.to(torch.float32)
519+
src_dtype = sample.dtype
520+
try:
521+
sample = sample.to(torch.float32)
522+
debug.log(f"Converted output from {src_dtype} to float32", category="precision")
523+
except Exception as e:
524+
debug.log(f"Could not convert to float32: {e}. Output is {src_dtype}, compatibility with other nodes not guaranteed",
525+
level="WARNING", category="precision", force=True)
519526

520-
debug.log("", category="none", force=True)
521527
debug.log("Upscaling completed successfully!", category="success", force=True)
522528
debug.end_timer("generation", "Video generation")
523529

src/models/video_vae_v3/modules/attn_video_vae.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import torch.nn as nn
1818
import torch.nn.functional as F
1919
from diffusers.models.attention_processor import Attention, SpatialNorm
20-
from diffusers.models.autoencoders.vae import DecoderOutput, DiagonalGaussianDistribution
2120
from diffusers.models.downsampling import Downsample2D
2221
from diffusers.models.lora import LoRACompatibleConv
2322
from diffusers.models.modeling_outputs import AutoencoderKLOutput
@@ -46,6 +45,8 @@
4645
CausalAutoencoderOutput,
4746
CausalDecoderOutput,
4847
CausalEncoderOutput,
48+
DecoderOutput,
49+
DiagonalGaussianDistribution,
4950
MemoryState,
5051
_inflation_mode_t,
5152
_memory_device_t,

src/models/video_vae_v3/modules/types.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,51 @@ class CausalEncoderOutput(NamedTuple):
7474

7575
class CausalDecoderOutput(NamedTuple):
7676
sample: torch.Tensor
77+
78+
79+
class DecoderOutput:
    """Result container returned by the VAE decode path.

    Local stand-in for ``diffusers.models.autoencoders.vae.DecoderOutput`` so
    this module no longer has to import it from diffusers.

    Args:
        sample: Decoded output tensor.
        commit_loss: Optional auxiliary loss tensor; ``None`` when not provided.
    """

    def __init__(self, sample: torch.Tensor, commit_loss: Optional[torch.Tensor] = None):
        # Plain attribute storage: callers read ``.sample`` / ``.commit_loss`` directly.
        self.sample = sample
        self.commit_loss = commit_loss
84+
85+
86+
class DiagonalGaussianDistribution:
    """Diagonal Gaussian parameterised by concatenated mean and log-variance.

    Local replacement mirroring
    ``diffusers.models.autoencoders.vae.DiagonalGaussianDistribution`` so this
    package does not need to import it from diffusers.

    Args:
        parameters: Tensor whose dimension 1 holds the mean in its first half
            and the log-variance in its second half (split with
            ``torch.chunk(parameters, 2, dim=1)``).
        deterministic: When ``True`` the distribution collapses to its mean:
            ``std`` and ``var`` are zero and ``sample()`` returns ``mode()``.
    """

    def __init__(self, parameters: torch.Tensor, deterministic: bool = False):
        self.parameters = parameters
        self.mean, self.logvar = torch.chunk(parameters, 2, dim=1)
        # Clamp before exponentiating so std/var stay finite.
        self.logvar = torch.clamp(self.logvar, -30.0, 20.0)
        self.deterministic = deterministic
        self.std = torch.exp(0.5 * self.logvar)
        self.var = torch.exp(self.logvar)
        if self.deterministic:
            self.var = self.std = torch.zeros_like(
                self.mean, device=self.parameters.device, dtype=self.parameters.dtype
            )

    def sample(self, generator: Optional[torch.Generator] = None) -> torch.Tensor:
        """Draw ``mean + std * eps`` with ``eps ~ N(0, I)``.

        Args:
            generator: Optional RNG forwarded to ``torch.randn`` for
                reproducible draws.

        Returns:
            A tensor shaped like ``mean``; exactly ``mode()`` when the
            distribution is deterministic.
        """
        if self.deterministic:
            return self.mode()
        noise = torch.randn(
            self.mean.shape,
            generator=generator,
            device=self.parameters.device,
            dtype=self.parameters.dtype,
        )
        return self.mean + self.std * noise

    def mode(self) -> torch.Tensor:
        """Return the most likely value, i.e. the mean."""
        return self.mean

    def kl(self, other: Optional["DiagonalGaussianDistribution"] = None) -> torch.Tensor:
        """Return the KL divergence to ``other`` (or to N(0, I) when omitted).

        Args:
            other: Distribution to compare against; ``None`` selects the
                standard normal.

        Returns:
            The divergence summed over dimensions 1-3, or ``tensor([0.0])``
            when this distribution is deterministic.
        """
        if self.deterministic:
            # Zero variance makes the closed form meaningless (and divides by
            # zero when ``other`` is deterministic too); return the same
            # constant the diffusers implementation does.
            return torch.Tensor([0.0])
        # NOTE(review): the reduction is fixed to dims 1-3 as in diffusers; for
        # 5-D video latents this leaves the last axis unreduced - confirm intent.
        if other is None:
            return 0.5 * torch.sum(
                self.mean.pow(2) + self.var - 1.0 - self.logvar,
                dim=[1, 2, 3],
            )
        return 0.5 * torch.sum(
            (self.mean - other.mean).pow(2) / other.var
            + self.var / other.var - 1.0 - self.logvar + other.logvar,
            dim=[1, 2, 3],
        )

    def nll(self, sample: torch.Tensor, dims=(1, 2, 3)) -> torch.Tensor:
        """Return the negative log-likelihood of ``sample``.

        Provided for parity with the diffusers class this one replaces.

        Args:
            sample: Tensor broadcastable against ``mean``.
            dims: Dimensions to sum over.

        Returns:
            The summed negative log-likelihood, or ``tensor([0.0])`` when the
            distribution is deterministic.
        """
        import math  # local import: keeps this block free of new file-level deps

        if self.deterministic:
            return torch.Tensor([0.0])
        log_two_pi = math.log(2.0 * math.pi)
        return 0.5 * torch.sum(
            log_two_pi + self.logvar + (sample - self.mean).pow(2) / self.var,
            dim=list(dims),
        )

src/models/video_vae_v3/modules/video_vae.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
import torch
1616
import torch.nn as nn
1717
import torch.nn.functional as F
18-
from diffusers.models.autoencoders.vae import DiagonalGaussianDistribution
1918
from einops import rearrange
2019
from ....common.half_precision_fixes import safe_pad_operation
2120

@@ -36,6 +35,7 @@
3635
CausalAutoencoderOutput,
3736
CausalDecoderOutput,
3837
CausalEncoderOutput,
38+
DiagonalGaussianDistribution,
3939
MemoryState,
4040
_inflation_mode_t,
4141
_memory_device_t,

src/utils/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"""
55

66
# Version information
7-
__version__ = "2.5.12"
7+
__version__ = "2.5.13"
88

99
import os
1010
import warnings

0 commit comments

Comments
 (0)