Skip to content

Commit a110a24

Browse files
Davids048, SolitaryThinker, RandNMR73, XOR-op, jzhang38
committed
[infra] Add NVFP4 quantization support
Apply Dreamverse monorepo changes for stack slice 11/13 from the source branch.

Source-Branch: will/dreamverse-monorepo
Source-SHA: 03d3e61
Dreamverse-Stack: 11/13
Co-authored-by: SolitaryThinker <wlsaidhi@gmail.com>
Co-authored-by: Matthew Noto <99706358+RandNMR73@users.noreply.github.com>
Co-authored-by: XOR-op <17672363+XOR-op@users.noreply.github.com>
Co-authored-by: Zhang Peiyuan <42993249+jzhang38@users.noreply.github.com>
1 parent 0d30d64 commit a110a24

9 files changed

Lines changed: 6037 additions & 5 deletions

File tree

fastvideo/configs/models/vaes/base.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,10 @@ class VAEConfig(ModelConfig):
3737
use_tiling: bool = True
3838
use_temporal_tiling: bool = True
3939
use_parallel_tiling: bool = True
40+
# When True, latent preparation skips the schedule shift on frames
41+
# whose temporal index is below the model's first-frame conditioning
42+
# threshold. LTX-2 reads this in the latent prep stage.
43+
use_temporal_scaling_frames: bool = True
4044

4145
def __post_init__(self):
4246
self.blend_num_frames = self.tile_sample_min_num_frames - self.tile_sample_stride_num_frames

fastvideo/layers/linear.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -191,7 +191,15 @@ def __init__(
191191
if quant_config is None:
192192
self.quant_method: QuantizeMethodBase | None = (UnquantizedLinearMethod())
193193
else:
194+
# ``get_quant_method`` returns ``None`` for layers the config
195+
# has decided not to quantize (e.g. ``NVFP4Config`` only tags
196+
# a curated subset of LTX-2 attention/FFN layers). Fall back
197+
# to ``UnquantizedLinearMethod`` so untagged layers behave
198+
# like a plain ``nn.Linear`` instead of breaking subclass
199+
# asserts.
194200
self.quant_method = quant_config.get_quant_method(self, prefix=prefix)
201+
if self.quant_method is None:
202+
self.quant_method = UnquantizedLinearMethod()
195203

196204
def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, Parameter | None]:
197205
raise NotImplementedError
@@ -230,8 +238,14 @@ def __init__(
230238
prefix=prefix,
231239
)
232240

233-
# All the linear layer supports quant method.
234-
assert self.quant_method is not None
241+
# ``QuantizationConfig.get_quant_method`` may return ``None`` for
242+
# layers it doesn't intend to quantize (e.g. ``NVFP4Config`` only
243+
# tags a specific subset of LTX-2 attention/FFN layers). Fall
244+
# back to ``UnquantizedLinearMethod`` so non-matched layers
245+
# behave like a plain ``nn.Linear``.
246+
if self.quant_method is None:
247+
self.quant_method = UnquantizedLinearMethod()
248+
235249
self.quant_method.create_weights(
236250
self,
237251
self.input_size,

fastvideo/layers/quantization/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
from fastvideo.layers.quantization.base_config import QuantizationConfig
44

5-
QuantizationMethods = Literal[None, "AbsMaxFP8"]
5+
QuantizationMethods = Literal[None, "AbsMaxFP8", "NVFP4"]
66

77
QUANTIZATION_METHODS: list[str] = list(get_args(QuantizationMethods))
88

@@ -51,9 +51,11 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
5151

5252
# lazy import to avoid triggering `torch.compile` too early
5353
from .absmax_fp8 import AbsMaxFP8Config
54+
from .nvfp4_config import NVFP4Config
5455

5556
method_to_config: dict[str, type[QuantizationConfig]] = {
5657
"AbsMaxFP8": AbsMaxFP8Config,
58+
"NVFP4": NVFP4Config,
5759
}
5860
# Update the `method_to_config` with customized quantization methods.
5961
method_to_config.update(_CUSTOMIZED_METHOD_TO_QUANT_CONFIG)

0 commit comments

Comments
 (0)