hao-ai-lab
diff --git a/fastvideo/configs/models/vaes/base.py b/fastvideo/configs/models/vaes/base.py
Lines changed: 4 additions & 0 deletions
diff --git a/fastvideo/layers/linear.py b/fastvideo/layers/linear.py
Lines changed: 16 additions & 2 deletions
diff --git a/fastvideo/layers/quantization/__init__.py b/fastvideo/layers/quantization/__init__.py
Lines changed: 3 additions & 1 deletion
@@ -37,6 +37,10 @@ class VAEConfig(ModelConfig):
     use_tiling: bool = True
     use_temporal_tiling: bool = True
     use_parallel_tiling: bool = True
+    # When True, latent preparation skips the schedule shift on frames
+    # whose temporal index is below the model's first-frame conditioning
+    # threshold. LTX-2 reads this in the latent prep stage.
+    use_temporal_scaling_frames: bool = True
 
     def __post_init__(self):
         self.blend_num_frames = self.tile_sample_min_num_frames - self.tile_sample_stride_num_frames
 
@@ -191,7 +191,15 @@ def __init__(
         if quant_config is None:
             self.quant_method: QuantizeMethodBase | None = (UnquantizedLinearMethod())
         else:
+            # ``get_quant_method`` returns ``None`` for layers the config
+            # has decided not to quantize (e.g. ``NVFP4Config`` only tags
+            # a curated subset of LTX-2 attention/FFN layers). Fall back
+            # to ``UnquantizedLinearMethod`` so untagged layers behave
+            # like a plain ``nn.Linear`` instead of breaking subclass
+            # asserts.
             self.quant_method = quant_config.get_quant_method(self, prefix=prefix)
+            if self.quant_method is None:
+                self.quant_method = UnquantizedLinearMethod()
 
     def forward(self, x: torch.Tensor) -> tuple[torch.Tensor, Parameter | None]:
         raise NotImplementedError
@@ -230,8 +238,14 @@ def __init__(
             prefix=prefix,
         )
 
-        # All the linear layer supports quant method.
-        assert self.quant_method is not None
+        # ``QuantizationConfig.get_quant_method`` may return ``None`` for
+        # layers it doesn't intend to quantize (e.g. ``NVFP4Config`` only
+        # tags a specific subset of LTX-2 attention/FFN layers). Fall
+        # back to ``UnquantizedLinearMethod`` so non-matched layers
+        # behave like a plain ``nn.Linear``.
+        if self.quant_method is None:
+            self.quant_method = UnquantizedLinearMethod()
+
         self.quant_method.create_weights(
             self,
             self.input_size,
 
@@ -2,7 +2,7 @@
 
 from fastvideo.layers.quantization.base_config import QuantizationConfig
 
-QuantizationMethods = Literal[None, "AbsMaxFP8"]
+QuantizationMethods = Literal[None, "AbsMaxFP8", "NVFP4"]
 
 QUANTIZATION_METHODS: list[str] = list(get_args(QuantizationMethods))
 
@@ -51,9 +51,11 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
 
     # lazy import to avoid triggering `torch.compile` too early
     from .absmax_fp8 import AbsMaxFP8Config
+    from .nvfp4_config import NVFP4Config
 
     method_to_config: dict[str, type[QuantizationConfig]] = {
         "AbsMaxFP8": AbsMaxFP8Config,
+        "NVFP4": NVFP4Config,
     }
     # Update the `method_to_config` with customized quantization methods.
     method_to_config.update(_CUSTOMIZED_METHOD_TO_QUANT_CONFIG)