Update the LTX2 API calls during the calibration (#926)

jingyu-ml · kevalmorabia97 · commit eb5d5490bc88 · 2026-02-28T01:54:31.000+05:30
## What does this PR do? **Type of change:** Bug fix  **Overview:** Update the LTX-2 integration to match the latest upstream API. The LTX-2 codebase removed or replaced several APIs, and this PR updates all affected files: 1. Replace `cfg_guidance_scale` with `MultiModalGuiderParams`: The pipeline `__call__` no longer accepts a single `cfg_guidance_scale` float. It now requires two `MultiModalGuiderParams` objects (`video_guider_params` and `audio_guider_params`) that control CFG, STG, rescale, cross-modality guidance, and skip-step settings. Updated in ltx-2.py, ltx-2-fp8.py, ltx-2-onestage.py, calibration.py, and models_utils.py. 2. Replace `fp8transformer` with `QuantizationPolicy`: The `TI2VidTwoStagesPipeline` constructor no longer accepts the `fp8transformer` boolean flag. FP8 quantization is now configured via `quantization=QuantizationPolicy.fp8_cast()`. Updated in ltx-2-fp8.py and pipeline_manager.py (with backwards-compatible support for the old `--extra-param fp8transformer=true` CLI flag). 3. Remove the `DEFAULT_CFG_GUIDANCE_SCALE` constant: Replaced by `DEFAULT_VIDEO_GUIDER_PARAMS` and `DEFAULT_AUDIO_GUIDER_PARAMS` at all import sites. ## Usage  ```bash python quantize.py --model ltx-2 --format fp4 --batch-size 1 --calib-size 1 --n-steps 40 --extra-param checkpoint_path=./ltx-2-19b-dev-fp8.safetensors --extra-param distilled_lora_path=./ltx-2-19b-distilled-lora-384.safetensors --extra-param spatial_upsampler_path=./ltx-2-spatial-upscaler-x2-1.0.safetensors --extra-param gemma_root=./gemma-3-12b-it-qat-q4_0-unquantized --extra-param fp8transformer=true --hf-ckpt-dir ./ltx2-nvfp4 ``` ## Testing  ## Before your PR is "*Ready for review*"  - **Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/Model-Optimizer/blob/main/CONTRIBUTING.md)** and your commits are signed.
- **Is this change backward compatible?**: Yes/No  - **Did you write any new necessary tests?**: Yes/No - **Did you add or update any necessary documentation?**: Yes/No - **Did you update [Changelog](https://github.com/NVIDIA/Model-Optimizer/blob/main/CHANGELOG.rst)?**: Yes/No  ## Additional Information   ## Summary by CodeRabbit ## Release Notes * **Updates** * Default resolution for LTX2 models adjusted to 768x1280 * Guidance parameter configuration updated for video and audio pipelines * FP8 quantization parameter handling refined  Signed-off-by: Jingyu Xin <jingyux@nvidia.com>
diff --git a/examples/diffusers/quantization/calibration.py b/examples/diffusers/quantization/calibration.py
@@ -121,6 +121,10 @@ def _run_wan_video_calibration(
 
     def _run_ltx2_calibration(self, prompt_batch: list[str], extra_args: dict[str, Any]) -> None:
         from ltx_core.model.video_vae import TilingConfig
+        from ltx_pipelines.utils.constants import (
+            DEFAULT_AUDIO_GUIDER_PARAMS,
+            DEFAULT_VIDEO_GUIDER_PARAMS,
+        )
 
         prompt = prompt_batch[0]
         extra_params = self.pipeline_manager.config.extra_params
@@ -134,9 +138,8 @@ def _run_ltx2_calibration(self, prompt_batch: list[str], extra_args: dict[str, A
             "num_frames": extra_params.get("num_frames", extra_args.get("num_frames", 121)),
             "frame_rate": extra_params.get("frame_rate", extra_args.get("frame_rate", 24.0)),
             "num_inference_steps": self.config.n_steps,
-            "cfg_guidance_scale": extra_params.get(
-                "cfg_guidance_scale", extra_args.get("cfg_guidance_scale", 4.0)
-            ),
+            "video_guider_params": DEFAULT_VIDEO_GUIDER_PARAMS,
+            "audio_guider_params": DEFAULT_AUDIO_GUIDER_PARAMS,
             "images": extra_params.get("images", []),
             "tiling_config": extra_params.get("tiling_config", TilingConfig.default()),
         }
diff --git a/examples/diffusers/quantization/models_utils.py b/examples/diffusers/quantization/models_utils.py
@@ -163,11 +163,10 @@ def get_model_filter_func(model_type: ModelType) -> Callable[[str], bool]:
         "backbone": "transformer",
         "dataset": _SD_PROMPTS_DATASET,
         "inference_extra_args": {
-            "height": 1024,
-            "width": 1536,
+            "height": 768,
+            "width": 1280,
             "num_frames": 121,
             "frame_rate": 24.0,
-            "cfg_guidance_scale": 4.0,
             "negative_prompt": "worst quality, inconsistent motion, blurry, jittery, distorted",
         },
     },
diff --git a/examples/diffusers/quantization/pipeline_manager.py b/examples/diffusers/quantization/pipeline_manager.py
@@ -213,7 +213,9 @@ def _create_ltx2_pipeline(self) -> Any:
         distilled_lora_strength = params.pop("distilled_lora_strength", 0.8)
         spatial_upsampler_path = params.pop("spatial_upsampler_path", None)
         gemma_root = params.pop("gemma_root", None)
-        fp8transformer = params.pop("fp8transformer", False)
+        fp8_quantization = params.pop("fp8_quantization", None) or params.pop(
+            "fp8transformer", False
+        )
 
         if not checkpoint_path:
             raise ValueError("Missing required extra_param: checkpoint_path.")
@@ -225,6 +227,7 @@ def _create_ltx2_pipeline(self) -> Any:
             raise ValueError("Missing required extra_param: gemma_root.")
 
         from ltx_core.loader import LTXV_LORA_COMFY_RENAMING_MAP, LoraPathStrengthAndSDOps
+        from ltx_core.quantization import QuantizationPolicy
         from ltx_pipelines.ti2vid_two_stages import TI2VidTwoStagesPipeline
 
         distilled_lora = [
@@ -240,7 +243,7 @@ def _create_ltx2_pipeline(self) -> Any:
             "spatial_upsampler_path": str(spatial_upsampler_path),
             "gemma_root": str(gemma_root),
             "loras": [],
-            "fp8transformer": bool(fp8transformer),
+            "quantization": QuantizationPolicy.fp8_cast() if fp8_quantization else None,
         }
         pipeline_kwargs.update(params)
         return TI2VidTwoStagesPipeline(**pipeline_kwargs)