Skip to content

Commit e30a8f1

Browse files
YASH Nankani
authored and committed
Refactor to extract kwargs in postprocess call
Signed-off-by: YASH Nankani <ynankani@dl325g11-0771.ipp4a1.colossus.nvidia.com>
1 parent ceec092 commit e30a8f1

2 files changed

Lines changed: 68 additions & 71 deletions

File tree

modelopt/torch/export/unified_export_hf.py

Lines changed: 49 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -149,12 +149,9 @@ def _save_component_state_dict_safetensors(
149149

150150
def _postprocess_safetensors(
151151
export_dir: Path,
152-
merged_base_safetensor_path: str | None = None,
153-
model_type: str | None = None,
152+
pipe: Any | None = None,
154153
hf_quant_config: dict | None = None,
155-
enable_layerwise_quant_metadata: bool = True,
156-
padding_strategy: str | None = None,
157-
enable_swizzle_layout: bool = False,
154+
**kwargs,
158155
) -> None:
159156
"""Post-process saved safetensors files for deployment compatibility.
160157
@@ -174,13 +171,38 @@ def _postprocess_safetensors(
174171
175172
Args:
176173
export_dir: Directory containing the saved ``.safetensors`` file(s).
177-
merged_base_safetensor_path: Path to base model safetensors for merge.
178-
model_type: Key into ``DIFFUSION_MERGE_FUNCTIONS`` (e.g., ``"ltx2"``).
174+
pipe: The diffusion pipeline / model. Used to infer the model type
175+
(via :func:`get_diffusion_model_type`) when
176+
``merged_base_safetensor_path`` is set.
179177
hf_quant_config: Quantization config dict to embed in metadata.
180-
enable_layerwise_quant_metadata: Whether to build per-layer metadata.
181-
padding_strategy: ``"row"``, ``"row_col"``, or None.
182-
enable_swizzle_layout: Whether to swizzle block scales.
178+
**kwargs: Runtime-specific keyword arguments:
179+
merged_base_safetensor_path (str, optional): When provided, merges
180+
the exported transformer weights with non-transformer components
181+
(VAE, vocoder, text encoders, etc.) from this base safetensors
182+
file to produce a single-file checkpoint compatible with ComfyUI.
183+
Value should be the path to a full base model ``.safetensors``
184+
file (e.g. ``"path/to/ltx-2-19b-dev.safetensors"``).
185+
enable_layerwise_quant_metadata (bool, optional): When True
186+
(default), includes per-layer ``_quantization_metadata`` in the
187+
checkpoint metadata so that inference runtimes (e.g., ComfyUI)
188+
can identify which layers are quantized and in what format. Set
189+
to False to skip.
190+
enable_swizzle_layout (bool, optional): When True, rearranges NVFP4
191+
block scales from ModelOpt's flat layout to cuBLAS 2-D tiled
192+
layout. Required for runtimes that consume cuBLAS block-scaled
193+
GEMM (e.g., comfy_kitchen). Defaults to False.
194+
padding_strategy (str | None, optional): Padding strategy for NVFP4
195+
weight and scale tensors. ``"row"`` pads rows to multiples of
196+
16 (columns assumed already aligned). ``"row_col"`` pads both
197+
dimensions. ``None`` (default) disables padding. Independent of
198+
``enable_swizzle_layout``.
199+
183200
"""
201+
merged_base_safetensor_path: str | None = kwargs.get("merged_base_safetensor_path")
202+
enable_layerwise_quant_metadata: bool = kwargs.get("enable_layerwise_quant_metadata", True)
203+
enable_swizzle_layout: bool = kwargs.get("enable_swizzle_layout", False)
204+
padding_strategy: str | None = kwargs.get("padding_strategy")
205+
184206
safetensor_files = sorted(export_dir.glob("*.safetensors"))
185207
if not safetensor_files:
186208
return
@@ -193,6 +215,14 @@ def _postprocess_safetensors(
193215
"Export with a larger max_shard_size or disable merge/metadata options."
194216
)
195217

218+
model_type: str | None = None
219+
if merged_base_safetensor_path is not None:
220+
if pipe is None:
221+
raise ValueError(
222+
"`pipe` must be provided when `merged_base_safetensor_path` is set."
223+
)
224+
model_type = get_diffusion_model_type(pipe)
225+
196226
for sf_path in safetensor_files:
197227
with safe_open(str(sf_path), framework="pt") as f:
198228
metadata = dict(f.metadata() or {})
@@ -948,11 +978,8 @@ def _export_diffusers_checkpoint(
948978
dtype: torch.dtype | None,
949979
export_dir: Path,
950980
components: list[str] | None,
951-
merged_base_safetensor_path: str | None = None,
952981
max_shard_size: int | str = "10GB",
953-
enable_layerwise_quant_metadata: bool = True,
954-
enable_swizzle_layout: bool = False,
955-
padding_strategy: str | None = None,
982+
**kwargs,
956983
) -> None:
957984
"""Internal: Export diffusion(-like) model/pipeline checkpoint.
958985
@@ -966,19 +993,11 @@ def _export_diffusers_checkpoint(
966993
export_dir: The directory to save the exported checkpoint.
967994
components: Optional list of component names to export. Only used for pipelines.
968995
If None, all components are exported.
969-
merged_base_safetensor_path: If provided, merge the exported transformer weights
970-
with non-transformer components (VAE, vocoder, text encoders, etc.) from this
971-
base safetensors file and add quantization metadata to produce a single-file
972-
checkpoint compatible with ComfyUI. This should be the path to a full base
973-
model ``.safetensors`` file, e.g. ``"path/to/ltx-2-19b-dev.safetensors"``.
974996
max_shard_size: Maximum size of each shard file. If the model exceeds this size,
975997
it will be sharded into multiple files and a .safetensors.index.json will be
976998
created. Use smaller values like "5GB" or "2GB" to force sharding.
977-
enable_layerwise_quant_metadata: If True (default), include per-layer
978-
``_quantization_metadata`` in the merged checkpoint metadata.
979-
enable_swizzle_layout: If True, swizzle NVFP4 block scales to cuBLAS tiled layout.
980-
padding_strategy: ``"row"``, ``"row_col"``, or None. Pads NVFP4 weight/scale
981-
tensors independently of swizzle.
999+
**kwargs: Runtime-specific post-processing options forwarded to
1000+
:func:`_postprocess_safetensors`. See its docstring for details.
9821001
"""
9831002
export_dir = Path(export_dir)
9841003

@@ -989,9 +1008,6 @@ def _export_diffusers_checkpoint(
9891008
warnings.warn("No exportable components found in the model.")
9901009
return
9911010

992-
# Resolve model type once (only needed when merging with a base checkpoint)
993-
model_type = get_diffusion_model_type(pipe) if merged_base_safetensor_path else None
994-
9951011
# Separate nn.Module components for quantization-aware export
9961012
module_components = {
9971013
name: comp for name, comp in all_components.items() if isinstance(comp, nn.Module)
@@ -1052,12 +1068,9 @@ def _export_diffusers_checkpoint(
10521068
# Step 7: Post-process — merge, metadata, padding, swizzle
10531069
_postprocess_safetensors(
10541070
component_export_dir,
1055-
merged_base_safetensor_path=merged_base_safetensor_path,
1056-
model_type=model_type,
1071+
pipe,
10571072
hf_quant_config=hf_quant_config,
1058-
enable_layerwise_quant_metadata=enable_layerwise_quant_metadata,
1059-
padding_strategy=padding_strategy,
1060-
enable_swizzle_layout=enable_swizzle_layout,
1073+
**kwargs,
10611074
)
10621075

10631076
# Step 8: Update config.json with quantization info
@@ -1229,31 +1242,10 @@ def export_hf_checkpoint(
12291242
to export. If None, all quantized components are exported.
12301243
extra_state_dict: Extra state dictionary to add to the exported model.
12311244
max_shard_size: Maximum size of each safetensors shard file. Defaults to "10GB".
1232-
**kwargs: Internal-only keyword arguments. Supported keys:
1233-
merged_base_safetensor_path (str, optional). When provided, merges the
1234-
exported diffusion transformer weights with non-transformer components
1235-
(VAE, vocoder, text encoders, etc.) from this base safetensors file to
1236-
produce a single-file checkpoint compatible with ComfyUI. Value should be
1237-
the path to a full base model ``.safetensors`` file
1238-
(e.g. ``"path/to/ltx-2-19b-dev.safetensors"``).
1239-
Only used for diffusion model exports.
1240-
enable_layerwise_quant_metadata (bool, optional). When True (default),
1241-
includes per-layer ``_quantization_metadata`` in the checkpoint metadata
1242-
so that inference runtimes (e.g., ComfyUI) can identify which layers are
1243-
quantized and in what format. Set to False to skip.
1244-
enable_swizzle_layout (bool, optional). When True, rearranges NVFP4 block
1245-
scales from ModelOpt's flat layout to cuBLAS 2-D tiled layout. Required
1246-
for runtimes that consume cuBLAS block-scaled GEMM (e.g., comfy_kitchen).
1247-
Defaults to False.
1248-
padding_strategy (str | None, optional). Padding strategy for NVFP4 weight
1249-
and scale tensors. ``"row"`` pads rows to multiples of 16 (columns assumed
1250-
already aligned). ``"row_col"`` pads both dimensions. ``None`` (default)
1251-
disables padding. Independent of ``enable_swizzle_layout``.
1245+
**kwargs: Runtime-specific post-processing options forwarded to
1246+
:func:`_postprocess_safetensors` for diffusion model exports.
1247+
See its docstring for supported keys.
12521248
"""
1253-
merged_base_safetensor_path: str | None = kwargs.get("merged_base_safetensor_path")
1254-
enable_layerwise_quant_metadata: bool = kwargs.get("enable_layerwise_quant_metadata", True)
1255-
enable_swizzle_layout: bool = kwargs.get("enable_swizzle_layout", False)
1256-
padding_strategy: str | None = kwargs.get("padding_strategy")
12571249
export_dir = Path(export_dir)
12581250
export_dir.mkdir(parents=True, exist_ok=True)
12591251

@@ -1266,11 +1258,8 @@ def export_hf_checkpoint(
12661258
dtype,
12671259
export_dir,
12681260
components,
1269-
merged_base_safetensor_path,
12701261
max_shard_size,
1271-
enable_layerwise_quant_metadata,
1272-
enable_swizzle_layout,
1273-
padding_strategy,
1262+
**kwargs,
12741263
)
12751264
return
12761265

tests/unit/torch/export/test_nvfp4_utils.py

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
pad_nvfp4_weights,
2828
swizzle_nvfp4_scales,
2929
)
30+
from modelopt.torch.export.unified_export_hf import _postprocess_safetensors
3031

3132

3233
def _make_nvfp4_state_dict(rows=32, cols=64):
@@ -146,8 +147,6 @@ def test_small_scale_needs_internal_padding(self):
146147

147148
class TestPostprocessSafetensors:
148149
def test_metadata_injection(self, tmp_path):
149-
from modelopt.torch.export.unified_export_hf import _postprocess_safetensors
150-
151150
sd = {"weight": torch.randn(4, 4)}
152151
save_file(sd, str(tmp_path / "model.safetensors"))
153152

@@ -169,8 +168,6 @@ def test_metadata_injection(self, tmp_path):
169168
}
170169

171170
def test_padding_and_swizzle(self, tmp_path):
172-
from modelopt.torch.export.unified_export_hf import _postprocess_safetensors
173-
174171
sd = _make_nvfp4_state_dict(rows=20, cols=64)
175172
save_file(sd, str(tmp_path / "model.safetensors"))
176173

@@ -187,23 +184,18 @@ def test_padding_and_swizzle(self, tmp_path):
187184
assert reloaded["layer0.weight_scale"].shape == (128, 64 // 16)
188185

189186
def test_sharded_guard(self, tmp_path):
190-
from modelopt.torch.export.unified_export_hf import _postprocess_safetensors
191-
192187
save_file({"w": torch.randn(2, 2)}, str(tmp_path / "model.safetensors"))
193188
(tmp_path / "model.safetensors.index.json").write_text("{}")
194189

195190
with pytest.raises(NotImplementedError, match="sharded"):
196191
_postprocess_safetensors(
197192
tmp_path,
198193
merged_base_safetensor_path="/fake/path.safetensors",
199-
model_type="ltx2",
200194
enable_layerwise_quant_metadata=True,
201195
)
202196

203197
def test_preserves_existing_metadata(self, tmp_path):
204198
"""Simulate save_pretrained output: safetensors with pre-existing metadata."""
205-
from modelopt.torch.export.unified_export_hf import _postprocess_safetensors
206-
207199
sd = _make_nvfp4_state_dict(rows=20, cols=64)
208200
preexisting_metadata = {"format": "pt", "_class_name": "MyModel"}
209201
save_file(sd, str(tmp_path / "model.safetensors"), metadata=preexisting_metadata)
@@ -230,6 +222,22 @@ def test_preserves_existing_metadata(self, tmp_path):
230222
assert "layer0" in layer_meta["layers"]
231223

232224
def test_no_safetensor_files(self, tmp_path):
233-
from modelopt.torch.export.unified_export_hf import _postprocess_safetensors
234-
235225
_postprocess_safetensors(tmp_path)
226+
227+
def test_unknown_kwargs_silently_ignored(self, tmp_path):
228+
sd = {"weight": torch.randn(4, 4)}
229+
save_file(sd, str(tmp_path / "model.safetensors"))
230+
231+
_postprocess_safetensors(tmp_path, bad_option=True)
232+
233+
reloaded = load_file(str(tmp_path / "model.safetensors"))
234+
assert torch.allclose(reloaded["weight"], sd["weight"])
235+
236+
def test_merge_requires_pipe(self, tmp_path):
237+
save_file({"w": torch.randn(2, 2)}, str(tmp_path / "model.safetensors"))
238+
239+
with pytest.raises(ValueError, match="`pipe` must be provided"):
240+
_postprocess_safetensors(
241+
tmp_path,
242+
merged_base_safetensor_path="/fake/path.safetensors",
243+
)

0 commit comments

Comments
 (0)