Skip to content

Commit bbfbdb9

Browse files
author
YASH Nankani
committed
Address review comments
Signed-off-by: YASH Nankani <ynankani@2u1g-x570-0073.ipp2a1.colossus.nvidia.com>
1 parent 1468975 commit bbfbdb9

File tree

2 files changed

+10
-1
lines changed

2 files changed

+10
-1
lines changed

modelopt/torch/export/diffusers_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -889,7 +889,7 @@ def _roundup(a: int, b: int) -> int:
889889
(_roundup(scale.shape[1], 16) - scale.shape[1]) if padding_strategy == "row_col" else 0
890890
)
891891

892-
if pad_r > 0 or pad_c_w > 0:
892+
if pad_r > 0 or pad_c_w > 0 or pad_c_s > 0:
893893
state_dict[w_key] = torch.nn.functional.pad(weight, (0, pad_c_w, 0, pad_r))
894894
state_dict[s_key] = torch.nn.functional.pad(scale, (0, pad_c_s, 0, pad_r))
895895
padded_count += 1

tests/unit/torch/export/test_nvfp4_utils.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import pytest
2121
import torch
22+
from safetensors import safe_open
2223
from safetensors.torch import load_file, save_file
2324

2425
from modelopt.torch.export.diffusers_utils import (
@@ -159,6 +160,13 @@ def test_metadata_injection(self, tmp_path):
159160

160161
reloaded = load_file(str(tmp_path / "model.safetensors"))
161162
assert torch.allclose(reloaded["weight"], sd["weight"])
163+
with safe_open(str(tmp_path / "model.safetensors"), framework="pt", device="cpu") as f:
164+
metadata = f.metadata()
165+
assert json.loads(metadata["quantization_config"]) == hf_quant_config
166+
assert json.loads(metadata["_quantization_metadata"]) == {
167+
"format_version": "1.0",
168+
"layers": {},
169+
}
162170

163171
def test_padding_and_swizzle(self, tmp_path):
164172
from modelopt.torch.export.unified_export_hf import _postprocess_safetensors
@@ -176,6 +184,7 @@ def test_padding_and_swizzle(self, tmp_path):
176184
reloaded = load_file(str(tmp_path / "model.safetensors"))
177185
assert reloaded["layer0.weight"].shape[0] == 32
178186
assert reloaded["layer0.weight_scale"].dtype == torch.float8_e4m3fn
187+
assert reloaded["layer0.weight_scale"].shape == (128, 64 // 16)
179188

180189
def test_sharded_guard(self, tmp_path):
181190
from modelopt.torch.export.unified_export_hf import _postprocess_safetensors

0 commit comments

Comments
 (0)