Remove quantization_config in config.json from original deepseek models (#753)

Edwardf0t1 · kevalmorabia97 · commit c8f46fc417d8 · 2026-01-16T00:51:39.000+05:30
## What does this PR do? **Type of change:** Bug fix **Overview:** Original DeepSeek checkpoints may include a `quantization_config` field in `config.json` (describing the source checkpoint's quantization). When we export the ModelOpt quantization config to `hf_quant_config.json`, leaving the original `quantization_config` in place is confusing, because the exported checkpoint then carries two quantization descriptions. This PR adds a function that removes the field from the exported `config.json` and calls it at the end of the export script. ## Usage  ```python remove_quantization_config_from_original_config(args.fp4_path) ``` ## Testing  ## Before your PR is "*Ready for review*"  - **Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/Model-Optimizer/blob/main/CONTRIBUTING.md)** and your commits are signed. - **Is this change backward compatible?**: Yes  - **Did you write any new necessary tests?**: Yes/No - **Did you add or update any necessary documentation?**: Yes/No - **Did you update [Changelog](https://github.com/NVIDIA/Model-Optimizer/blob/main/CHANGELOG.rst)?**: Yes/No  ## Additional Information Resolves nvbug https://nvbugspro.nvidia.com/bug/5736665 --------- Signed-off-by: Zhiyu Cheng <zhiyuc@nvidia.com>
diff --git a/examples/deepseek/quantize_to_nvfp4.py b/examples/deepseek/quantize_to_nvfp4.py
@@ -82,6 +82,20 @@ def _remap_key(key_dict: dict[str, Any]):
     key_dict.update(new_dict)
 
 
+def remove_quantization_config_from_original_config(export_dir: str) -> None:
+    """Remove `quantization_config` from the exported HF `config.json`, if present.
+
+    The file is rewritten in place only when the key exists, so calling this on a
+    checkpoint whose `config.json` has no `quantization_config` is a no-op instead
+    of raising `KeyError`.
+
+    Args:
+        export_dir: Directory containing the exported checkpoint's `config.json`.
+
+    Raises:
+        FileNotFoundError: If `config.json` does not exist in `export_dir`.
+        json.JSONDecodeError: If `config.json` is not valid JSON.
+    """
+    config_path = os.path.join(export_dir, "config.json")
+    with open(config_path, encoding="utf-8") as f:
+        cfg = json.load(f)
+    if "quantization_config" not in cfg:
+        # Nothing to strip; leave the file untouched.
+        return
+    del cfg["quantization_config"]
+    with open(config_path, "w", encoding="utf-8") as f:
+        # NOTE: sort_keys=True writes the remaining keys in sorted order.
+        json.dump(cfg, f, indent=2, sort_keys=True)
+        f.write("\n")
+
+
 def load_and_preprocess_state_dict(modelopt_state_root, world_size=8):
     state_dict_list = [
         torch.load(f"{modelopt_state_root}/amax_dict_rank{rank}-mp{world_size}.pt")
@@ -302,3 +316,5 @@ def get_tensor(tensor_name):
         save_root=args.fp4_path,
         per_layer_quant_config=per_layer_quant_config,
     )
+
+    remove_quantization_config_from_original_config(args.fp4_path)