Skip to content

Commit 5d56eff

Browse files
authored
convert : add support for Nemotron Nano 3 Omni (#22481)
This commit adds support for the NVIDIA Nemotron Nano 3 Omni model, enabling it to be converted to GGUF.
1 parent 52e5f0a commit 5d56eff

1 file changed

Lines changed: 28 additions & 2 deletions

File tree

convert_hf_to_gguf.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,9 @@ def _flush_nvfp4_experts(self, key, expert_blocks, expert_scales, expert_input_s
728728

729729
del experts, merged
730730

731+
def _needs_nvfp4_processing(self) -> bool:
732+
return True
733+
731734
def prepare_tensors(self):
732735
# detect NVFP4 quantization (ModelOpt format)
733736
quant_algo = (self.hparams.get("quantization_config") or {}).get("quant_algo")
@@ -758,7 +761,7 @@ def prepare_tensors(self):
758761
# NVFP4 weights are repacked and written directly to gguf_writer.
759762
# This must run before dequant_model so NVFP4 tensors are removed
760763
# from model_tensors, leaving only non-NVFP4 (e.g. FP8) for dequant.
761-
if self._is_nvfp4:
764+
if self._is_nvfp4 and self._needs_nvfp4_processing():
762765
self._generate_nvfp4_tensors()
763766

764767
self.dequant_model()
@@ -2190,6 +2193,10 @@ def __init__(self, *args, **kwargs):
21902193
# merge configs
21912194
self.preprocessor_config = {**self.preprocessor_config, **cfg}
21922195

2196+
def _needs_nvfp4_processing(self) -> bool:
2197+
# nvfp4 quantization applies to the text model only.
2198+
return False
2199+
21932200
def get_vision_config(self) -> dict[str, Any] | None:
21942201
config_name = "vision_config" if not self.is_mistral_format else "vision_encoder"
21952202
return self.global_config.get(config_name)
@@ -4450,6 +4457,12 @@ def get_vision_config(self) -> dict[str, Any] | None:
44504457
}
44514458
return vision_config
44524459

4460+
def dequant_model(self):
4461+
if self._is_nvfp4:
4462+
# NVFP4 applies to the text model only; skip dequantization for the vision/audio model.
4463+
return
4464+
super().dequant_model()
4465+
44534466
def set_gguf_parameters(self):
44544467
if "image_mean" not in self.preprocessor_config:
44554468
self.preprocessor_config["image_mean"] = [0.485, 0.456, 0.406]
@@ -4473,6 +4486,10 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
44734486
if "input_conditioner" in name:
44744487
return
44754488

4489+
# mtmd does not support video yet, so skip video-related tensors.
4490+
if "radio_model.model.patch_generator.video_embedder" in name:
4491+
return
4492+
44764493
# RADIO's pos_embed doesn't have .weight suffix, but clip.cpp expects it
44774494
if "patch_generator.pos_embed" in name:
44784495
if not name.endswith(".weight"):
@@ -10820,7 +10837,11 @@ def __init__(self, *args, **kwargs):
1082010837
# uses self.model_arch to build the tensor name map, and all MoE-specific
1082110838
# mappings would be missed if it were called with the default non-MoE arch.
1082210839
hparams = ModelBase.load_hparams(args[0], self.is_mistral_format)
10823-
if "num_experts_per_tok" in hparams:
10840+
has_moe_params = (
10841+
"num_experts_per_tok" in hparams
10842+
or (isinstance(hparams.get("llm_config"), dict) and "num_experts_per_tok" in hparams["llm_config"])
10843+
)
10844+
if has_moe_params:
1082410845
self.model_arch = gguf.MODEL_ARCH.NEMOTRON_H_MOE
1082510846
self.is_moe = True
1082610847

@@ -10967,6 +10988,11 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
1096710988
if name.startswith(("vision_model.", "mlp1.")):
1096810989
return
1096910990

10991+
if name.startswith(("sound_encoder.")):
10992+
return
10993+
if name.startswith(("sound_projection.")):
10994+
return
10995+
1097010996
# Strip language_model. prefix for VLM models (e.g., Nemotron Nano 12B v2 VL)
1097110997
if name.startswith("language_model."):
1097210998
name = name[len("language_model."):]

0 commit comments

Comments
 (0)