convert : add support for Nemotron Nano 3 Omni (#22481)
This commit adds support for NVIDIA Nemotron Nano 3 Omni model enabling this model to be converted to GGUF.
This commit is contained in:
+28
-2
@@ -728,6 +728,9 @@ class ModelBase:
     del experts, merged

+    def _needs_nvfp4_processing(self) -> bool:
+        return True
+
     def prepare_tensors(self):
         # detect NVFP4 quantization (ModelOpt format)
         quant_algo = (self.hparams.get("quantization_config") or {}).get("quant_algo")
@@ -758,7 +761,7 @@ class ModelBase:
         # NVFP4 weights are repacked and written directly to gguf_writer.
         # This must run before dequant_model so NVFP4 tensors are removed
         # from model_tensors, leaving only non-NVFP4 (e.g. FP8) for dequant.
-        if self._is_nvfp4:
+        if self._is_nvfp4 and self._needs_nvfp4_processing():
             self._generate_nvfp4_tensors()

         self.dequant_model()
@@ -2190,6 +2193,10 @@ class MmprojModel(ModelBase):
         # merge configs
         self.preprocessor_config = {**self.preprocessor_config, **cfg}

+    def _needs_nvfp4_processing(self) -> bool:
+        # nvfp4 quantization applies to the text model only.
+        return False
+
     def get_vision_config(self) -> dict[str, Any] | None:
         config_name = "vision_config" if not self.is_mistral_format else "vision_encoder"
         return self.global_config.get(config_name)
@@ -4450,6 +4457,12 @@ class NemotronNanoV2VLModel(MmprojModel):
         }
         return vision_config

+    def dequant_model(self):
+        if self._is_nvfp4:
+            # Skip nvfp4 quantization for vision/audio model.
+            return
+        super().dequant_model()
+
     def set_gguf_parameters(self):
         if "image_mean" not in self.preprocessor_config:
             self.preprocessor_config["image_mean"] = [0.485, 0.456, 0.406]
@@ -4473,6 +4486,10 @@ class NemotronNanoV2VLModel(MmprojModel):
         if "input_conditioner" in name:
             return

+        # mtmd does not support video yet so skip tensors related to video.
+        if "radio_model.model.patch_generator.video_embedder" in name:
+            return
+
         # RADIO's pos_embed doesn't have .weight suffix, but clip.cpp expects it
         if "patch_generator.pos_embed" in name:
             if not name.endswith(".weight"):
@@ -10820,7 +10837,11 @@ class NemotronHModel(GraniteHybridModel):
         # uses self.model_arch to build the tensor name map, and all MoE-specific
         # mappings would be missed if it were called with the default non-MoE arch.
         hparams = ModelBase.load_hparams(args[0], self.is_mistral_format)
-        if "num_experts_per_tok" in hparams:
+        has_moe_params = (
+            "num_experts_per_tok" in hparams
+            or (isinstance(hparams.get("llm_config"), dict) and "num_experts_per_tok" in hparams["llm_config"])
+        )
+        if has_moe_params:
             self.model_arch = gguf.MODEL_ARCH.NEMOTRON_H_MOE
             self.is_moe = True

@@ -10967,6 +10988,11 @@ class NemotronHModel(GraniteHybridModel):
         if name.startswith(("vision_model.", "mlp1.")):
             return

+        if name.startswith(("sound_encoder.")):
+            return
+        if name.startswith(("sound_projection.")):
+            return
+
         # Strip language_model. prefix for VLM models (e.g., Nemotron Nano 12B v2 VL)
         if name.startswith("language_model."):
             name = name[len("language_model."):]
Reference in New Issue
Block a user