diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index bf8af863a..90c2b7094 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -728,6 +728,9 @@ class ModelBase:
 
         del experts, merged
 
+    def _needs_nvfp4_processing(self) -> bool:
+        return True
+
     def prepare_tensors(self):
         # detect NVFP4 quantization (ModelOpt format)
         quant_algo = (self.hparams.get("quantization_config") or {}).get("quant_algo")
@@ -758,7 +761,7 @@ class ModelBase:
         # NVFP4 weights are repacked and written directly to gguf_writer.
         # This must run before dequant_model so NVFP4 tensors are removed
         # from model_tensors, leaving only non-NVFP4 (e.g. FP8) for dequant.
-        if self._is_nvfp4:
+        if self._is_nvfp4 and self._needs_nvfp4_processing():
             self._generate_nvfp4_tensors()
 
         self.dequant_model()
@@ -2190,6 +2193,10 @@ class MmprojModel(ModelBase):
             # merge configs
             self.preprocessor_config = {**self.preprocessor_config, **cfg}
 
+    def _needs_nvfp4_processing(self) -> bool:
+        # NVFP4 quantization applies to the text model only.
+        return False
+
     def get_vision_config(self) -> dict[str, Any] | None:
         config_name = "vision_config" if not self.is_mistral_format else "vision_encoder"
         return self.global_config.get(config_name)
@@ -4450,6 +4457,12 @@ class NemotronNanoV2VLModel(MmprojModel):
         }
         return vision_config
 
+    def dequant_model(self):
+        if self._is_nvfp4:
+            # Skip NVFP4 dequantization for the vision/audio model.
+            return
+        super().dequant_model()
+
     def set_gguf_parameters(self):
         if "image_mean" not in self.preprocessor_config:
             self.preprocessor_config["image_mean"] = [0.485, 0.456, 0.406]
@@ -4473,6 +4486,10 @@ class NemotronNanoV2VLModel(MmprojModel):
         if "input_conditioner" in name:
             return
 
+        # mtmd does not support video yet, so skip video-related tensors.
+        if "radio_model.model.patch_generator.video_embedder" in name:
+            return
+
         # RADIO's pos_embed doesn't have .weight suffix, but clip.cpp expects it
         if "patch_generator.pos_embed" in name:
             if not name.endswith(".weight"):
@@ -10820,7 +10837,11 @@ class NemotronHModel(GraniteHybridModel):
         # uses self.model_arch to build the tensor name map, and all MoE-specific
         # mappings would be missed if it were called with the default non-MoE arch.
         hparams = ModelBase.load_hparams(args[0], self.is_mistral_format)
-        if "num_experts_per_tok" in hparams:
+        has_moe_params = (
+            "num_experts_per_tok" in hparams
+            or (isinstance(hparams.get("llm_config"), dict) and "num_experts_per_tok" in hparams["llm_config"])
+        )
+        if has_moe_params:
             self.model_arch = gguf.MODEL_ARCH.NEMOTRON_H_MOE
             self.is_moe = True
@@ -10967,6 +10988,11 @@ class NemotronHModel(GraniteHybridModel):
         if name.startswith(("vision_model.", "mlp1.")):
             return
 
+        if name.startswith("sound_encoder."):
+            return
+        if name.startswith("sound_projection."):
+            return
+
         # Strip language_model. prefix for VLM models (e.g., Nemotron Nano 12B v2 VL)
         if name.startswith("language_model."):
             name = name[len("language_model."):]
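
Notes on the changes:

The new _needs_nvfp4_processing hook keeps the NVFP4 detection and repack logic in ModelBase.prepare_tensors while letting MmprojModel opt out, since NVFP4 quantization applies to the text model only. Below is a minimal, runnable sketch of the pattern; the Toy* classes are hypothetical stand-ins for ModelBase/MmprojModel, and prepare_tensors is reduced to a print:

class ToyModelBase:
    # assume NVFP4 was detected from quantization_config
    _is_nvfp4 = True

    def _needs_nvfp4_processing(self) -> bool:
        # default: text models repack NVFP4 tensors directly
        return True

    def prepare_tensors(self):
        if self._is_nvfp4 and self._needs_nvfp4_processing():
            print("repacking NVFP4 tensors")
        else:
            print("leaving tensors for dequant/other handling")


class ToyMmprojModel(ToyModelBase):
    def _needs_nvfp4_processing(self) -> bool:
        # NVFP4 applies to the text model only; mmproj skips the repack
        return False


ToyModelBase().prepare_tensors()    # -> repacking NVFP4 tensors
ToyMmprojModel().prepare_tensors()  # -> leaving tensors for dequant/other handling

Defaulting the hook to True on the base class means every existing text-model subclass keeps its current behavior; only the mmproj path needs the override.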
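
The widened MoE check matters for VLM checkpoints (e.g. Nemotron Nano 12B v2 VL) whose text-model hyperparameters are nested under llm_config rather than sitting at the top level of config.json. A self-contained sketch of the predicate; the example dicts are hypothetical, the logic mirrors the diff:

def has_moe_params(hparams: dict) -> bool:
    return (
        "num_experts_per_tok" in hparams
        or (isinstance(hparams.get("llm_config"), dict)
            and "num_experts_per_tok" in hparams["llm_config"])
    )

text_moe_cfg = {"num_experts_per_tok": 8}
vlm_moe_cfg = {"llm_config": {"num_experts_per_tok": 8}, "vision_config": {}}
dense_cfg = {"hidden_size": 4096}

assert has_moe_params(text_moe_cfg)
assert has_moe_params(vlm_moe_cfg)
assert not has_moe_params(dense_cfg)

The isinstance guard keeps the check safe when llm_config is absent or is not a dict.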
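
The sound_encoder. / sound_projection. filters extend the existing prefix-based skipping in NemotronHModel.modify_tensors, which already drops vision tensors before stripping the language_model. wrapper prefix. A rough sketch of that filtering; the string-returning convention is illustrative only (the real method yields name/tensor pairs):

SKIP_PREFIXES = ("vision_model.", "mlp1.", "sound_encoder.", "sound_projection.")

def text_tensor_name(name: str) -> str | None:
    # vision/audio tensors are handled elsewhere (or unsupported), so drop them
    if name.startswith(SKIP_PREFIXES):
        return None
    # strip the VLM wrapper prefix so names match the text-model tensor map
    if name.startswith("language_model."):
        name = name[len("language_model."):]
    return name

assert text_tensor_name("sound_encoder.conv1.weight") is None
assert text_tensor_name("language_model.model.embed_tokens.weight") == "model.embed_tokens.weight"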