@@ -13201,15 +13201,13 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
class Sarashina2VLTextModel(LlamaModel):
    """Text-only conversion path for Sarashina2-Vision checkpoints.

    The language model reuses the stock LLaMA computation graph, so this
    class only has to rename the text tensors and drop everything that
    belongs to the vision tower (which a separate model class converts).
    """

    model_arch = gguf.MODEL_ARCH.LLAMA

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
        # Text weights live under an "llm." prefix in the checkpoint;
        # strip the first occurrence so the standard LLaMA tensor
        # name mapping applies unchanged.
        if name.startswith("llm."):
            name = name.replace("llm.", "", 1)
        elif name.startswith(("norm.", "visual.")):
            # Vision-tower / projector tensors are handled by the vision
            # model class — emit nothing for them here.
            return

        yield from super().modify_tensors(data_torch, name, bid)
1321313211
1321413212
1321513213@ModelBase.register("Sarashina2VisionForCausalLM")
@@ -13474,10 +13472,8 @@ def get_model_architecture(hparams: dict[str, Any], model_type: ModelType) -> st
1347413472 # Step3-VL keeps text config under text_config but uses a custom top-level architecture.
1347513473 # For text conversion we route to a dedicated text-only class.
1347613474 # TODO: refactor this later to avoid adding exception here
13477- if model_type == ModelType.TEXT and arch == "StepVLForConditionalGeneration":
13475+ if model_type == ModelType.TEXT and arch in ( "StepVLForConditionalGeneration", "Sarashina2VisionForCausalLM") :
1347813476 return arch
13479- if model_type == ModelType.TEXT and arch == "Sarashina2VisionForCausalLM":
13480- return "Sarashina2VisionForCausalLM"
1348113477
1348213478 # if "architectures" is found in the sub-config, use that instead
1348313479 if model_type == ModelType.TEXT and text_config.get("architectures") is not None:
0 commit comments