@@ -2955,6 +2955,13 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
29552955 else:
29562956 return
29572957
2958+ if self.origin_hf_arch.startswith('Sarashina2VisionForCausalLM'):
2959+ # Strip the leading "llm." prefix from the tensor name
2960+ if name.startswith("llm."):
2961+ name = name[len("llm."):]
2962+ elif name.startswith("visual.") or name in ("norm.weight", "norm.bias"):
2963+ return #Skip processing "modify_tensors"
2964+
29582965 yield from super().modify_tensors(data_torch, name, bid)
29592966
29602967 def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
@@ -4210,6 +4217,8 @@ def set_gguf_parameters(self):
42104217 super().set_gguf_parameters()
42114218 assert self.hparams_vision is not None
42124219 hparams = self.hparams_vision
4220+ if "sarashina2_vision" in self.global_config['model_type']:
4221+ self.global_config['model_type'] = "qwen2_vl"
42134222 model_type = self.global_config['model_type']
42144223 if model_type == 'qwen2_vl':
42154224 self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.QWEN2VL)
@@ -13370,6 +13379,8 @@ def get_model_architecture(hparams: dict[str, Any], model_type: ModelType) -> st
1337013379 arch = text_config["architectures"][0]
1337113380 elif model_type == ModelType.MMPROJ and vision_config.get("architectures") is not None:
1337213381 arch = vision_config["architectures"][0]
13382+ if "Sarashina" in arch:
13383+ arch = "Qwen2VLForConditionalGeneration"
1337313384 if arch is None:
1337413385 raise ValueError("Failed to detect model architecture")
1337513386 return arch
0 commit comments