Skip to content

Commit d02f666

Browse files
committed
update
1 parent 81df3f7 commit d02f666

1 file changed

Lines changed: 11 additions & 0 deletions

File tree

convert_hf_to_gguf.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2955,6 +2955,13 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
29552955
else:
29562956
return
29572957

2958+
if self.origin_hf_arch.startswith('Sarashina2VisionForCausalLM'):
2959+
# Remove llm. from name
2960+
if name.startswith("llm."):
2961+
name = name[len("llm."):]
2962+
elif name.startswith("visual.") or name in ("norm.weight", "norm.bias"):
2963+
return #Skip processing "modify_tensors"
2964+
29582965
yield from super().modify_tensors(data_torch, name, bid)
29592966

29602967
def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
@@ -4210,6 +4217,8 @@ def set_gguf_parameters(self):
42104217
super().set_gguf_parameters()
42114218
assert self.hparams_vision is not None
42124219
hparams = self.hparams_vision
4220+
if "sarashina2_vision" in self.global_config['model_type']:
4221+
self.global_config['model_type'] = "qwen2_vl"
42134222
model_type = self.global_config['model_type']
42144223
if model_type == 'qwen2_vl':
42154224
self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.QWEN2VL)
@@ -13370,6 +13379,8 @@ def get_model_architecture(hparams: dict[str, Any], model_type: ModelType) -> st
1337013379
arch = text_config["architectures"][0]
1337113380
elif model_type == ModelType.MMPROJ and vision_config.get("architectures") is not None:
1337213381
arch = vision_config["architectures"][0]
13382+
if "Sarashina" in arch:
13383+
arch = "Qwen2VLForConditionalGeneration"
1337313384
if arch is None:
1337413385
raise ValueError("Failed to detect model architecture")
1337513386
return arch

0 commit comments

Comments
 (0)