
Commit f3eeb2b

modify to follow review comments
1 parent 7f823f8 commit f3eeb2b

1 file changed: 2 additions & 49 deletions

convert_hf_to_gguf.py
@@ -2810,7 +2810,6 @@ def prepare_tensors(self):
     "LlavaForConditionalGeneration",
     "VoxtralForConditionalGeneration",
     "IQuestCoderForCausalLM",
-    "Sarashina2VisionForCausalLM",
     "LlamaModel")
 class LlamaModel(TextModel):
     model_arch = gguf.MODEL_ARCH.LLAMA
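For context: `@ModelBase.register` maps the `architectures` strings from a checkpoint's config.json to converter classes, so "Sarashina2VisionForCausalLM" comes out of LlamaModel's list now that the dedicated class below claims it. A minimal sketch of that dispatch pattern, assuming a class-level registry dict; `_registry` and the assert at the end are illustrative, not the script's actual internals:

# Sketch of register-by-architecture dispatch. The decorator fills a
# class-level dict keyed by HF architecture names; config.json's
# "architectures" entry then selects the converter class.
class ModelBase:
    _registry: dict[str, type] = {}

    @classmethod
    def register(cls, *names: str):
        def wrapper(model_cls: type) -> type:
            for name in names:
                cls._registry[name] = model_cls
            return model_cls
        return wrapper


@ModelBase.register("LlamaForCausalLM", "LlamaModel")
class LlamaModel(ModelBase):
    pass


@ModelBase.register("Sarashina2VisionForCausalLM")
class Sarashina2VLVisionModel(ModelBase):
    pass


# Keeping "Sarashina2VisionForCausalLM" in LlamaModel's list as well
# would let one registration clobber the other; after this commit the
# lookup is unambiguous.
assert ModelBase._registry["Sarashina2VisionForCausalLM"] is Sarashina2VLVisionModel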
@@ -13124,56 +13123,10 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
 
 
 @ModelBase.register("Sarashina2VisionForCausalLM")
-class Sarashina2VLVisionModel(MmprojModel):
-    model_type = ModelType.MMPROJ
-
+class Sarashina2VLVisionModel(Qwen2VLVisionModel):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        assert self.hparams_vision is not None
-        self.hparams_vision["image_size"] = self.hparams_vision.get("image_size", 560)
-        # rename config.json values
-        self.hparams_vision["num_attention_heads"] = self.hparams_vision.get("num_heads")
-        self.hparams_vision["num_hidden_layers"] = self.hparams_vision.get("depth")
-        if "embed_dim" in self.hparams_vision:  # qwen2vl
-            self.hparams_vision["intermediate_size"] = self.hparams_vision.get("hidden_size")
-            self.hparams_vision["hidden_size"] = self.hparams_vision.get("embed_dim")
-
-    def set_gguf_parameters(self):
-        super().set_gguf_parameters()
-        self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.QWEN2VL)
-        self.gguf_writer.add_vision_spatial_merge_size(2)
-        self.gguf_writer.add_vision_attention_layernorm_eps(self.global_config.get("rms_norm_eps", 1e-6))
-
-    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        assert self.hparams_vision is not None
-        # Two tensors will be ignored
-        # if name in ('norm.weight', 'norm.bias'):
-        #     name = "visual.post_layer" + name
-        if name.startswith("visual."):
-            # process visual tensors
-            # split QKV tensors if needed
-            if ".qkv." in name:
-                if data_torch.ndim == 2:  # weight
-                    c3, _ = data_torch.shape
-                else:  # bias
-                    c3 = data_torch.shape[0]
-                assert c3 % 3 == 0
-                c = c3 // 3
-                wq = data_torch[:c]
-                wk = data_torch[c: c * 2]
-                wv = data_torch[c * 2:]
-                yield from super().modify_tensors(wq, name.replace("qkv", "q"), bid)
-                yield from super().modify_tensors(wk, name.replace("qkv", "k"), bid)
-                yield from super().modify_tensors(wv, name.replace("qkv", "v"), bid)
-            elif 'patch_embed.proj.weight' in name:
-                # split Conv3D into Conv2Ds
-                c1, c2, kt, kh, kw = data_torch.shape
-                del c1, c2, kh, kw  # unused
-                assert kt == 2, "Current implementation only support temporal_patch_size of 2"
-                yield (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".weight", data_torch[:, :, 0, ...])
-                yield (gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_ENC_EMBD_PATCH] + ".weight.1", data_torch[:, :, 1, ...])
-            else:
-                yield from super().modify_tensors(data_torch, name, bid)
+        self.global_config['model_type'] = "qwen2_vl"
 
 
 ###### CONVERSION LOGIC ######
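The deleted `modify_tensors` body did two tensor-layout transforms that the `Qwen2VLVisionModel` base class is now expected to handle: splitting the fused `attn.qkv` projection into separate Q/K/V tensors, and slicing the `patch_embed.proj` Conv3D kernel (temporal size 2) into two Conv2D weights. A standalone sketch of both transforms in plain PyTorch, with illustrative shapes; the real converter applies them to HF checkpoint tensors selected by name (".qkv.", "patch_embed.proj.weight"):

import torch


def split_qkv(qkv: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    # The fused projection stacks Q, K, V along dim 0:
    # weight is (3 * hidden, hidden), bias is (3 * hidden,).
    c3 = qkv.shape[0]
    assert c3 % 3 == 0
    c = c3 // 3
    return qkv[:c], qkv[c:c * 2], qkv[c * 2:]


def split_temporal_conv(w: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
    # patch_embed.proj is a Conv3D with kernel (kt=2, kh, kw); the GGUF
    # side stores two Conv2D weights instead, one per temporal slice.
    _, _, kt, _, _ = w.shape
    assert kt == 2, "the deleted code only handled temporal_patch_size == 2"
    return w[:, :, 0, ...], w[:, :, 1, ...]


hidden = 8
wq, wk, wv = split_qkv(torch.randn(3 * hidden, hidden))
assert wq.shape == wk.shape == wv.shape == (hidden, hidden)

w0, w1 = split_temporal_conv(torch.randn(hidden, 3, 2, 14, 14))
assert w0.shape == w1.shape == (hidden, 3, 14, 14)

Forcing `self.global_config['model_type'] = "qwen2_vl"` presumably makes the inherited Qwen2-VL config handling (and splits like the above) apply unchanged to the Sarashina2 checkpoint, whose vision tower follows the Qwen2-VL layout under a different architecture string.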
