@@ -13385,6 +13385,26 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
1338513385 yield from super().modify_tensors(data_torch, name, bid)
1338613386
1338713387
@ModelBase.register("Sarashina2VisionForCausalLM")
class Sarashina2VLTextModel(LlamaModel):
    """Text-side converter registered for ``Sarashina2VisionForCausalLM``.

    Tensors are written using the LLAMA architecture; only tensor names are
    adjusted before delegating to the parent class.
    """

    model_arch = gguf.MODEL_ARCH.LLAMA

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
        """Normalize a checkpoint tensor name and forward it to the parent.

        Tensors whose name starts with ``norm.`` or ``visual.`` produce no
        output at all. A leading ``llm.`` is removed from the name, and every
        remaining tensor is passed to the parent implementation.

        Args:
            data_torch: Tensor data, forwarded untouched.
            name: Tensor name as found in the checkpoint.
            bid: Block index, forwarded untouched.

        Yields:
            Whatever the parent ``modify_tensors`` yields for the adjusted name.
        """
        # NOTE(review): presumably these prefixes belong to the vision side,
        # which the text conversion does not need -- confirm.
        if name.startswith(("norm.", "visual.")):
            return

        llm_prefix = "llm."
        if name.startswith(llm_prefix):
            # Only the leading prefix is removed; later "llm." occurrences stay.
            name = name[len(llm_prefix):]

        yield from super().modify_tensors(data_torch, name, bid)
13399+
13400+
@ModelBase.register("Sarashina2VisionForCausalLM")
class Sarashina2VLVisionModel(Qwen2VLVisionModel):
    """Vision-side converter registered for ``Sarashina2VisionForCausalLM``.

    Behaves like the parent class except that ``model_type`` in the global
    config is overwritten once the parent constructor has finished.
    """

    def __init__(self, *args, **kwargs):
        """Run the parent constructor, then force ``model_type`` to ``qwen2_vl``."""
        super().__init__(*args, **kwargs)
        # NOTE(review): presumably later conversion steps key off this value
        # to take the Qwen2-VL code path -- confirm against the parent class.
        self.global_config["model_type"] = "qwen2_vl"
13406+
13407+
1338813408###### CONVERSION LOGIC ######
1338913409
1339013410
@@ -13641,7 +13661,7 @@ def get_model_architecture(hparams: dict[str, Any], model_type: ModelType) -> st
1364113661 # Step3-VL keeps text config under text_config but uses a custom top-level architecture.
1364213662 # For text conversion we route to a dedicated text-only class.
1364313663 # TODO: refactor this later to avoid adding exception here
13644- if model_type == ModelType.TEXT and arch == "StepVLForConditionalGeneration":
13664+ if model_type == ModelType.TEXT and arch in ( "StepVLForConditionalGeneration", "Sarashina2VisionForCausalLM") :
1364513665 return arch
1364613666
1364713667 # if "architectures" is found in the sub-config, use that instead
0 commit comments