@@ -13188,6 +13188,26 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
1318813188 yield from super().modify_tensors(data_torch, name, bid)
1318913189
1319013190
@ModelBase.register("Sarashina2VisionForCausalLM")
class Sarashina2VLTextModel(LlamaModel):
    """Text-side converter registered for ``Sarashina2VisionForCausalLM``.

    The tensors are emitted under the LLAMA architecture; this class only
    normalizes or filters checkpoint tensor names before delegating to
    ``LlamaModel``.
    """

    model_arch = gguf.MODEL_ARCH.LLAMA

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
        """Filter and rename one tensor, then defer to the parent class.

        * Names starting with ``"norm."`` or ``"visual."`` are dropped
          (nothing is yielded for them).
        * A leading ``"llm."`` prefix is stripped from the name.
        * Everything else is forwarded unchanged.
        """
        # These tensors are not handled by the text model.
        if name.startswith(("norm.", "visual.")):
            return

        llm_prefix = "llm."
        if name.startswith(llm_prefix):
            # Remove only the leading prefix; later "llm." occurrences stay.
            name = name[len(llm_prefix):]

        yield from super().modify_tensors(data_torch, name, bid)
13202+
13203+
@ModelBase.register("Sarashina2VisionForCausalLM")
class Sarashina2VLVisionModel(Qwen2VLVisionModel):
    """Vision-side converter registered for ``Sarashina2VisionForCausalLM``.

    Reuses ``Qwen2VLVisionModel`` and, once the parent is initialized,
    overrides the ``model_type`` entry of ``global_config``.
    """

    def __init__(self, *args, **kwargs):
        """Initialize the parent, then force ``model_type`` to ``"qwen2_vl"``."""
        super().__init__(*args, **kwargs)
        # NOTE(review): presumably lets the Qwen2-VL code paths in the base
        # class recognize this checkpoint -- confirm against the parent class.
        self.global_config["model_type"] = "qwen2_vl"
13209+
13210+
1319113211###### CONVERSION LOGIC ######
1319213212
1319313213
@@ -13443,7 +13463,7 @@ def get_model_architecture(hparams: dict[str, Any], model_type: ModelType) -> st
1344313463 # Step3-VL keeps text config under text_config but uses a custom top-level architecture.
1344413464 # For text conversion we route to a dedicated text-only class.
1344513465 # TODO: refactor this later to avoid adding exception here
13446- if model_type == ModelType.TEXT and arch == "StepVLForConditionalGeneration":
13466+ if model_type == ModelType.TEXT and arch in ( "StepVLForConditionalGeneration", "Sarashina2VisionForCausalLM") :
1344713467 return arch
1344813468
1344913469 # if "architectures" is found in the sub-config, use that instead
0 commit comments