@@ -13373,6 +13373,26 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
1337313373 yield from super().modify_tensors(data_torch, name, bid)
1337413374
1337513375
@ModelBase.register("Sarashina2VisionForCausalLM")
class Sarashina2VLTextModel(LlamaModel):
    """Text-model converter registered for ``Sarashina2VisionForCausalLM``.

    The tensors are emitted using the LLaMA architecture. Only tensor-name
    handling differs from the parent class: names are normalised (or dropped)
    before being handed to ``LlamaModel.modify_tensors``.
    """

    model_arch = gguf.MODEL_ARCH.LLAMA

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
        """Normalise a checkpoint tensor name and delegate to the parent.

        Args:
            data_torch: Tensor data, passed through to the parent unchanged.
            name: Tensor name as it appears in the checkpoint.
            bid: Block index (or ``None``), passed through unchanged.

        Yields:
            Whatever the parent ``modify_tensors`` yields for the normalised
            name. Nothing is yielded for names starting with ``norm.`` or
            ``visual.``.
        """
        # Tensors under these prefixes are not emitted by this class
        # (presumably they belong to the vision side of the model -- confirm).
        if name.startswith(("norm.", "visual.")):
            return

        llm_prefix = "llm."
        if name.startswith(llm_prefix):
            # Strip only the leading prefix; later "llm." occurrences are kept.
            name = name[len(llm_prefix):]

        yield from super().modify_tensors(data_torch, name, bid)
13387+
13388+
@ModelBase.register("Sarashina2VisionForCausalLM")
class Sarashina2VLVisionModel(Qwen2VLVisionModel):
    """Vision-model converter registered for ``Sarashina2VisionForCausalLM``.

    Behaves like ``Qwen2VLVisionModel`` except that the ``model_type`` entry
    of the global config is overridden after construction.
    """

    def __init__(self, *args, **kwargs):
        """Run the parent initialiser, then force ``model_type`` to ``qwen2_vl``.

        All positional and keyword arguments are forwarded unchanged.
        """
        super().__init__(*args, **kwargs)
        # NOTE(review): presumably lets code keyed on model_type treat this
        # checkpoint as Qwen2-VL -- confirm against the parent class.
        self.global_config["model_type"] = "qwen2_vl"
13394+
13395+
1337613396###### CONVERSION LOGIC ######
1337713397
1337813398
@@ -13629,7 +13649,7 @@ def get_model_architecture(hparams: dict[str, Any], model_type: ModelType) -> st
1362913649 # Step3-VL keeps text config under text_config but uses a custom top-level architecture.
1363013650 # For text conversion we route to a dedicated text-only class.
1363113651 # TODO: refactor this later to avoid adding exception here
13632- if model_type == ModelType.TEXT and arch == "StepVLForConditionalGeneration":
13652+ if model_type == ModelType.TEXT and arch in ( "StepVLForConditionalGeneration", "Sarashina2VisionForCausalLM") :
1363313653 return arch
1363413654
1363513655 # if "architectures" is found in the sub-config, use that instead
0 commit comments