1414from .qwen import QwenModel
1515
1616
17- @ModelBase .register ("DeepseekOCRForCausalLM" )
17+ @ModelBase .register ("DeepseekOCRForCausalLM" , "UnlimitedOCRForCausalLM" )
1818class DeepseekOCRVisionModel (MmprojModel ):
1919 def __init__ (self , * args , ** kwargs ):
2020 super ().__init__ (* args , ** kwargs )
@@ -205,6 +205,8 @@ def prepare_tensors(self):
205205@ModelBase .register (
206206 "DeepseekV2ForCausalLM" ,
207207 "DeepseekV3ForCausalLM" ,
208+ "DeepseekOCRForCausalLM" ,
209+ "UnlimitedOCRForCausalLM" ,
208210 "KimiVLForConditionalGeneration" ,
209211 "KimiK25ForConditionalGeneration" ,
210212 "YoutuForCausalLM" ,
@@ -224,7 +226,7 @@ def __init__(self, *args, **kwargs):
224226 self .origin_hf_arch = hparams .get ('architectures' , [None ])[0 ]
225227
226228 # special handling for Deepseek OCR
227- if self .origin_hf_arch in ("DeepseekOCRForCausalLM" , "DeepseekOCR2ForCausalLM" ):
229+ if self .origin_hf_arch in ("DeepseekOCRForCausalLM" , "DeepseekOCR2ForCausalLM" , "UnlimitedOCRForCausalLM" ):
228230 self .model_arch = gguf .MODEL_ARCH .DEEPSEEK2OCR
229231 self .gguf_writer .arch = gguf .MODEL_ARCH_NAMES [self .model_arch ]
230232 self .gguf_writer .add_architecture ()
@@ -350,6 +352,12 @@ def set_gguf_parameters(self):
350352
351353 self .gguf_writer .add_rope_dimension_count (hparams ["qk_rope_head_dim" ])
352354
355+ # Unlimited-OCR sliding window; written for metadata, the decoder ignores it (full MHA)
356+ if is_ocr :
357+ sliding_window = hparams .get ("sliding_window_size" ) or hparams .get ("sliding_window" )
358+ if sliding_window :
359+ self .gguf_writer .add_sliding_window (sliding_window )
360+
353361 if (rope_mscale_all := self .rope_parameters .get ("mscale_all_dim" )) is not None :
354362 # [TAG_DEEPSEEK2_YARN_LOG_MUL_FIX]
355363 # note: for legacy reasons, this is not consistent with the other usages of self.gguf_writer.add_rope_scaling_yarn_log_mul
0 commit comments